Converting Documents to Different Formats with OpenOffice in Java

Many commercial projects need to convert documents from one format to another. For example:

HTML —–> Pdf
Doc —–> Pdf
PPT —–> Images

etc

OpenOffice is a versatile open-source office suite that can also be used to convert documents from one format to another.

Here is one such example that converts HTML to PDF. I used JODConverter, one of the best libraries around, which wraps the complex OpenOffice interactions in simple object-oriented methods.

/**
 * 
 */
package com.linkwithweb.converter;

import java.io.ByteArrayInputStream;
import java.io.OutputStream;
import java.net.ConnectException;

import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.DocumentFamily;
import com.artofsolving.jodconverter.DocumentFormat;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;

/**
 * Converts documents through a running OpenOffice.org instance using
 * JODConverter.
 * <p>
 * Prerequisite: OpenOffice must already be running in headless listening mode,
 * for example:
 * 
 * <pre>
 * %OPENOFFICE_HOME%/soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard
 * </pre>
 * 
 * Typical usage: call {@link #openConnection()}, perform one or more
 * conversions, then call {@link #closeConnection()}.
 * 
 * @author Ashwin Kumar
 * 
 */
public class OpenOfficeConverter {
	/** Port used by {@link #openConnection()} (matches the command line above). */
	private static final int DEFAULT_PORT = 8100;

	// Connection to the OpenOffice.org instance; null until a connect succeeds.
	private OpenOfficeConnection connection = null;

	/**
	 * Converts an HTML string to PDF and writes the PDF to the given stream.
	 * The stream is not closed by this method.
	 * 
	 * @param oHTMLText
	 *            HTML markup to convert; must not be null
	 * @param oOutputStream
	 *            destination for the generated PDF; must not be null
	 * @throws IllegalArgumentException
	 *             if either argument is null
	 * @throws IllegalStateException
	 *             if no connection is open, or if the conversion itself fails
	 *             (the original failure is attached as the cause)
	 */
	public void convertFromHTMLToPDF(String oHTMLText,
			OutputStream oOutputStream) {
		if (oHTMLText == null) {
			throw new IllegalArgumentException("oHTMLText must not be null");
		}
		if (oOutputStream == null) {
			throw new IllegalArgumentException("oOutputStream must not be null");
		}
		// Fail loudly instead of silently producing an empty output.
		if (connection == null || !connection.isConnected()) {
			throw new IllegalStateException(
					"Not connected to OpenOffice; call openConnection() first");
		}

		DocumentFormat inputDocumentFormat = new DocumentFormat("HTML",
				DocumentFamily.TEXT, "text/html", "html");
		inputDocumentFormat.setExportFilter(DocumentFamily.TEXT,
				"HTML (StarWriter)");

		DocumentFormat outputDocumentFormat = new DocumentFormat(
				"Portable Document Format", DocumentFamily.TEXT,
				"application/pdf", "pdf");
		outputDocumentFormat.setExportFilter(DocumentFamily.TEXT,
				"writer_pdf_Export");

		DocumentConverter oDocumentConverter = new OpenOfficeDocumentConverter(
				connection);
		try {
			// NOTE(review): getBytes() uses the platform default charset, so
			// non-ASCII HTML may convert differently across machines - confirm
			// which encoding the OpenOffice HTML import expects before pinning one.
			oDocumentConverter.convert(
					new ByteArrayInputStream(oHTMLText.getBytes()),
					inputDocumentFormat, oOutputStream, outputDocumentFormat);
		} catch (RuntimeException e) {
			throw new IllegalStateException("HTML to PDF conversion failed", e);
		}
	}

	/**
	 * Opens a connection to OpenOffice on port 8100 using the connection
	 * class's default host. Does nothing if a connection is already open. A
	 * failed connection attempt is reported on standard error and leaves this
	 * converter unconnected.
	 */
	public void openConnection() {
		if (isConnected()) {
			return;
		}
		connectWith(new SocketOpenOfficeConnection(DEFAULT_PORT));
	}

	/**
	 * Opens a connection to OpenOffice on the given host and port. Does nothing
	 * if a connection is already open. A failed connection attempt is reported
	 * on standard error and leaves this converter unconnected.
	 * 
	 * @param host
	 *            host name or address of the OpenOffice instance
	 * @param port
	 *            port the OpenOffice instance is listening on
	 */
	public void openConnection(String host, int port) {
		if (isConnected()) {
			return;
		}
		connectWith(new SocketOpenOfficeConnection(host, port));
	}

	/**
	 * Closes the connection if one is open. Failures while disconnecting are
	 * reported on standard error but never propagated, so this is safe to call
	 * during cleanup.
	 */
	public void closeConnection() {
		try {
			if (isConnected()) {
				connection.disconnect();
			}
		} catch (Exception e) {
			// Best effort: nothing useful a caller can do if disconnect fails.
			e.printStackTrace();
		}
	}

	/** Returns true when a connection object exists and reports being connected. */
	private boolean isConnected() {
		return connection != null && connection.isConnected();
	}

	/** Connects the candidate and adopts it as the active connection on success. */
	private void connectWith(OpenOfficeConnection candidate) {
		try {
			candidate.connect();
			connection = candidate;
		} catch (ConnectException e) {
			e.printStackTrace();
		}
	}
}

To use the class above, here is some sample code:

/**
 * 
 */
package com.linkwithweb.converter;

import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

/**
 * Sample program: converts a small HTML snippet to <code>test.pdf</code> in the
 * working directory using {@link OpenOfficeConverter}.
 * 
 * @author Ashwin Kumar
 * 
 */
public class HTMLToPDFConverter {

	/**
	 * Opens the OpenOffice connection, writes the converted PDF to
	 * <code>test.pdf</code>, then closes the file and the connection. Any
	 * failure is printed as a stack trace.
	 * 
	 * @param args
	 *            ignored
	 */
	public static void main(String[] args) {
		final String html = "Hello hi tehrere<h1>heading</h1>";

		final OpenOfficeConverter converter = new OpenOfficeConverter();
		converter.openConnection();

		OutputStream pdfOut = null;
		try {
			pdfOut = new FileOutputStream("test.pdf");
			converter.convertFromHTMLToPDF(html, pdfOut);
		} catch (FileNotFoundException notFound) {
			notFound.printStackTrace();
		} catch (Throwable failure) {
			failure.printStackTrace();
		} finally {
			closeAndReport(pdfOut);
		}

		converter.closeConnection();
	}

	/** Closes the stream if it was opened, printing any I/O failure. */
	private static void closeAndReport(OutputStream stream) {
		if (stream == null) {
			return;
		}
		try {
			stream.close();
		} catch (IOException closeFailure) {
			closeFailure.printStackTrace();
		}
	}

}

Below is a sample Maven configuration to build the project:


<!-- Maven build descriptor for the DocumentConverter sample project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
	xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>com.linkwithweb.documents</groupId>
	<artifactId>DocumentConverter</artifactId>
	<version>0.0.1-SNAPSHOT</version>
	<name>DocumentConverter</name>
	<description>DocumentConverter</description>
	<dependencies>
		<!-- JODConverter: provides the com.artofsolving.jodconverter.* classes used by the sample code. -->
		<dependency>
			<groupId>com.artofsolving</groupId>
			<artifactId>jodconverter</artifactId>
			<version>2.2.1</version>
		</dependency>
		
		<!-- NOTE(review): slf4j-api is not imported by the sample sources; presumably required by jodconverter at runtime - confirm. -->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-api</artifactId>
			<version>1.6.1</version>
		</dependency>		

	</dependencies>

	<build>
		<finalName>DocumentConverter</finalName>
		<plugins>
			<!-- Compile with Java 1.5 source and target levels. -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>2.3.1</version>
				<configuration>
					<source>1.5</source>
					<target>1.5</target>
				</configuration>
			</plugin>
			<!-- Binds the plugin's "convert" goal to the site phase: .odt files under
			     src/site/resources are configured for pdf output into the reporting directory.
			     NOTE(review): this is independent of the Java sample code; presumably it also
			     needs a running OpenOffice instance - confirm. -->
			<plugin>
				<groupId>com.artofsolving</groupId>
				<artifactId>jodconverter-maven-plugin</artifactId>
				<version>2.2.1</version>
				<configuration>
					<sourceDirectory>${basedir}/src/site/resources</sourceDirectory>
					<outputDirectory>${project.reporting.outputDirectory}</outputDirectory>
					<include>**/*.odt</include>
					<outputFormat>pdf</outputFormat>
				</configuration>
				<executions>
					<execution>
						<!-- NOTE(review): the id below contains a trailing space. -->
						<id>convert </id>
						<phase>site</phase>
						<goals>
							<goal>convert</goal>
						</goals>
					</execution>
				</executions>
			</plugin>
		</plugins>
	</build>

	<!-- NOTE(review): none of these properties are referenced elsewhere in this POM;
	     presumably left over from a project template. -->
	<properties>
		<org.easymock.version>2.3</org.easymock.version>
		<org.springframework.version>2.5.6</org.springframework.version>
		<commons.lang.version>2.1</commons.lang.version>
		<log4j.version>1.2.14</log4j.version>
		<org.freemarker.version>2.3.15</org.freemarker.version>
		<manifest.file>src/main/resources/META-INF/MANIFEST.MF</manifest.file>
	</properties>
</project>

Attached is a sample project. Rename its file extension to .rar before extracting it.

DocumentConverter(Rename Extension to .rar)