Most commercial projects need to convert documents between various formats. For example:
HTML —–> Pdf
Doc —–> Pdf
PPT —–> Images
etc
OpenOffice is a very versatile open-source office suite that can be used to convert documents from one format to another.
Here is one such example that converts HTML to PDF. I used JODConverter, one of the best libraries around, which wraps complex OpenOffice interactions in simple object-oriented methods.
/**
*
*/
package com.linkwithweb.converter;
import java.io.ByteArrayInputStream;
import java.io.OutputStream;
import java.net.ConnectException;
import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.DocumentFamily;
import com.artofsolving.jodconverter.DocumentFormat;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.OpenOfficeDocumentConverter;
/**
 * Converts HTML text to PDF by delegating the work to an OpenOffice.org
 * instance through JODConverter.
 * <p>
 * Prerequisites: an OpenOffice.org instance must be accepting socket
 * connections, for example
 * %OPENOFFICE_HOME%/soffice -headless -accept="socket,host=127.0.0.1,port=8100;urp;" -nofirststartwizard
 * <p>
 * Expected call order: {@link #openConnection()} (or
 * {@link #openConnection(String, int)}), then one or more calls to
 * {@link #convertFromHTMLToPDF(String, OutputStream)}, then
 * {@link #closeConnection()}.
 *
 * @author Ashwin Kumar
 */
public class OpenOfficeConverter {
    /** Port used by {@link #openConnection()}; matches the prerequisites command above. */
    private static final int DEFAULT_PORT = 8100;

    /** Connection to the OpenOffice.org instance; {@code null} until an openConnection method is called. */
    private OpenOfficeConnection connection = null;

    /**
     * Converts the given HTML text to PDF and writes the result to the given stream.
     * The stream is not closed by this method.
     *
     * @param oHTMLText     HTML markup to convert; must not be {@code null}
     * @param oOutputStream destination for the generated PDF; must not be {@code null}
     * @throws IllegalArgumentException if either argument is {@code null}
     * @throws IllegalStateException    if no connection is open, or if the
     *                                  conversion fails (the original failure is the cause)
     */
    public void convertFromHTMLToPDF(String oHTMLText,
            OutputStream oOutputStream) {
        if (oHTMLText == null) {
            throw new IllegalArgumentException("oHTMLText must not be null");
        }
        if (oOutputStream == null) {
            throw new IllegalArgumentException("oOutputStream must not be null");
        }
        if (!isConnected()) {
            throw new IllegalStateException(
                    "Not connected to OpenOffice; call openConnection() first");
        }
        try {
            DocumentFormat inputDocumentFormat = new DocumentFormat("HTML",
                    DocumentFamily.TEXT, "text/html", "html");
            inputDocumentFormat.setExportFilter(DocumentFamily.TEXT,
                    "HTML (StarWriter)");
            DocumentFormat outputDocumentFormat = new DocumentFormat(
                    "Portable Document Format", DocumentFamily.TEXT,
                    "application/pdf", "pdf");
            outputDocumentFormat.setExportFilter(DocumentFamily.TEXT,
                    "writer_pdf_Export");
            DocumentConverter oDocumentConverter = new OpenOfficeDocumentConverter(
                    connection);
            // NOTE(review): getBytes() uses the platform default charset, so
            // non-ASCII HTML may convert differently from machine to machine.
            // Confirm which encoding the OpenOffice HTML import expects before
            // pinning an explicit charset here.
            oDocumentConverter.convert(
                    new ByteArrayInputStream(oHTMLText.getBytes()),
                    inputDocumentFormat, oOutputStream, outputDocumentFormat);
        } catch (Exception e) {
            // Previously swallowed silently, which left callers with an empty
            // or partial PDF and no indication that anything went wrong.
            throw new IllegalStateException("HTML to PDF conversion failed", e);
        }
    }

    /**
     * Opens a connection to an OpenOffice.org instance on port 8100 using the
     * library's default host. Does nothing if a connection is already open.
     * A connection failure is reported on standard error and leaves this
     * converter unconnected.
     */
    public void openConnection() {
        if (isConnected()) {
            return;
        }
        connect(new SocketOpenOfficeConnection(DEFAULT_PORT));
    }

    /**
     * Opens a connection to an OpenOffice.org instance on the given host and
     * port. Does nothing if a connection is already open, even if that
     * connection points at a different host or port. A connection failure is
     * reported on standard error and leaves this converter unconnected.
     *
     * @param host host name or address of the OpenOffice.org instance
     * @param port port the OpenOffice.org instance is listening on
     */
    public void openConnection(String host, int port) {
        if (isConnected()) {
            return;
        }
        connect(new SocketOpenOfficeConnection(host, port));
    }

    /**
     * Closes the connection if one is open. Failures while disconnecting are
     * reported on standard error but never propagated, so this method is safe
     * to call from cleanup code.
     */
    public void closeConnection() {
        try {
            if (isConnected()) {
                connection.disconnect();
            }
        } catch (Exception e) {
            // Best effort: report the problem instead of hiding it, but do not
            // let a failed disconnect mask an earlier error in the caller.
            e.printStackTrace();
        }
    }

    /** Returns whether a connection object exists and reports itself as connected. */
    private boolean isConnected() {
        return connection != null && connection.isConnected();
    }

    /** Stores the given connection and connects it, reporting a failure on standard error. */
    private void connect(OpenOfficeConnection newConnection) {
        connection = newConnection;
        try {
            connection.connect();
        } catch (ConnectException e) {
            e.printStackTrace();
        }
    }
}
To use the method above, here is some sample code:
/**
*
*/
package com.linkwithweb.converter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
/**
 * Sample program: converts a small HTML snippet to the file test.pdf using
 * {@link OpenOfficeConverter}.
 *
 * @author Ashwin Kumar
 */
public class HTMLToPDFConverter {
    /**
     * Opens a connection, writes the converted snippet to test.pdf in the
     * working directory, then closes the file and the connection. Any failure
     * during conversion is printed to standard error.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String html = "Hello hi tehrere<h1>heading</h1>";
        OpenOfficeConverter converter = new OpenOfficeConverter();
        converter.openConnection();

        OutputStream pdfStream = null;
        try {
            pdfStream = new FileOutputStream("test.pdf");
            converter.convertFromHTMLToPDF(html, pdfStream);
        } catch (Throwable failure) {
            // Covers a missing or unwritable target file as well as any conversion error.
            failure.printStackTrace();
        } finally {
            closeAndReport(pdfStream);
        }

        converter.closeConnection();
    }

    /** Closes the stream if it was opened, printing any I/O error raised by the close. */
    private static void closeAndReport(OutputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException closeFailure) {
            closeFailure.printStackTrace();
        }
    }
}
Below is a sample Maven configuration to build the project:
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build descriptor for the DocumentConverter sample project. -->
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>com.linkwithweb.documents</groupId>
<artifactId>DocumentConverter</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>DocumentConverter</name>
<description>DocumentConverter</description>
<dependencies>
<!-- JODConverter: supplies the com.artofsolving.jodconverter classes used to drive OpenOffice. -->
<dependency>
<groupId>com.artofsolving</groupId>
<artifactId>jodconverter</artifactId>
<version>2.2.1</version>
</dependency>
<!-- SLF4J logging API. NOTE(review): presumably needed by JODConverter at runtime; confirm, and check whether an SLF4J binding is also required. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>1.6.1</version>
</dependency>
</dependencies>
<build>
<!-- Names the built artifact DocumentConverter, without the version suffix. -->
<finalName>DocumentConverter</finalName>
<plugins>
<!-- Compiles the sources at Java 1.5 source and target level. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.1</version>
<configuration>
<source>1.5</source>
<target>1.5</target>
</configuration>
</plugin>
<!-- Bound to the site phase: converts the .odt files under src/site/resources to PDF in the reporting output directory. -->
<plugin>
<groupId>com.artofsolving</groupId>
<artifactId>jodconverter-maven-plugin</artifactId>
<version>2.2.1</version>
<configuration>
<sourceDirectory>${basedir}/src/site/resources</sourceDirectory>
<outputDirectory>${project.reporting.outputDirectory}</outputDirectory>
<include>**/*.odt</include>
<outputFormat>pdf</outputFormat>
</configuration>
<executions>
<execution>
<!-- NOTE(review): the id value below has a trailing space; confirm whether that is intended. -->
<id>convert </id>
<phase>site</phase>
<goals>
<goal>convert</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
<!-- NOTE(review): none of these properties is referenced elsewhere in this POM; confirm whether they are still needed. -->
<properties>
<org.easymock.version>2.3</org.easymock.version>
<org.springframework.version>2.5.6</org.springframework.version>
<commons.lang.version>2.1</commons.lang.version>
<log4j.version>1.2.14</log4j.version>
<org.freemarker.version>2.3.15</org.freemarker.version>
<manifest.file>src/main/resources/META-INF/MANIFEST.MF</manifest.file>
</properties>
</project>
Attached is a sample project. After downloading, rename the file so that it has a .rar extension.
DocumentConverter(Rename Extension to .rar)