Skip to content
Snippets Groups Projects
Commit 8d661414 authored by juanf's avatar juanf
Browse files

SSDM-5725 : Fix: generate real Word files that work in most word processors +

better HTML output with Base64-embedded images.
parent 8f090875
No related branches found
No related tags found
No related merge requests found
...@@ -2,19 +2,20 @@ package ch.ethz.sis; ...@@ -2,19 +2,20 @@ package ch.ethz.sis;
import java.io.ByteArrayOutputStream; import java.io.ByteArrayOutputStream;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.util.Base64;
import org.docx4j.jaxb.Context; import org.docx4j.convert.in.xhtml.XHTMLImporterImpl;
import org.docx4j.openpackaging.contenttype.ContentType;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage; import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.openpackaging.parts.PartName; import org.eclipse.jetty.client.HttpClient;
import org.docx4j.openpackaging.parts.WordprocessingML.AlternativeFormatInputPart; import org.eclipse.jetty.client.api.ContentResponse;
import org.docx4j.relationships.Relationship; import org.eclipse.jetty.client.api.Request;
import org.docx4j.wml.CTAltChunk;
import org.jsoup.Jsoup; import org.jsoup.Jsoup;
import org.jsoup.nodes.Document; import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element; import org.jsoup.nodes.Element;
import org.jsoup.select.Elements; import org.jsoup.select.Elements;
import ch.systemsx.cisd.common.http.JettyHttpClientFactory;
public class DOCXBuilder { public class DOCXBuilder {
public static void main(String[] args) throws Exception { public static void main(String[] args) throws Exception {
DOCXBuilder docx = new DOCXBuilder(); DOCXBuilder docx = new DOCXBuilder();
...@@ -39,15 +40,20 @@ public class DOCXBuilder { ...@@ -39,15 +40,20 @@ public class DOCXBuilder {
private boolean closed; private boolean closed;
public DOCXBuilder() { public DOCXBuilder() {
System.setProperty("javax.xml.transform.TransformerFactory", "com.sun.org.apache.xalan.internal.xsltc.trax.TransformerFactoryImpl");
closed = false; closed = false;
doc = new StringBuffer(); doc = new StringBuffer();
startDoc(); startDoc();
} }
/**
 * Replaces the builder's document buffer with the supplied text and flags the
 * builder as closed.
 *
 * @param doc complete document text to hold from now on; {@code null} is
 *            rejected by the {@link StringBuffer} constructor before any
 *            state of this builder is modified
 */
public void setDocument(String doc) {
    // Build the new buffer first so a failure leaves the current state untouched.
    final StringBuffer replacement = new StringBuffer(doc);
    this.doc = replacement;
    this.closed = true;
}
private void startDoc() { private void startDoc() {
if (!closed) { if (!closed) {
doc.append( doc.append("<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">");
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">");
doc.append("<html xmlns=\"http://www.w3.org/1999/xhtml\">"); doc.append("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
doc.append("<head></head>"); doc.append("<head></head>");
doc.append("<body>"); doc.append("<body>");
...@@ -74,7 +80,7 @@ public class DOCXBuilder { ...@@ -74,7 +80,7 @@ public class DOCXBuilder {
if (!closed) if (!closed)
{ {
value = cleanXMLEnvelope(value); value = cleanXMLEnvelope(value);
doc.append("<p>").append(getImgFixed(value)).append("</p>"); doc.append("<p>").append(fixImageSizes(value)).append("</p>");
} }
} }
...@@ -91,45 +97,44 @@ public class DOCXBuilder { ...@@ -91,45 +97,44 @@ public class DOCXBuilder {
} }
public byte[] getHTMLBytes() throws Exception { public byte[] getHTMLBytes() throws Exception {
endDoc(); if (!closed) {
return doc.toString().getBytes(); endDoc();
}
String docWithImg = encodeImgAsBase64(doc.toString());
return docWithImg.getBytes();
} }
public byte[] getDocBytes() throws Exception { public byte[] getDocBytes() throws Exception {
// .. Finish Document // .. Finish Document
endDoc(); if (!closed) {
endDoc();
// .. HTML Code }
Document xhtmldoc = Jsoup.parse(doc.toString());
xhtmldoc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage(); WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage.createPackage();
AlternativeFormatInputPart afiPart = new AlternativeFormatInputPart(new PartName("/hw.html")); XHTMLImporterImpl XHTMLImporter = new XHTMLImporterImpl(wordMLPackage);
afiPart.setBinaryData(doc.toString().getBytes()); wordMLPackage.getMainDocumentPart().getContent().addAll(XHTMLImporter.convert( xhtmldoc.html(), null) );
afiPart.setContentType(new ContentType("text/html"));
Relationship altChunkRel = wordMLPackage.getMainDocumentPart().addTargetPart(afiPart);
// .. the bit in document body
CTAltChunk ac = Context.getWmlObjectFactory().createCTAltChunk();
ac.setId(altChunkRel.getId());
wordMLPackage.getMainDocumentPart().addObject(ac);
// .. content type
wordMLPackage.getContentTypeManager().addDefaultContentType("html", "text/html");
ByteArrayOutputStream outStream = new ByteArrayOutputStream(); ByteArrayOutputStream outStream = new ByteArrayOutputStream();
wordMLPackage.save(outStream); wordMLPackage.save(outStream);
return outStream.toByteArray(); return outStream.toByteArray();
} }
private String cleanXMLEnvelope(String value) { private String cleanXMLEnvelope(String value) {
if (value.startsWith(START_RICH_TEXT) && value.endsWith(END_RICH_TEXT)) { if (value.startsWith(START_RICH_TEXT) && value.endsWith(END_RICH_TEXT)) {
value = value.substring(START_RICH_TEXT.length() + 3, value.length() - END_RICH_TEXT.length()); value = value.substring(START_RICH_TEXT.length() + 3, value.length() - END_RICH_TEXT.length());
} }
return value; return value;
} }
private String getImgFixed(String value) { private String fixImageSizes(String value) {
Document doc = Jsoup.parse(value); Document doc = Jsoup.parse(value);
doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
Elements elements = doc.select("img"); Elements elements = doc.select("img");
for (Element element : elements) { for (Element element : elements) {
String style = element.attr("style"); String style = element.attr("style");
if (style != null) { if (style != null) {
...@@ -153,4 +158,28 @@ public class DOCXBuilder { ...@@ -153,4 +158,28 @@ public class DOCXBuilder {
return doc.html(); return doc.html();
} }
/**
 * Returns the given markup with the {@code src} of each {@code <img>} element
 * replaced by a data URI obtained from {@link #getDataUriFromUri(String)}.
 *
 * The conversion is best effort: an image whose source cannot be fetched keeps
 * its original {@code src}. Sources that are empty or already a {@code data:}
 * URI are left untouched without attempting a download.
 *
 * @param value markup to process; it is parsed with jsoup and serialised
 *              again using XML output syntax
 * @return the re-serialised markup with the embeddable images inlined
 */
private String encodeImgAsBase64(String value) {
    Document doc = Jsoup.parse(value);
    doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);

    for (Element element : doc.select("img")) {
        String src = element.attr("src");
        // Nothing to download for a missing source or an already inlined image.
        if (src.isEmpty() || src.regionMatches(true, 0, "data:", 0, 5)) {
            continue;
        }
        try {
            element.attr("src", getDataUriFromUri(src));
        } catch (InterruptedException interrupted) {
            // Restore the interrupt status for callers and stop issuing
            // further blocking requests; remaining images keep their src.
            Thread.currentThread().interrupt();
            break;
        } catch (Exception ignored) {
            // Deliberately best effort: if the image cannot be fetched the
            // original src is kept so the rest of the document still renders.
        }
    }
    return doc.html();
}
/**
 * Downloads the resource at the given URL with an HTTP GET and returns it as
 * a Base64 data URI of the form {@code data:<media type>;base64,<payload>}.
 *
 * The media type is taken as reported by the response.
 *
 * @param url address of the resource to download
 * @return data URI carrying the response body
 * @throws java.io.IOException if the server answers with a non-2xx status, so
 *         that an error page is never embedded in place of the resource
 * @throws Exception if the request itself fails (as propagated by the HTTP client)
 */
private static String getDataUriFromUri(String url) throws Exception {
    HttpClient client = JettyHttpClientFactory.getHttpClient();
    Request requestEntity = client.newRequest(url).method("GET");
    ContentResponse contentResponse = requestEntity.send();

    int status = contentResponse.getStatus();
    if (status < 200 || status >= 300) {
        throw new java.io.IOException("GET " + url + " returned HTTP status " + status);
    }

    String payload = Base64.getEncoder().encodeToString(contentResponse.getContent());
    return "data:" + contentResponse.getMediaType() + ";base64," + payload;
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment