HTML5 support

git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@145 f0f2a975-2e09-46c8-9428-3b39399b9f3c
This commit is contained in:
henrikjust 2012-04-01 11:29:12 +00:00
parent ecacd13bce
commit d1ef4d8f8c
14 changed files with 198 additions and 54 deletions

View file

@ -2,6 +2,9 @@ Changelog for Writer2LaTeX version 1.2 -> 1.4
---------- version 1.3.1 alpha ----------
[w2x] Added support for HTML5 as export type (the ConverterFactory understands the pseudo-MIME type text/html5).
The converter creates polyglot HTML5 documents, i.e. documents that conform to both the HTML5 and the XML standards.
[all] Optimized the parsing of the source document saving some time and space (several intermediate steps and large byte arrays
are now avoided)

Binary file not shown.

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2011 by Henrik Just
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2011-06-05)
* Version 1.4 (2012-03-30)
*
*/
@ -53,7 +53,7 @@ import writer2latex.util.Misc;
* <pre>java -jar writer2latex.jar [options] source [target]</pre>
* <p>Where the available options are
* <ul>
* <li><code>-latex</code>, <code>-bibtex</code>, <code>-xhtml</code>,
* <li><code>-latex</code>, <code>-bibtex</code>, <code>-html5</code>, <code>-xhtml</code>,
<code>-xhtml+mathml</code>, <code>-xhtml+mathml+xsl</code>, <code>-epub</code>
* <li><code>-recurse</code>
* <li><code>-ultraclean</code>, <code>-clean</code>, <code>-pdfscreen</code>,
@ -328,6 +328,7 @@ public final class Application {
System.out.println(" -xhtml11");
System.out.println(" -xhtml+mathml");
System.out.println(" -xhtml+mathml+xsl");
System.out.println(" -html5");
System.out.println(" -epub");
System.out.println(" -recurse");
System.out.println(" -template[=]<template file>");
@ -360,6 +361,7 @@ public final class Application {
if (sArg.startsWith("-")) { // found an option
if ("-latex".equals(sArg)) { sTargetMIME = MIMETypes.LATEX; }
else if ("-bibtex".equals(sArg)) { sTargetMIME = MIMETypes.BIBTEX; }
else if ("-html5".equals(sArg)) { sTargetMIME = MIMETypes.HTML5; }
else if ("-xhtml".equals(sArg)) { sTargetMIME = MIMETypes.XHTML; }
else if ("-xhtml11".equals(sArg)) { sTargetMIME = MIMETypes.XHTML11; }
else if ("-xhtml+mathml".equals(sArg)) { sTargetMIME = MIMETypes.XHTML_MATHML; }

View file

@ -20,7 +20,7 @@
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-27)
* Version 1.4 (2012-03-28)
*
*/
@ -33,7 +33,7 @@ public class ConverterFactory {
// Version information
private static final String VERSION = "1.3.1";
private static final String DATE = "2012-03-27";
private static final String DATE = "2012-03-28";
/** Return the Writer2LaTeX version in the form
* (major version).(minor version).(patch level)<br/>
@ -61,6 +61,10 @@ public class ConverterFactory {
* <li><code>application/xhtml+xml</code> for XHTML+MathML</li>
* <li><code>application/xml</code> for XHTML+MathML using stylesheets from w3c's
* math working group</li>
* <li><code>text/html5</code> for HTML5 documents
* Note that this is <em>not</em> the recommended media type for HTML5
* (see http://wiki.whatwg.org/), but it is used internally
* by Writer2xhtml to distinguish HTML5 from XHTML (which uses <code>text/html</code>)</li>
* <li><code>application/epub+zip</code> for EPUB format</li>
* </ul>
*
@ -88,6 +92,9 @@ public class ConverterFactory {
else if (MIMETypes.XHTML_MATHML_XSL.equals(sMIME)) {
converter = createInstance("writer2latex.xhtml.XhtmlMathMLXSLConverter");
}
else if (MIMETypes.HTML5.equals(sMIME)) {
converter = createInstance("writer2latex.xhtml.Html5Converter");
}
else if (MIMETypes.EPUB.equals(sMIME)) {
converter = createInstance("writer2latex.epub.EPUBConverter");
}

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2011 by Henrik Just
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2011-07-22)
* Version 1.4 (2012-03-27)
*
*/
@ -45,12 +45,14 @@ public class MIMETypes {
public static final String SVM="application/x-openoffice-gdimetafile;windows_formatname=\"GDIMetaFile\"";
public static final String PDF="application/pdf";
// Desitination formats
// Destination formats
public static final String XHTML="text/html";
/** This is a fake Mime type, for internal use only */
/** This is a fake MIME type, for internal use only */
public static final String XHTML11="application/xhtml11";
public static final String XHTML_MATHML="application/xhtml+xml";
public static final String XHTML_MATHML_XSL="application/xml";
/** This is a fake MIME type, for internal use only */
public static final String HTML5="text/html5";
public static final String EPUB="application/epub+zip";
public static final String LATEX="application/x-latex";
public static final String BIBTEX="application/x-bibtex";

View file

@ -20,10 +20,12 @@
*
* All Rights Reserved.
*
* Version 1.2 (2012-02-26)
* Version 1.4 (2012-03-30)
*
*/
// TODO: When polyglot markup uses either a textarea or pre element, the text within the element does not begin with a newline.
package writer2latex.xhtml;
import java.io.File;
@ -650,13 +652,19 @@ public class Converter extends ConverterBase {
Element head = htmlDoc.getHeadNode();
if (head!=null) {
// Declare charset (we need this for xhtml because we have no <?xml ... ?>)
// Declare charset (we need this for XHTML 1.0 strict and HTML5 because we have no <?xml ... ?>)
if (nType==XhtmlDocument.XHTML10) {
Element meta = htmlDOM.createElement("meta");
meta.setAttribute("http-equiv","Content-Type");
meta.setAttribute("content","text/html; charset="+htmlDoc.getEncoding().toLowerCase());
head.appendChild(meta);
}
else if (nType==XhtmlDocument.HTML5) {
// The encoding should be UTF-8, but we still respect the user's setting
Element meta = htmlDOM.createElement("meta");
meta.setAttribute("charset",htmlDoc.getEncoding().toUpperCase());
head.appendChild(meta);
}
// Add meta data (for EPUB the meta data belongs to the .opf file)
if (!bOPS) {

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2010 by Henrik Just
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2010-05-09)
* Version 1.4 (2012-03-30)
*
*/
@ -77,8 +77,8 @@ public class ConverterHelper {
}
if (info.sLang!=null) {
hnode.setAttribute("xml:lang",info.sLang);
if (converter.getType()==XhtmlDocument.XHTML10) {
hnode.setAttribute("lang",info.sLang); // HTML4 compatibility
if (converter.getType()==XhtmlDocument.XHTML10 || converter.getType()==XhtmlDocument.HTML5) {
hnode.setAttribute("lang",info.sLang); // HTML4 compatibility/polyglot HTML5
}
}
if (info.sDir!=null) {

View file

@ -0,0 +1,35 @@
/************************************************************************
*
* Html5Converter.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-28)
*
*/
package writer2latex.xhtml;
/**
 * Converter variant that produces HTML5 output.
 *
 * <p>This class adds no behavior of its own; it only selects the
 * {@link XhtmlDocument#HTML5} document type and inherits everything else
 * from {@link Converter}.</p>
 */
public class Html5Converter extends Converter {

    /** Creates a converter whose document type is {@link XhtmlDocument#HTML5}. */
    public Html5Converter() {
        super(XhtmlDocument.HTML5);
    }

}

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2011 by Henrik Just
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2011-04-20)
* Version 1.4 (2012-03-28)
*
*/
@ -204,20 +204,28 @@ public class TableConverter extends ConverterHelper {
// Mozilla (like OOo) includes them.
// If the first row contains colspan we have to add <col> anyway
if (!config.xhtmlIgnoreTableDimensions()) {
if (view.getRelTableWidth()!=null) {
for (int nCol=0; nCol<nColCount; nCol++) {
Element col = converter.createElement("col");
hnode.appendChild(col);
col.setAttribute("style","width:"+view.getRelColumnWidth(nCol));
}
}
else if (bFirstRowColSpan) {
for (int nCol=0; nCol<nColCount; nCol++) {
Element col = converter.createElement("col");
hnode.appendChild(col);
col.setAttribute("style","width:"+getTableSc().colScale(view.getColumnWidth(nCol)));
}
}
if (view.getRelTableWidth()!=null || bFirstRowColSpan) {
Element colgroup = hnode;
if (converter.nType==XhtmlDocument.HTML5) {
// Polyglot HTML5 documents must use an explicit colgroup
colgroup = converter.createElement("colgroup");
hnode.appendChild(colgroup);
}
if (view.getRelTableWidth()!=null) {
for (int nCol=0; nCol<nColCount; nCol++) {
Element col = converter.createElement("col");
colgroup.appendChild(col);
col.setAttribute("style","width:"+view.getRelColumnWidth(nCol));
}
}
else if (bFirstRowColSpan) {
for (int nCol=0; nCol<nColCount; nCol++) {
Element col = converter.createElement("col");
colgroup.appendChild(col);
col.setAttribute("style","width:"+getTableSc().colScale(view.getColumnWidth(nCol)));
}
}
}
}
// Identify head
@ -227,7 +235,13 @@ public class TableConverter extends ConverterHelper {
}
if (nBodyStart==0 || nBodyStart==nRowCount) {
// all body or all head
traverseRows(view,0,nRowCount,hnode);
Element tbody = hnode;
if (converter.nType==XhtmlDocument.HTML5) {
// Polyglot HTML5 documents must use an explicit tbody
tbody = converter.createElement("tbody");
hnode.appendChild(tbody);
}
traverseRows(view,0,nRowCount,tbody);
}
else {
// Create thead

View file

@ -16,15 +16,16 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2010 by Henrik Just
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2010-10-27)
* Version 1.4 (2012-04-01)
*
*/
//TODO: Add named entities outside ISO-latin 1
//TODO: Add named entities outside ISO-latin 1
//TODO: When polyglot markup uses either a textarea or pre element, the text within the element does not begin with a newline.
package writer2latex.xhtml;
@ -72,12 +73,16 @@ public class XhtmlDocument extends DOMDocument {
*/
public static final int XHTML_MATHML_XSL = 3;
/** Constant to identify HTML5 documents */
public static final int HTML5 = 4;
// Some static data
private static final String[] sExtension = { ".html", ".xhtml", ".xhtml", ".xml" };
private static final String[] sExtension = { ".html", ".xhtml", ".xhtml", ".xml", ".html" };
private static Set<String> blockPrettyPrint;
private static Set<String> conditionalBlockPrettyPrint;
private static Set<String> emptyElements;
private static Set<String> emptyHtml5Elements;
private static String[] entities; // Not convenient to define directly due to a lot of null values
// Type of document
@ -135,7 +140,24 @@ public class XhtmlDocument extends DOMDocument {
emptyElements.add("img");
emptyElements.add("area");
emptyElements.add("input");
emptyElements.add("col");
emptyElements.add("col");
// These elements are empty in HTML5
emptyHtml5Elements = new HashSet<String>();
emptyHtml5Elements.add("base");
emptyHtml5Elements.add("meta");
emptyHtml5Elements.add("link");
emptyHtml5Elements.add("hr");
emptyHtml5Elements.add("br");
emptyHtml5Elements.add("param");
emptyHtml5Elements.add("img");
emptyHtml5Elements.add("area");
emptyHtml5Elements.add("input");
emptyHtml5Elements.add("col");
emptyHtml5Elements.add("command");
emptyHtml5Elements.add("embed");
emptyHtml5Elements.add("keygen");
emptyHtml5Elements.add("source");
// Named character entities (currently only those within the ISO latin 1 range)
entities = new String[256];
@ -262,7 +284,7 @@ public class XhtmlDocument extends DOMDocument {
DocumentBuilder builder = builderFactory.newDocumentBuilder();
DOMImplementation domImpl = builder.getDOMImplementation();
String[] sDocType = getDoctypeStrings();
DocumentType doctype = domImpl.createDocumentType("html", sDocType[0], sDocType[1]);
DocumentType doctype = domImpl.createDocumentType("html", sDocType[0], sDocType[1]);
contentDOM = domImpl.createDocument("http://www.w3.org/1999/xhtml","html",doctype);
}
catch (Throwable t) {
@ -287,6 +309,7 @@ public class XhtmlDocument extends DOMDocument {
case XHTML11: return MIMETypes.XHTML_MATHML; // TODO: Change the constant names in MIMETypes, this is a bit confusing...
case XHTML_MATHML: return MIMETypes.XHTML_MATHML;
case XHTML_MATHML_XSL: return MIMETypes.XHTML_MATHML_XSL;
case HTML5: return MIMETypes.HTML5;
}
return "";
}
@ -312,6 +335,7 @@ public class XhtmlDocument extends DOMDocument {
public Element getFooterNode() { return footerNode; }
public void createHeaderFooter() {
// TODO: Use semantic elements for HTML5?
headerNode = getContentDOM().createElement("div");
headerNode.setAttribute("id",sHeaderId);
bodyNode.appendChild(headerNode);
@ -342,7 +366,7 @@ public class XhtmlDocument extends DOMDocument {
DocumentBuilder builder = builderFactory.newDocumentBuilder();
DOMImplementation domImpl = builder.getDOMImplementation();
String[] sDocType = getDoctypeStrings();
DocumentType doctype = domImpl.createDocumentType("html", sDocType[0], sDocType[1]);
DocumentType doctype = domImpl.createDocumentType("html", sDocType[0], sDocType[1]);
newDOM = domImpl.createDocument("http://www.w3.org/1999/xhtml",
templateDOM.getDocumentElement().getTagName(),doctype);
setContentDOM(newDOM);
@ -373,7 +397,7 @@ public class XhtmlDocument extends DOMDocument {
}
private String[] getDoctypeStrings() {
// Define publicId and systemId
// Define publicId and systemId (null for HTML5)
String sPublicId = null;
String sSystemId = null;
switch (nType) {
@ -520,8 +544,9 @@ public class XhtmlDocument extends DOMDocument {
// Add a BOM if the user desires so
if (bAddBOM) { osw.write("\uFEFF"); }
// Omit xml prolog for pure xhtml 1.0 strict documents (to be browser safe)
if (nType!=XHTML10) {
// Omit XML prolog for pure XHTML 1.0 strict documents (HTML 4 compatibility)
// and for HTML5 documents (polyglot document)
if (nType!=XHTML10 && nType!=HTML5) {
osw.write("<?xml version=\"1.0\" encoding=\""+sEncoding+"\" ?>\n");
}
// Either specify doctype or xsl transformation (the user may require
@ -533,13 +558,18 @@ public class XhtmlDocument extends DOMDocument {
osw.write("<?xml-stylesheet type=\"text/xsl\" href=\""+sXsltPath+sSlash+"pmathml.xsl\"?>\n");
}
else if (!bNoDoctype) {
DocumentType docType = getContentDOM().getDoctype();
if (docType!=null) {
osw.write("<!DOCTYPE html PUBLIC \"");
osw.write(docType.getPublicId());
osw.write("\" \"");
osw.write(docType.getSystemId());
osw.write("\">\n");
if (nType==HTML5) {
osw.write("<!DOCTYPE html>\n");
}
else {
DocumentType docType = getContentDOM().getDoctype();
if (docType!=null) {
osw.write("<!DOCTYPE html PUBLIC \"");
osw.write(docType.getPublicId());
osw.write("\" \"");
osw.write(docType.getSystemId());
osw.write("\">\n");
}
}
}
Element doc = getContentDOM().getDocumentElement();
@ -578,8 +608,8 @@ public class XhtmlDocument extends DOMDocument {
}
}
private static boolean isEmpty(String sTagName) {
return emptyElements.contains(sTagName);
private boolean isEmpty(String sTagName) {
return nType==HTML5 ? emptyHtml5Elements.contains(sTagName) : emptyElements.contains(sTagName);
}
// Write nodes; we only need element, text and comment nodes
@ -589,8 +619,9 @@ public class XhtmlDocument extends DOMDocument {
case Node.ELEMENT_NODE:
if (isEmpty(node.getNodeName())) {
// This node must be empty, we ignore child nodes
String sNodeName = node.getNodeName();
if (nLevel>=0) { writeSpaces(nLevel,osw); }
osw.write("<"+node.getNodeName());
osw.write("<"+sNodeName);
writeAttributes(node,osw);
osw.write(" />");
if (nLevel>=0) { osw.write("\n"); }
@ -626,7 +657,7 @@ public class XhtmlDocument extends DOMDocument {
osw.write("<"+node.getNodeName());
writeAttributes(node,osw);
// HTML compatibility: use end-tag even if empty
if (nType<=XHTML11) {
if (nType<=XHTML11 || nType==HTML5) {
osw.write("></"+node.getNodeName()+">");
}
else {

View file

@ -15,6 +15,7 @@
or <emph>File - Export - XHTML 1.1</emph><br/>
or <emph>File - Export - XHTML 1.1 + MathML 2.0</emph><br/>
or <emph>File - Export - XHTML 1.1 + MathML 2.0 (xsl)</emph><br/>
or <emph>File - Export - HTML5</emph>
</section>
<paragraph role="heading" level="2" xml-lang="en-US">Style</paragraph>

View file

@ -13,6 +13,7 @@
<section id="howtoget" xml-lang="en-US">
Choose <emph>File - Export - XHTML 1.0 strict</emph><br/>
or <emph>File - Export - XHTML 1.1</emph><br/>
or <emph>File - Export - HTML5</emph>
</section>
<bookmark xml-lang="en-US" branch="hid/org.openoffice.da.writer2xhtml.oxt:OptionsCalcConfig" id="bm_optionscalc_config"/>

View file

@ -3,6 +3,34 @@
<node oor:name="Filters">
<node oor:name="org.openoffice.da.writer2xhtml5" oor:op="replace" oor:finalized="true" oor:mandatory="true">
<prop oor:name="FileFormatVersion"><value>0</value></prop>
<prop oor:name="Type"><value>writer_xhtml5_File</value></prop>
<prop oor:name="DocumentService"><value>com.sun.star.text.TextDocument</value></prop>
<prop oor:name="UIComponent"><value>org.openoffice.da.comp.writer2xhtml.XhtmlOptionsDialog</value></prop>
<prop oor:name="UserData"><value>org.openoffice.da.comp.writer2xhtml.W2XExportFilter unused com.sun.star.comp.Writer.XMLOasisImporter com.sun.star.comp.Writer.XMLOasisExporter staroffice/sxw text/html5</value></prop>
<prop oor:name="FilterService"><value>com.sun.star.comp.Writer.XmlFilterAdaptor</value></prop>
<prop oor:name="TemplateName"/>
<prop oor:name="UIName">
<value>HTML 5</value>
</prop>
<prop oor:name="Flags"><value>EXPORT ALIEN 3RDPARTYFILTER</value></prop>
</node>
<node oor:name="org.openoffice.da.calc2xhtml5" oor:op="replace" oor:finalized="true" oor:mandatory="true">
<prop oor:name="FileFormatVersion"><value>0</value></prop>
<prop oor:name="Type"><value>writer_xhtml5_File</value></prop>
<prop oor:name="DocumentService"><value>com.sun.star.sheet.SpreadsheetDocument</value></prop>
<prop oor:name="UIComponent"><value>org.openoffice.da.comp.writer2xhtml.XhtmlOptionsDialogCalc</value></prop>
<prop oor:name="UserData"><value>org.openoffice.da.comp.writer2xhtml.W2XExportFilter unused com.sun.star.comp.Calc.XMLOasisImporter com.sun.star.comp.Calc.XMLOasisExporter staroffice/sxc text/html5</value></prop>
<prop oor:name="FilterService"><value>com.sun.star.comp.Writer.XmlFilterAdaptor</value></prop><!-- sic! -->
<prop oor:name="TemplateName"/>
<prop oor:name="UIName">
<value>HTML5</value>
</prop>
<prop oor:name="Flags"><value>EXPORT ALIEN 3RDPARTYFILTER</value></prop>
</node>
<node oor:name="org.openoffice.da.writer2xhtml" oor:op="replace" oor:finalized="true" oor:mandatory="true">
<prop oor:name="FileFormatVersion"><value>0</value></prop>
<prop oor:name="Type"><value>writer_xhtml10_File</value></prop>

View file

@ -3,6 +3,18 @@
<node oor:name="Types">
<node oor:name="writer_xhtml5_File" oor:op="replace" oor:finalized="true" oor:mandatory="true">
<prop oor:name="DetectService"/>
<prop oor:name="URLPattern"/>
<prop oor:name="Extensions"><value>html</value></prop>
<prop oor:name="MediaType"/>
<prop oor:name="Preferred"><value>false</value></prop>
<prop oor:name="UIName">
<value>HTML 5</value>
</prop>
<prop oor:name="ClipboardFormat"/>
</node>
<node oor:name="writer_xhtml10_File" oor:op="replace" oor:finalized="true" oor:mandatory="true">
<prop oor:name="DetectService"/>
<prop oor:name="URLPattern"/>