997 lines
40 KiB
Java
997 lines
40 KiB
Java
/************************************************************************
|
|
*
|
|
* XhtmlDocument.java
|
|
*
|
|
* Copyright: 2002-2015 by Henrik Just
|
|
*
|
|
* This file is part of Writer2LaTeX.
|
|
*
|
|
* Writer2LaTeX is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Writer2LaTeX is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Writer2LaTeX. If not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
* Version 1.6 (2015-05-05)
|
|
*
|
|
*/
|
|
|
|
//TODO: Add named entities outside ISO-latin 1
|
|
//TODO: When polyglot markup uses either a textarea or pre element, the text within the element does not begin with a newline.
|
|
|
|
package writer2latex.xhtml;
|
|
|
|
import org.w3c.dom.NodeList;
|
|
import org.w3c.dom.Element;
|
|
import org.w3c.dom.Node;
|
|
import org.w3c.dom.NamedNodeMap;
|
|
import org.w3c.dom.Document;
|
|
import org.w3c.dom.DocumentType;
|
|
import org.w3c.dom.DOMImplementation;
|
|
import javax.xml.parsers.DocumentBuilderFactory;
|
|
import javax.xml.parsers.DocumentBuilder;
|
|
import javax.xml.parsers.ParserConfigurationException;
|
|
|
|
import writer2latex.base.DOMDocument;
|
|
import writer2latex.office.XMLString;
|
|
|
|
import java.io.InputStream;
|
|
import java.io.OutputStream;
|
|
import java.io.OutputStreamWriter;
|
|
import java.io.IOException;
|
|
import java.util.HashSet;
|
|
import java.util.Set;
|
|
|
|
/**
|
|
* An implementation of <code>Document</code> for
|
|
* XHTML documents.
|
|
*/
|
|
public class XhtmlDocument extends DOMDocument {
|
|
|
|
/** Constant to identify XHTML 1.0 strict documents */
|
|
public static final int XHTML10 = 0;
|
|
|
|
/** Constant to identify XHTML 1.1 documents */
|
|
public static final int XHTML11 = 1;
|
|
|
|
/** Constant to identify XHTML + MathML documents */
|
|
public static final int XHTML_MATHML = 2;
|
|
|
|
/** Constant to identify HTML5 documents */
|
|
public static final int HTML5 = 3;
|
|
|
|
// Some static data
|
|
private static final String[] sExtension = { ".html", ".xhtml", ".xhtml", ".html" };
|
|
|
|
private static Set<String> blockPrettyPrint;
|
|
private static Set<String> conditionalBlockPrettyPrint;
|
|
private static Set<String> emptyElements;
|
|
private static Set<String> emptyHtml5Elements;
|
|
private static String[] entities; // Not convenient to define directly due to a lot of null values
|
|
|
|
// Type of document
|
|
private int nType;
|
|
|
|
// Configuration
|
|
private String sEncoding = "UTF-8";
|
|
private boolean bUseNamedEntities = false;
|
|
private boolean bHexadecimalEntities = true;
|
|
private char cLimit = 65535;
|
|
private boolean bNoDoctype = false;
|
|
private boolean bAddBOM = false;
|
|
private boolean bPrettyPrint = true;
|
|
private String sContentId = "content";
|
|
private String sHeaderId = "header";
|
|
private String sFooterId = "footer";
|
|
private String sPanelId = "panel";
|
|
|
|
// Content
|
|
private Element headNode = null;
|
|
private Element bodyNode = null;
|
|
private Element titleNode = null;
|
|
private Element contentNode = null;
|
|
private Element panelNode = null;
|
|
private Element headerNode = null;
|
|
private Element footerNode = null;
|
|
|
|
// Initialize static data
static {
    // Paragraphs and headings always block pretty printing
    blockPrettyPrint = new HashSet<String>();
    for (String sTag : new String[] { "p", "h1", "h2", "h3", "h4", "h5", "h6" }) {
        blockPrettyPrint.add(sTag);
    }

    // List items and table cells may block pretty printing, depending on the context
    conditionalBlockPrettyPrint = new HashSet<String>();
    for (String sTag : new String[] { "li", "th", "td" }) {
        conditionalBlockPrettyPrint.add(sTag);
    }

    // These elements are empty
    emptyElements = new HashSet<String>();
    for (String sTag : new String[] {
            "base", "meta", "link", "hr", "br", "param", "img", "area", "input", "col" }) {
        emptyElements.add(sTag);
    }

    // These elements are empty in HTML5: the same set plus four more elements
    emptyHtml5Elements = new HashSet<String>(emptyElements);
    for (String sTag : new String[] { "command", "embed", "keygen", "source" }) {
        emptyHtml5Elements.add(sTag);
    }

    // Named character entities (currently only those within the ISO latin 1 range).
    // The table must hold the entity *references* (e.g. "&nbsp;"), not the characters
    // themselves: write(char) emits the table entry verbatim, so storing the raw
    // character would bypass the numeric character reference fallback that is used
    // when the character cannot be represented in the output encoding.
    // The names below are listed in code point order, starting at 160 (0xA0).
    String[] sLatin1Names = {
        // 160-175
        "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
        "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
        // 176-191
        "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
        "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
        // 192-207
        "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
        // 208-223
        "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
        "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
        // 224-239
        "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
        "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
        // 240-255
        "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
        "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
    };
    entities = new String[256];
    for (int i=0; i<sLatin1Names.length; i++) {
        entities[160+i] = "&"+sLatin1Names[i]+";";
    }
}
|
|
|
|
public static final String getExtension(int nType) {
|
|
return sExtension[nType];
|
|
}
|
|
|
|
/**
|
|
* Constructor. This constructor also creates the DOM (minimal: root, head,
|
|
* title and body node only)
|
|
* @param name name of this document
|
|
* @param nType the type of document
|
|
*/
|
|
public XhtmlDocument(String name, int nType) {
|
|
super(name,sExtension[nType]);
|
|
this.nType = nType;
|
|
|
|
// create DOM
|
|
Document contentDOM = null;
|
|
try {
|
|
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
|
|
DocumentBuilder builder = builderFactory.newDocumentBuilder();
|
|
DOMImplementation domImpl = builder.getDOMImplementation();
|
|
String[] sDocType = getDoctypeStrings();
|
|
DocumentType doctype = domImpl.createDocumentType("html", sDocType[0], sDocType[1]);
|
|
contentDOM = domImpl.createDocument("http://www.w3.org/1999/xhtml","html",doctype);
|
|
contentDOM.getDocumentElement().setAttribute("xmlns","http://www.w3.org/1999/xhtml");
|
|
// add head, title and body
|
|
headNode = contentDOM.createElement("head");
|
|
titleNode = contentDOM.createElement("title");
|
|
bodyNode = contentDOM.createElement("body");
|
|
contentDOM.getDocumentElement().appendChild(headNode);
|
|
headNode.appendChild(titleNode);
|
|
contentDOM.getDocumentElement().appendChild(bodyNode);
|
|
contentNode = bodyNode;
|
|
setContentDOM(contentDOM);
|
|
}
|
|
catch (ParserConfigurationException e) {
|
|
// The newDocumentBuilder() method may in theory throw this, but this will not happen
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
|
|
@Override public String getMIMEType() {
|
|
// Get the real MIME type, not the pseudo ones used by the converter API
|
|
// We always produce XHTML, thus
|
|
return "application/xhtml+xml";
|
|
}
|
|
|
|
@Override public boolean isMasterDocument() {
|
|
return true;
|
|
}
|
|
|
|
@Override public boolean containsMath() {
|
|
return bodyNode!=null ? containsMath(bodyNode) : false;
|
|
}
|
|
|
|
/** Get the head element of this document (may be null).
 *  @return the head element
 */
public Element getHeadNode() {
    return this.headNode;
}
|
|
|
|
/** Get the body element of this document (may be null).
 *  @return the body element
 */
public Element getBodyNode() {
    return this.bodyNode;
}
|
|
|
|
/** Get the title element of this document (may be null).
 *  @return the title element
 */
public Element getTitleNode() {
    return this.titleNode;
}
|
|
|
|
/** Get the element which is currently registered as the content element.
 *  @return the content element
 */
public Element getContentNode() {
    return this.contentNode;
}
|
|
|
|
/** Register another element as the content element.
 *  @param contentNode the new content element
 */
public void setContentNode(Element contentNode) {
    this.contentNode = contentNode;
}
|
|
|
|
/** Get the panel element of this document (may be null).
 *  @return the panel element
 */
public Element getPanelNode() {
    return this.panelNode;
}
|
|
|
|
/** Get the header element of this document (may be null).
 *  @return the header element
 */
public Element getHeaderNode() {
    return this.headerNode;
}
|
|
|
|
/** Get the footer element of this document (may be null).
 *  @return the footer element
 */
public Element getFooterNode() {
    return this.footerNode;
}
|
|
|
|
public void createHeaderFooter() {
|
|
if (nType==HTML5) {
|
|
Element header1 = getContentDOM().createElement("header");
|
|
bodyNode.appendChild(header1);
|
|
headerNode = getContentDOM().createElement("nav");
|
|
header1.appendChild(headerNode);
|
|
}
|
|
else {
|
|
headerNode = getContentDOM().createElement("div");
|
|
bodyNode.appendChild(headerNode);
|
|
}
|
|
headerNode.setAttribute("id",sHeaderId);
|
|
|
|
contentNode = getContentDOM().createElement("div");
|
|
contentNode.setAttribute("id",sContentId);
|
|
bodyNode.appendChild(contentNode);
|
|
|
|
if (nType==HTML5) {
|
|
Element footer1 = getContentDOM().createElement("footer");
|
|
bodyNode.appendChild(footer1);
|
|
footerNode = getContentDOM().createElement("nav");
|
|
footer1.appendChild(footerNode);
|
|
}
|
|
else {
|
|
footerNode = getContentDOM().createElement("div");
|
|
bodyNode.appendChild(footerNode);
|
|
}
|
|
footerNode.setAttribute("id",sFooterId);
|
|
}
|
|
|
|
public void setContentDOM(Document doc) {
|
|
super.setContentDOM(doc);
|
|
collectNodes();
|
|
}
|
|
|
|
/** Does this document contain any math nodes?
|
|
*
|
|
* @return true if so
|
|
*/
|
|
public boolean hasMath() {
|
|
return hasMath(getContentDOM().getDocumentElement());
|
|
}
|
|
|
|
private boolean hasMath(Element node) {
|
|
// Check this element
|
|
if (node.getTagName().equals(XMLString.MATH)) {
|
|
return true;
|
|
}
|
|
// Check children
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.ELEMENT_NODE && hasMath((Element)child)) {
|
|
return true;
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
// Found nothing
|
|
return false;
|
|
}
|
|
|
|
public void read(InputStream is) throws IOException {
|
|
super.read(is);
|
|
collectNodes();
|
|
}
|
|
|
|
public void readFromTemplate(XhtmlDocument template) {
|
|
// create a new DOM
|
|
Document templateDOM = template.getContentDOM();
|
|
Document newDOM = null;
|
|
try {
|
|
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
|
|
DocumentBuilder builder = builderFactory.newDocumentBuilder();
|
|
DOMImplementation domImpl = builder.getDOMImplementation();
|
|
String[] sDocType = getDoctypeStrings();
|
|
DocumentType doctype = domImpl.createDocumentType("html", sDocType[0], sDocType[1]);
|
|
newDOM = domImpl.createDocument("http://www.w3.org/1999/xhtml",
|
|
templateDOM.getDocumentElement().getTagName(),doctype);
|
|
setContentDOM(newDOM);
|
|
|
|
// Import attributes on root element
|
|
Element templateRoot = templateDOM.getDocumentElement();
|
|
Element newRoot = newDOM.getDocumentElement();
|
|
NamedNodeMap attributes = templateRoot.getAttributes();
|
|
int nCount = attributes.getLength();
|
|
for (int i=0; i<nCount; i++) {
|
|
Node attrNode = attributes.item(i);
|
|
newRoot.setAttribute(attrNode.getNodeName(), attrNode.getNodeValue());
|
|
}
|
|
|
|
// Import all child nodes from template
|
|
NodeList children = templateRoot.getChildNodes();
|
|
int nLen = children.getLength();
|
|
for (int i=0; i<nLen; i++) {
|
|
newRoot.appendChild(getContentDOM().importNode(children.item(i),true));
|
|
}
|
|
|
|
// get the entry point nodes
|
|
collectNodes();
|
|
}
|
|
catch (Throwable t) {
|
|
t.printStackTrace();
|
|
}
|
|
}
|
|
|
|
private String[] getDoctypeStrings() {
|
|
// Define publicId and systemId (null for HTML5)
|
|
String sPublicId = null;
|
|
String sSystemId = null;
|
|
switch (nType) {
|
|
case XHTML10 :
|
|
sPublicId = "-//W3C//DTD XHTML 1.0 Strict//EN";
|
|
sSystemId = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
|
|
break;
|
|
case XHTML11 :
|
|
sPublicId = "-//W3C//DTD XHTML 1.1//EN";
|
|
sSystemId = "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd";
|
|
break;
|
|
case XHTML_MATHML :
|
|
sPublicId = "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN";
|
|
sSystemId = "http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd";
|
|
//sSystemId = "http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd"; (old version)
|
|
/* An alternative is to use XHTML + MathML + SVG:
|
|
sPublicId = "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN",
|
|
sSystemId = "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"); */
|
|
}
|
|
return new String[] { sPublicId, sSystemId };
|
|
}
|
|
|
|
private void collectNodes(Element elm) {
|
|
String sTagName = elm.getTagName();
|
|
if ("head".equals(sTagName)) {
|
|
headNode = elm;
|
|
}
|
|
else if ("body".equals(sTagName)) {
|
|
bodyNode = elm;
|
|
}
|
|
else if ("title".equals(sTagName)) {
|
|
titleNode = elm;
|
|
}
|
|
else {
|
|
String sId = elm.getAttribute("id");
|
|
if (sContentId.equals(sId)) { contentNode = elm; }
|
|
else if (sHeaderId.equals(sId)) { headerNode = elm; }
|
|
else if (sFooterId.equals(sId)) { footerNode = elm; }
|
|
else if (sPanelId.equals(sId)) { panelNode = elm; }
|
|
}
|
|
|
|
Node child = elm.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.ELEMENT_NODE) {
|
|
collectNodes((Element)child);
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
}
|
|
|
|
private void collectNodes() {
|
|
headNode = null;
|
|
bodyNode = null;
|
|
titleNode = null;
|
|
contentNode = null;
|
|
headerNode = null;
|
|
footerNode = null;
|
|
panelNode = null;
|
|
|
|
Element elm = getContentDOM().getDocumentElement();
|
|
collectNodes(elm);
|
|
if (contentNode==null) { contentNode = bodyNode!=null ? bodyNode : elm; }
|
|
if (headNode!=null && titleNode==null) {
|
|
titleNode = getContentDOM().createElement("title");
|
|
headNode.appendChild(titleNode);
|
|
}
|
|
}
|
|
|
|
public void setConfig(XhtmlConfig config) {
|
|
sEncoding = config.xhtmlEncoding().toUpperCase();
|
|
if ("UTF-16".equals(sEncoding)) {
|
|
cLimit = 65535;
|
|
}
|
|
else if ("ISO-8859-1".equals(sEncoding)) {
|
|
cLimit = 255;
|
|
}
|
|
else if ("US-ASCII".equals(sEncoding)) {
|
|
cLimit = 127;
|
|
}
|
|
else {
|
|
sEncoding = "UTF-8";
|
|
cLimit = 65535;
|
|
}
|
|
|
|
bAddBOM = config.xhtmlAddBOM() && sEncoding.equals("UTF-8");
|
|
bNoDoctype = config.xhtmlNoDoctype();
|
|
bPrettyPrint = config.prettyPrint();
|
|
bUseNamedEntities = config.useNamedEntities();
|
|
bHexadecimalEntities = config.hexadecimalEntities();
|
|
|
|
String[] sTemplateIds = config.templateIds().split(",");
|
|
int nIdCount = sTemplateIds.length;
|
|
if (nIdCount>0 && sTemplateIds[0].trim().length()>0) sContentId = sTemplateIds[0].trim(); else sContentId = "content";
|
|
if (nIdCount>1) sHeaderId = sTemplateIds[1].trim(); else sHeaderId = "header";
|
|
if (nIdCount>2) sFooterId = sTemplateIds[2].trim(); else sFooterId = "footer";
|
|
if (nIdCount>3) sPanelId = sTemplateIds[3].trim(); else sPanelId = "panel";
|
|
}
|
|
|
|
/** Get the character encoding used when this document is written.
 *  @return the name of the encoding
 */
public String getEncoding() {
    return this.sEncoding;
}
|
|
|
|
/** Get the file extension of this document, as reported by the superclass.
 *  @return the file extension
 */
public String getFileExtension() {
    final String sFileExtension = super.getFileExtension();
    return sFileExtension;
}
|
|
|
|
// Optimize the usage of xml:dir and xml:lang attributes
|
|
private void optimize(Element node, String sLang, String sDir) {
|
|
if (node.hasAttribute("xml:lang")) {
|
|
if (node.getAttribute("xml:lang").equals(sLang)) {
|
|
node.removeAttribute("xml:lang");
|
|
if (node.hasAttribute("lang")) {
|
|
node.removeAttribute("lang");
|
|
}
|
|
}
|
|
else {
|
|
sLang = node.getAttribute("xml:lang");
|
|
}
|
|
}
|
|
if (node.hasAttribute("xml:dir")) {
|
|
if (node.getAttribute("xml:dir").equals(sDir)) {
|
|
node.removeAttribute("xml:dir");
|
|
}
|
|
else {
|
|
sDir = node.getAttribute("xml:dir");
|
|
}
|
|
}
|
|
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.ELEMENT_NODE) {
|
|
optimize((Element)child, sLang, sDir);
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Write out content to the supplied <code>OutputStream</code>.
|
|
* (with pretty printing)
|
|
* @param os XML <code>OutputStream</code>.
|
|
* @throws IOException If any I/O error occurs.
|
|
*/
|
|
public void write(OutputStream os) throws IOException {
|
|
OutputStreamWriter osw = new OutputStreamWriter(os,sEncoding);
|
|
// Add a BOM if the user desires so
|
|
if (bAddBOM) { osw.write("\uFEFF"); }
|
|
|
|
// Omit XML prolog for pure XHTML 1.0 strict documents (HTML 4 compaitbility)
|
|
// and for HTML5 documents (polyglot document)
|
|
if (nType!=XHTML10 && nType!=HTML5) {
|
|
osw.write("<?xml version=\"1.0\" encoding=\""+sEncoding+"\" ?>\n");
|
|
}
|
|
// Specify DOCTYPE (the user may require that no DOCTYPE is used;
|
|
// this may be desirable for further transformations)
|
|
if (!bNoDoctype) {
|
|
if (nType==HTML5) {
|
|
osw.write("<!DOCTYPE html>\n");
|
|
}
|
|
else {
|
|
DocumentType docType = getContentDOM().getDoctype();
|
|
if (docType!=null) {
|
|
osw.write("<!DOCTYPE html PUBLIC \"");
|
|
osw.write(docType.getPublicId());
|
|
osw.write("\" \"");
|
|
osw.write(docType.getSystemId());
|
|
osw.write("\">\n");
|
|
}
|
|
}
|
|
}
|
|
Element doc = getContentDOM().getDocumentElement();
|
|
|
|
optimize(doc,null,null);
|
|
write(doc,bPrettyPrint ? 0 : -1,osw);
|
|
osw.flush();
|
|
osw.close();
|
|
}
|
|
|
|
private static boolean blockThis(Element node) {
|
|
String sTagName = node.getTagName();
|
|
if (blockPrettyPrint.contains(sTagName)) {
|
|
return true;
|
|
}
|
|
else if (conditionalBlockPrettyPrint.contains(sTagName)) {
|
|
// Block pretty printing if the content is anything but elements that block pretty print
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.ELEMENT_NODE && !blockPrettyPrint.contains(child.getNodeName())) {
|
|
return true;
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
return false;
|
|
}
|
|
else {
|
|
// Other elements block pretty printing if they contain text nodes
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.TEXT_NODE) {
|
|
return true;
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
return false;
|
|
}
|
|
}
|
|
|
|
private boolean isEmpty(String sTagName) {
|
|
return nType==HTML5 ? emptyHtml5Elements.contains(sTagName) : emptyElements.contains(sTagName);
|
|
}
|
|
|
|
// Write nodes; we only need element, text and comment nodes
|
|
private void write(Node node, int nLevel, OutputStreamWriter osw) throws IOException {
|
|
short nType = node.getNodeType();
|
|
switch (nType) {
|
|
case Node.ELEMENT_NODE:
|
|
if (isEmpty(node.getNodeName())) {
|
|
// This node must be empty, we ignore child nodes
|
|
String sNodeName = node.getNodeName();
|
|
if (nLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("<"+sNodeName);
|
|
writeAttributes(node,osw);
|
|
osw.write(" />");
|
|
if (nLevel>=0) { osw.write("\n"); }
|
|
}
|
|
else if (node.hasChildNodes()) {
|
|
int nNextLevel = (nLevel<0 || blockThis((Element)node)) ? -1 : nLevel+1;
|
|
// Print start tag
|
|
boolean bRedundantElement = !node.hasAttributes() &&
|
|
(node.getNodeName().equals("a") || node.getNodeName().equals("span"));
|
|
if (!bRedundantElement) {
|
|
// Writer2xhtml may produce <a> and <span> without attributes, these are removed here
|
|
if (nLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("<"+node.getNodeName());
|
|
writeAttributes(node,osw);
|
|
osw.write(">");
|
|
if (nNextLevel>=0) { osw.write("\n"); }
|
|
}
|
|
// Print children
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
write(child,nNextLevel,osw);
|
|
child = child.getNextSibling();
|
|
}
|
|
// Print end tag
|
|
if (!bRedundantElement) {
|
|
if (nNextLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("</"+node.getNodeName()+">");
|
|
if (nLevel>=0) { osw.write("\n"); }
|
|
}
|
|
}
|
|
else { // empty element
|
|
if (nLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("<"+node.getNodeName());
|
|
writeAttributes(node,osw);
|
|
// HTML compatibility: use end-tag even if empty
|
|
if (nType<=XHTML11 || nType==HTML5) {
|
|
osw.write("></"+node.getNodeName()+">");
|
|
}
|
|
else {
|
|
osw.write(" />");
|
|
}
|
|
if (nLevel>=0) { osw.write("\n"); }
|
|
}
|
|
break;
|
|
case Node.TEXT_NODE:
|
|
write(node.getNodeValue(),osw);
|
|
break;
|
|
case Node.COMMENT_NODE:
|
|
|
|
if (nLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("<!-- ");
|
|
//write(node.getNodeValue(),osw);
|
|
osw.write(node.getNodeValue());
|
|
osw.write(" -->");
|
|
if (nLevel>=0) { osw.write("\n"); }
|
|
}
|
|
}
|
|
|
|
private void writeAttributes(Node node, OutputStreamWriter osw) throws IOException {
|
|
NamedNodeMap attr = node.getAttributes();
|
|
int nLen = attr.getLength();
|
|
for (int i=0; i<nLen; i++) {
|
|
Node item = attr.item(i);
|
|
osw.write(" ");
|
|
write(item.getNodeName(),osw);
|
|
osw.write("=\"");
|
|
writeAttribute(item.getNodeValue(),osw);
|
|
osw.write("\"");
|
|
}
|
|
}
|
|
|
|
private void writeSpaces(int nCount, OutputStreamWriter osw) throws IOException {
|
|
for (int i=0; i<nCount; i++) { osw.write(" "); }
|
|
}
|
|
|
|
private void write(String s, OutputStreamWriter osw) throws IOException {
|
|
// Allow null strings, though this means there is a bug somewhere...
|
|
if (s==null) { osw.write("null"); return; }
|
|
int nLen = s.length();
|
|
char c;
|
|
for (int i=0; i<nLen; i++) {
|
|
c = s.charAt(i);
|
|
switch (c) {
|
|
case ('<'): osw.write("<"); break;
|
|
case ('>'): osw.write(">"); break;
|
|
case ('&'): osw.write("&"); break;
|
|
default:
|
|
write(c,osw);
|
|
}
|
|
}
|
|
}
|
|
|
|
private void writeAttribute(String s, OutputStreamWriter osw) throws IOException {
|
|
int nLen = s.length();
|
|
char c;
|
|
for (int i=0; i<nLen; i++) {
|
|
c = s.charAt(i);
|
|
switch (c) {
|
|
case ('<'): osw.write("<"); break;
|
|
case ('>'): osw.write(">"); break;
|
|
case ('&'): osw.write("&"); break;
|
|
case ('"'): osw.write("""); break;
|
|
case ('\''): osw.write( nType == XHTML10 ? "'" : "'"); break;
|
|
default:
|
|
write(c,osw);
|
|
}
|
|
}
|
|
}
|
|
|
|
private void write(char c, OutputStreamWriter osw) throws IOException {
|
|
if (bUseNamedEntities) {
|
|
if (c<256 && entities[c]!=null) {
|
|
// XHTML has a named entity here
|
|
osw.write(entities[c]);
|
|
return;
|
|
}
|
|
String s=getMathMLEntity(c);
|
|
if (s!=null && (nType==XHTML_MATHML)) {
|
|
// There's a MathML entity to use
|
|
osw.write(s);
|
|
return;
|
|
}
|
|
}
|
|
if (c>cLimit) {
|
|
if (bHexadecimalEntities) {
|
|
osw.write("&#x"+Integer.toHexString(c).toUpperCase()+";");
|
|
}
|
|
else {
|
|
osw.write("&#"+Integer.toString(c).toUpperCase()+";");
|
|
}
|
|
}
|
|
else {
|
|
osw.write(c);
|
|
}
|
|
}
|
|
|
|
|
|
// Translate character to MathML entity (contributed by Bruno Mascret)
|
|
private String getMathMLEntity(char c) {
|
|
switch (c) {
|
|
case '\u0192': return "ƒ";// lettre minuscule latine f hameon
|
|
case '\u0391': return "Α";// lettre majuscule grecque alpha
|
|
case '\u0392': return "Β";// lettre majuscule grecque beta
|
|
case '\u0393': return "Γ";// lettre majuscule grecque gamma
|
|
case '\u0394': return "Δ";// lettre majuscule grecque delta
|
|
case '\u0395': return "Ε";// lettre majuscule grecque epsilon
|
|
case '\u0396': return "Ζ";// lettre majuscule grecque zeta
|
|
case '\u0397': return "Η";// lettre majuscule grecque eta
|
|
case '\u0398': return "Θ";// lettre majuscule grecque theta
|
|
case '\u0399': return "Ι";// lettre majuscule grecque iota
|
|
case '\u039A': return "Κ";// lettre majuscule grecque kappa
|
|
case '\u039B': return "Λ";// lettre majuscule grecque lambda
|
|
case '\u039C': return "Μ";// lettre majuscule grecque mu
|
|
case '\u039D': return "Ν";// lettre majuscule grecque nu
|
|
case '\u039E': return "Ξ";// lettre majuscule grecque xi
|
|
case '\u039F': return "Ο";// lettre majuscule grecque omicron
|
|
case '\u03A0': return "Π";// lettre majuscule grecque pi
|
|
case '\u03A1': return "Ρ";// lettre majuscule grecque rho
|
|
case '\u03A3': return "Σ";// lettre majuscule grecque sigma (Il n'y pas de caractere Sigmaf ni U+03A2 non plus)
|
|
case '\u03A4': return "Τ";// lettre majuscule grecque tau
|
|
case '\u03A5': return "Υ";// lettre majuscule grecque upsilon
|
|
case '\u03A6': return "Φ";// lettre majuscule grecque phi
|
|
case '\u03A7': return "Χ";// lettre majuscule grecque chi
|
|
case '\u03A8': return "Ψ";// lettre majuscule grecque psi
|
|
case '\u03A9': return "Ω";// lettre majuscule grecque omega
|
|
case '\u03B1': return "α";// lettre minuscule grecque alpha
|
|
case '\u03B2': return "β";// lettre minuscule grecque beta
|
|
case '\u03B3': return "γ";// lettre minuscule grecque gamma
|
|
case '\u03B4': return "δ";// lettre minuscule grecque delta
|
|
//case '\u03B4': return "δ";// lettre minuscule grecque delta
|
|
case '\u03B5': return "ε";// lettre minuscule grecque epsilon
|
|
case '\u03B6': return "ζ";// lettre minuscule grecque zeta
|
|
case '\u03B7': return "η";// lettre minuscule grecque eta
|
|
case '\u03B8': return "θ";// lettre minuscule grecque theta
|
|
case '\u03B9': return "ι";// lettre minuscule grecque iota
|
|
case '\u03BA': return "κ";// lettre minuscule grecque kappa
|
|
case '\u03BB': return "λ";// lettre minuscule grecque lambda
|
|
case '\u03BC': return "μ";// lettre minuscule grecque mu
|
|
case '\u03BD': return "ν";// lettre minuscule grecque nu
|
|
case '\u03BE': return "ξ";// lettre minuscule grecque xi
|
|
case '\u03BF': return "ο";// lettre minuscule grecque omicron
|
|
case '\u03C0': return "π";// lettre minuscule grecque pi
|
|
case '\u03C1': return "ρ";// lettre minuscule grecque rho
|
|
case '\u03C2': return "ς";// lettre minuscule grecque final sigma
|
|
case '\u03C3': return "σ";// lettre minuscule grecque sigma
|
|
case '\u03C4': return "τ";// lettre minuscule grecque tau
|
|
case '\u03C5': return "υ";// lettre minuscule grecque upsilon
|
|
case '\u03C6': return "φ";// lettre minuscule grecque phi
|
|
case '\u03C7': return "χ";// lettre minuscule grecque chi
|
|
case '\u03C8': return "ψ";// lettre minuscule grecque psi
|
|
case '\u03C9': return "ω";// lettre minuscule grecque omega
|
|
case '\u03D1': return "ϑ";// lettre minuscule grecque theta symbol
|
|
case '\u03D2': return "ϒ";// symbole grec upsilon crochet
|
|
case '\u03D6': return "ϖ";// symbole grec pi
|
|
case '\u2022': return "•";// puce (Ce N'EST PAS la meme chose que l'operateur puce, U+2219)
|
|
case '\u2026': return "…";// points de suspension
|
|
case '\u2032': return "′";// prime
|
|
case '\u2033': return "″";// double prime
|
|
case '\u203E': return "‾";// tiret en chef
|
|
case '\u2044': return "⁄";// barre de fraction
|
|
case '\u2118': return "℘";// fonction elliptique de Weierstrass
|
|
case '\u2111': return "ℑ";// majuscule I gothique = partie imaginaire
|
|
case '\u211C': return "ℜ";// majuscule R gothique = partie reelle
|
|
case '\u2122': return "™";// symbole marque commerciale
|
|
case '\u2135': return "ℵ";// symbole alef = premier nombre transfini (Le symbole alef N'EST PAS pareil a la lettre hebreue alef, U+05D0 meme si on pourrait utiliser le meme glyphe pour representer les deux caracteres)
|
|
case '\u2190': return "←";// fleche vers la gauche
|
|
case '\u2191': return "↑";// fleche vers le haut
|
|
case '\u2192': return "→";// fleche vers la droite
|
|
case '\u2193': return "↓";// fleche vers le bas
|
|
case '\u2194': return "↔";// fleche bilaterale
|
|
case '\u21B5': return "↵";// fleche vers le bas avec coin vers la gauche = retour de chariot
|
|
case '\u21D0': return "⇐";// double fleche vers la gauche (ISO 10646 ne dit pas que lArr est la meme chose que la fleche 'est implique par' et n'a pas non plus d'autre caractere pour cette fonction. Alors ? On peut utiliser lArr pour 'est implique par' comme le suggere)
|
|
case '\u21D1': return "⇑";// double fleche vers le haut
|
|
case '\u21D2': return "⇒";// double fleche vers la droite (ISO 10646 ne dit pas qu'il s'agit du caractere 'implique' et n'a pas non plus d'autre caractere avec cette fonction. Alors ? On peut utiliser rArr pour 'implique' comme le suggere)
|
|
case '\u21D3': return "⇓";// double fleche vers le bas
|
|
case '\u21D4': return "⇔";// double fleche bilaterale
|
|
case '\u2200': return "∀";// pour tous
|
|
case '\u2202': return "∂";// derivee partielle
|
|
case '\u2203': return "∃";// il existe
|
|
case '\u2205': return "∅";// ensemble vide = symbole diametre
|
|
case '\u2207': return "∇";// nabla
|
|
case '\u2208': return "∈";// appartient
|
|
case '\u2209': return "∉";// n'appartient pas
|
|
case '\u220B': return "∋";// contient comme element (Est-ce qu'il ne pourrait pas y avoir un nom plus parlant que 'ni' ?)
|
|
case '\u220F': return "∏";// produit de la famille = signe produit (prod N'EST PAS le meme caractere que U+03A0 'lettre capitale grecque pi' meme si le meme glyphe peut s'utiliser pour les deux)
|
|
case '\u2211': return "∑";// sommation de la famille (sum N'EST PAS le meme caractere que U+03A3 'ettre capitale grecque sigma' meme si le meme glyphe peut s'utiliser pour les deux)
|
|
case '\u2212': return "−";// signe moins
|
|
case '\u2217': return "∗";// operateur asterisque
|
|
case '\u221A': return "√";// racine carree = signe radical
|
|
case '\u221D': return "∝";// proportionnel
|
|
case '\u221E': return "∞";// infini
|
|
case '\u2220': return "∠";// angle
|
|
case '\u2227': return "∧";// ET logique
|
|
case '\u2228': return "∨";// OU logique
|
|
case '\u2229': return "∩";// intersection = cap
|
|
case '\u222A': return "∪";// union = cup
|
|
case '\u222B': return "∫";// integrale
|
|
case '\u2234': return "∴";// par consequent
|
|
case '\u223C': return "∼";// operateur tilde = varie avec = similaire (L'operateur tilde N'EST PAS le meme caractere que le tilde U+007E, meme si le meme glyphe peut s'utiliser pour les deux)
|
|
case '\u2245': return "≅";// approximativement egal
|
|
case '\u2248': return "≈";// presque egal = asymptotique
|
|
case '\u2260': return "≠";// pas egal
|
|
case '\u2261': return "≡";// identique
|
|
//case '\u2261': return "≡";// identique
|
|
case '\u2264': return "≤";// plus petit ou egal
|
|
case '\u2265': return "≥";// plus grand ou egal
|
|
case '\u2282': return "⊂";// sous-ensemble de
|
|
case '\u2283': return "⊃";// sur-ensemble de (Remarquez que nsup 'pas un sur-ensemble de' 2285, n'est pas couvert par le codage de la police Symbol. Devrait-il l'etre par symetrie ? Il est dans)
|
|
case '\u2284': return "⊄";// pas un sous-ensemble de
|
|
case '\u2286': return "⊆";// sous-ensemble ou egal
|
|
case '\u2287': return "⊇";// sur-ensemble de ou egal
|
|
case '\u2295': return "⊕";// plus cercle = somme directe
|
|
case '\u2297': return "⊗";// multiplie par cercle = produit vectoriel
|
|
case '\u22A5': return "⊥";// taquet vers le haut = orthogonal = perpendiculaire
|
|
case '\u22C5': return "⋅";// operateur point (L'operateur point N'EST PAS le meme caractere que le 'point median', U+00B7)
|
|
case '\u2308': return "⌈";// plafond gauche = anglet gauche
|
|
case '\u2309': return "⌉";// plafond droite
|
|
case '\u230A': return "⌊";// plancher gauche
|
|
case '\u230B': return "⌋";// plancher droite
|
|
case '\u2329': return "⟨";// chevron vers la gauche (lang N'EST PAS le meme caractere que U+003C 'inferieur' ou U+2039 'guillemet simple vers la gauche')
|
|
case '\u232A': return "⟩";// chevron vers la droite (rang iN'EST PAS le meme caractere que U+003E 'superieur' ou U+203A 'guillemet simple vers la droite')
|
|
case '\u25CA': return "◊";// losange
|
|
case '\u2660': return "♠";// pique noir (Noir semble dire ici rempli par opposition ajoure)
|
|
case '\u2663': return "♣";// trefle noir
|
|
case '\u2665': return "♥";// coeur noir
|
|
case '\u2666': return "♦";// carreau noir
|
|
// truc pas prevus
|
|
case '\u2102': return "ℂ";// ensemble C des complexes
|
|
case '\u2115': return "ℕ";// ensemble N des entiers
|
|
case '\u211A': return "ℚ";// ensemble Q des rationnels
|
|
case '\u211D': return "ℝ";// ensemble R des reels
|
|
case '\u2124': return "ℤ";// ensemble R des entiers relatifs
|
|
case '\u2223': return "∣";// divise
|
|
case '\u2224': return "∤";// ne divise pas
|
|
case '\u2243': return "≃";// asymptotiquement egal
|
|
case '\u2244': return "≄";// asymptotiquement egal
|
|
case '\u2225': return "∥";// parallele
|
|
case '\u00B1': return "±";// plus ou moins
|
|
case '\u2213': return "∓"; // moins ou plus (different de plus ou moins)
|
|
case '\u2494': return "⩽"; // inferieur ou egal incline
|
|
case '\u2270': return "≰"; //non inferieur ou egal incline
|
|
case '\u00AC': return "¬";// signe not
|
|
case '\u00B0': return "ˆ";// petit cercle, operateur concatenation, normalement ° mais on va le considere comme circ
|
|
case '\u224A': return "≊";// approxivativement egal
|
|
case '\u002B': return "+"; // signe plus
|
|
case '\u00D7': return "×"; // signe multiplication (croix)
|
|
case '\u003D': return "="; // signe egal
|
|
case '\u226E': return "≮"; // non inferieur
|
|
case '\u2A7D': return "⩽"; // inferieur incline = leqslant
|
|
case '\u220A': return "∈";// appartient
|
|
case '\u2216': return "∖";// difference d'ensemble
|
|
case '\u2288': return "⊈";// ni un sous-ensemble ni egal
|
|
case '\u2289': return "⊉";// ni un surensemble ni egal
|
|
case '\u2285': return "⊅";// non un surensemble de
|
|
case '\u301A': return "⟦";// crochet gauche avec barre
|
|
case '\u301B': return "⟧";// crochet droit avec barre
|
|
case '\u2210': return "∐";// coproduit (Pi l'envers)
|
|
case '\u222C': return "∬";// integrale double
|
|
case '\u222D': return "∭";// integrale triple
|
|
case '\u222E': return "∮";// integrale de contour
|
|
case '\u222F': return "∯";// integrale de surface
|
|
case '\u2230': return "∰";// integrale de volume
|
|
case '\u210F': return "ℏ";// const de Planck sur 2Pi
|
|
case '\u2253': return "&;";// BUG points suspensions diagonale descendant droite
|
|
case '\u22EE': return "⋮";// points suspensions verticaux
|
|
case '\u22EF': return "⋯";// points suspensions horizontaux medians
|
|
case '\u22F0': return "⋰";// points suspensions diagonale montant droite
|
|
case '\u22F1': return "⋱";// points suspensions diagonale descendant droite
|
|
case '\u02DA': return "˚"; //rond en chef
|
|
case '\u00A8': return "¨"; // double point en chef(trema)
|
|
case '\u02D9': return "˙"; // point en chef
|
|
case '\u2015': return "―"; // barre horizonthale
|
|
case '\u00AF': return "¯"; // barre horizonthale en chef
|
|
case '\u0332': return "_"; // souligne
|
|
case '\u2222': return "∢"; // angle spherique
|
|
case '\u03F1': return "ϱ"; // symbole grec rho final
|
|
case '\u226B': return "≫"; // tres superieur
|
|
case '\u226A': return "≪"; // tres inferieur
|
|
default: return null;
|
|
}
|
|
}
|
|
|
|
private boolean containsMath(Element node) {
|
|
// First check the node itself
|
|
if (node.getTagName().equals("math")) {
|
|
return true;
|
|
}
|
|
// The check the children
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.ELEMENT_NODE) {
|
|
if (containsMath((Element)child)) {
|
|
return true;
|
|
}
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
// And then look no further
|
|
return false;
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|