
git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@70 f0f2a975-2e09-46c8-9428-3b39399b9f3c
916 lines
37 KiB
Java
916 lines
37 KiB
Java
/************************************************************************
 *
 *  XhtmlDocument.java
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2.1, as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
 *  MA  02111-1307  USA
 *
 *  Copyright: 2002-2010 by Henrik Just
 *
 *  All Rights Reserved.
 *
 *  Version 1.2 (2010-06-19)
 *
 */
|
|
|
|
//TODO: Add named entities outside ISO-latin 1
|
|
|
|
package writer2latex.xhtml;
|
|
|
|
import org.w3c.dom.NodeList;
|
|
import org.w3c.dom.Element;
|
|
import org.w3c.dom.Node;
|
|
//import org.w3c.dom.Element;
|
|
import org.w3c.dom.NamedNodeMap;
|
|
import org.w3c.dom.Document;
|
|
import org.w3c.dom.DocumentType;
|
|
import org.w3c.dom.DOMImplementation;
|
|
//import org.xml.sax.SAXException;
|
|
//import org.xml.sax.SAXParseException;
|
|
import javax.xml.parsers.DocumentBuilderFactory;
|
|
import javax.xml.parsers.DocumentBuilder;
|
|
//import javax.xml.parsers.ParserConfigurationException;
|
|
|
|
import writer2latex.api.MIMETypes;
|
|
import writer2latex.xmerge.DOMDocument;
|
|
|
|
import java.io.InputStream;
|
|
import java.io.OutputStream;
|
|
import java.io.OutputStreamWriter;
|
|
import java.io.IOException;
|
|
import java.util.HashSet;
|
|
import java.util.Set;
|
|
|
|
/**
|
|
* An implementation of <code>Document</code> for
|
|
* XHTML documents.
|
|
*/
|
|
public class XhtmlDocument extends DOMDocument {
|
|
|
|
/** Constant to identify XHTML 1.0 strict documents */
|
|
public static final int XHTML10 = 0;
|
|
|
|
/** Constant to identify XHTML 1.1 documents */
|
|
public static final int XHTML11 = 1;
|
|
|
|
/** Constant to identify XHTML + MathML documents */
|
|
public static final int XHTML_MATHML = 2;
|
|
|
|
/** Constant to identify XHTML + MathML documents using the XSL transformations
|
|
* from w3c's math working group (http://www.w3.org/Math/XSL/)
|
|
*/
|
|
public static final int XHTML_MATHML_XSL = 3;
|
|
|
|
// Some static data
|
|
private static final String[] sExtension = { ".html", ".xhtml", ".xhtml", ".xml" };
|
|
|
|
private static Set<String> blockPrettyPrint;
|
|
private static Set<String> conditionalBlockPrettyPrint;
|
|
private static Set<String> emptyElements;
|
|
private static String[] entities; // Not convenient to define directly due to a lot of null values
|
|
|
|
// Type of document
|
|
private int nType;
|
|
|
|
// Configuration
|
|
private String sEncoding = "UTF-8";
|
|
private boolean bUseNamedEntities = false;
|
|
private boolean bHexadecimalEntities = true;
|
|
private char cLimit = 65535;
|
|
private boolean bNoDoctype = false;
|
|
private boolean bAddBOM = false;
|
|
private boolean bPrettyPrint = true;
|
|
private String sXsltPath = "";
|
|
private String sContentId = "content";
|
|
private String sHeaderId = "header";
|
|
private String sFooterId = "footer";
|
|
private String sPanelId = "panel";
|
|
|
|
// Content
|
|
private Element headNode = null;
|
|
private Element bodyNode = null;
|
|
private Element titleNode = null;
|
|
private Element contentNode = null;
|
|
private Element panelNode = null;
|
|
private Element headerNode = null;
|
|
private Element footerNode = null;
|
|
|
|
// Initialize static data
|
|
static {
    // Paragraphs and headings always block pretty printing
    blockPrettyPrint = new HashSet<String>();
    for (String sName : new String[] { "p", "h1", "h2", "h3", "h4", "h5", "h6" }) {
        blockPrettyPrint.add(sName);
    }

    // List items and table cells may block pretty printing, depending on the context
    conditionalBlockPrettyPrint = new HashSet<String>();
    for (String sName : new String[] { "li", "th", "td" }) {
        conditionalBlockPrettyPrint.add(sName);
    }

    // These elements are empty
    emptyElements = new HashSet<String>();
    for (String sName : new String[] {
            "base", "meta", "link", "hr", "br", "param", "img", "area", "input", "col" }) {
        emptyElements.add(sName);
    }

    // Named character entities (currently only those within the ISO latin 1 range).
    // The names are listed in code point order for U+00A0..U+00FF; every code point
    // in that range has a name in the XHTML Latin-1 entity set. The table must hold
    // entity references (e.g. "&nbsp;"), not the characters themselves, otherwise
    // enabling named entities would just write the raw character.
    String[] sLatin1Names = {
        // U+00A0..U+00AF
        "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar", "sect",
        "uml", "copy", "ordf", "laquo", "not", "shy", "reg", "macr",
        // U+00B0..U+00BF
        "deg", "plusmn", "sup2", "sup3", "acute", "micro", "para", "middot",
        "cedil", "sup1", "ordm", "raquo", "frac14", "frac12", "frac34", "iquest",
        // U+00C0..U+00CF
        "Agrave", "Aacute", "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil",
        "Egrave", "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc", "Iuml",
        // U+00D0..U+00DF
        "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml", "times",
        "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml", "Yacute", "THORN", "szlig",
        // U+00E0..U+00EF
        "agrave", "aacute", "acirc", "atilde", "auml", "aring", "aelig", "ccedil",
        "egrave", "eacute", "ecirc", "euml", "igrave", "iacute", "icirc", "iuml",
        // U+00F0..U+00FF
        "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde", "ouml", "divide",
        "oslash", "ugrave", "uacute", "ucirc", "uuml", "yacute", "thorn", "yuml"
    };
    entities = new String[256];
    for (int i = 0; i < sLatin1Names.length; i++) {
        entities[160 + i] = "&" + sLatin1Names[i] + ";";
    }
}
|
|
|
|
public static final String getExtension(int nType) {
|
|
return sExtension[nType];
|
|
}
|
|
|
|
/**
|
|
* Constructor. This constructor also creates the DOM (minimal: root, head,
|
|
* title and body node only) - unlike the constructors in
|
|
* writer2latex.xmerge.DOMDocument.
|
|
* @param name <code>Document</code> name.
|
|
* @param nType the type of document
|
|
*/
|
|
public XhtmlDocument(String name, int nType) {
|
|
super(name,sExtension[nType]);
|
|
this.nType = nType;
|
|
|
|
|
|
// create DOM
|
|
Document contentDOM = null;
|
|
try {
|
|
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
|
|
DocumentBuilder builder = builderFactory.newDocumentBuilder();
|
|
DOMImplementation domImpl = builder.getDOMImplementation();
|
|
String[] sDocType = getDoctypeStrings();
|
|
DocumentType doctype = domImpl.createDocumentType("html", sDocType[0], sDocType[1]);
|
|
contentDOM = domImpl.createDocument("http://www.w3.org/1999/xhtml","html",doctype);
|
|
}
|
|
catch (Throwable t) {
|
|
t.printStackTrace();
|
|
}
|
|
contentDOM.getDocumentElement().setAttribute("xmlns","http://www.w3.org/1999/xhtml");
|
|
// add head, title and body
|
|
headNode = contentDOM.createElement("head");
|
|
titleNode = contentDOM.createElement("title");
|
|
bodyNode = contentDOM.createElement("body");
|
|
contentDOM.getDocumentElement().appendChild(headNode);
|
|
headNode.appendChild(titleNode);
|
|
contentDOM.getDocumentElement().appendChild(bodyNode);
|
|
contentNode = bodyNode;
|
|
setContentDOM(contentDOM);
|
|
|
|
}
|
|
|
|
@Override public String getMIMEType() {
|
|
switch (nType) {
|
|
case XHTML10: return MIMETypes.XHTML;
|
|
case XHTML11: return MIMETypes.XHTML_MATHML; // TODO: Change the constant names in MIMETypes, this is a bit confusing...
|
|
case XHTML_MATHML: return MIMETypes.XHTML_MATHML;
|
|
case XHTML_MATHML_XSL: return MIMETypes.XHTML_MATHML_XSL;
|
|
}
|
|
return "";
|
|
}
|
|
|
|
@Override public boolean isMasterDocument() {
|
|
return true;
|
|
}
|
|
|
|
/** @return the <code>head</code> element, or null if none has been found */
public Element getHeadNode() {
    return this.headNode;
}
|
|
|
|
/** @return the <code>body</code> element, or null if none has been found */
public Element getBodyNode() {
    return this.bodyNode;
}
|
|
|
|
/** @return the <code>title</code> element, or null if none has been found */
public Element getTitleNode() {
    return this.titleNode;
}
|
|
|
|
/** @return the element currently used as the content entry point */
public Element getContentNode() {
    return this.contentNode;
}
|
|
|
|
/**
 * Replace the content entry point.
 * @param contentNode the element to use as content node from now on
 */
public void setContentNode(Element contentNode) {
    this.contentNode = contentNode;
}
|
|
|
|
/** @return the panel <code>div</code>, or null if none has been found */
public Element getPanelNode() {
    return this.panelNode;
}
|
|
|
|
/** @return the header <code>div</code>, or null if none has been found or created */
public Element getHeaderNode() {
    return this.headerNode;
}
|
|
|
|
/** @return the footer <code>div</code>, or null if none has been found or created */
public Element getFooterNode() {
    return this.footerNode;
}
|
|
|
|
public void createHeaderFooter() {
|
|
headerNode = getContentDOM().createElement("div");
|
|
headerNode.setAttribute("id",sHeaderId);
|
|
bodyNode.appendChild(headerNode);
|
|
contentNode = getContentDOM().createElement("div");
|
|
contentNode.setAttribute("id",sContentId);
|
|
bodyNode.appendChild(contentNode);
|
|
footerNode = getContentDOM().createElement("div");
|
|
footerNode.setAttribute("id",sFooterId);
|
|
bodyNode.appendChild(footerNode);
|
|
}
|
|
|
|
public void setContentDOM(Document doc) {
|
|
super.setContentDOM(doc);
|
|
collectNodes();
|
|
}
|
|
|
|
public void read(InputStream is) throws IOException {
|
|
super.read(is);
|
|
collectNodes();
|
|
}
|
|
|
|
public void readFromTemplate(XhtmlDocument template) {
|
|
// create a new DOM
|
|
Document templateDOM = template.getContentDOM();
|
|
Document newDOM = null;
|
|
try {
|
|
DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
|
|
DocumentBuilder builder = builderFactory.newDocumentBuilder();
|
|
DOMImplementation domImpl = builder.getDOMImplementation();
|
|
String[] sDocType = getDoctypeStrings();
|
|
DocumentType doctype = domImpl.createDocumentType("html", sDocType[0], sDocType[1]);
|
|
newDOM = domImpl.createDocument("http://www.w3.org/1999/xhtml",
|
|
templateDOM.getDocumentElement().getTagName(),doctype);
|
|
setContentDOM(newDOM);
|
|
|
|
// Import attributes on root element
|
|
Element templateRoot = templateDOM.getDocumentElement();
|
|
Element newRoot = newDOM.getDocumentElement();
|
|
NamedNodeMap attributes = templateRoot.getAttributes();
|
|
int nCount = attributes.getLength();
|
|
for (int i=0; i<nCount; i++) {
|
|
Node attrNode = attributes.item(i);
|
|
newRoot.setAttribute(attrNode.getNodeName(), attrNode.getNodeValue());
|
|
}
|
|
|
|
// Import all child nodes from template
|
|
NodeList children = templateRoot.getChildNodes();
|
|
int nLen = children.getLength();
|
|
for (int i=0; i<nLen; i++) {
|
|
newRoot.appendChild(getContentDOM().importNode(children.item(i),true));
|
|
}
|
|
|
|
// get the entry point nodes
|
|
collectNodes();
|
|
}
|
|
catch (Throwable t) {
|
|
t.printStackTrace();
|
|
}
|
|
}
|
|
|
|
private String[] getDoctypeStrings() {
|
|
// Define publicId and systemId
|
|
String sPublicId = null;
|
|
String sSystemId = null;
|
|
switch (nType) {
|
|
case XHTML10 :
|
|
sPublicId = "-//W3C//DTD XHTML 1.0 Strict//EN";
|
|
sSystemId = "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd";
|
|
break;
|
|
case XHTML11 :
|
|
sPublicId = "-//W3C//DTD XHTML 1.1//EN";
|
|
sSystemId = "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd";
|
|
break;
|
|
case XHTML_MATHML :
|
|
case XHTML_MATHML_XSL :
|
|
sPublicId = "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN";
|
|
sSystemId = "http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd";
|
|
//sSystemId = "http://www.w3.org/TR/MathML2/dtd/xhtml-math11-f.dtd"; (old version)
|
|
/* An alternative is to use XHTML + MathML + SVG:
|
|
sPublicId = "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN",
|
|
sSystemId = "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd"); */
|
|
}
|
|
return new String[] { sPublicId, sSystemId };
|
|
}
|
|
|
|
private void collectNodes(Element elm) {
|
|
String sTagName = elm.getTagName();
|
|
if ("head".equals(sTagName)) {
|
|
headNode = elm;
|
|
}
|
|
else if ("body".equals(sTagName)) {
|
|
bodyNode = elm;
|
|
}
|
|
else if ("title".equals(sTagName)) {
|
|
titleNode = elm;
|
|
}
|
|
else if ("div".equals(sTagName)) {
|
|
String sId = elm.getAttribute("id");
|
|
if (sContentId.equals(sId)) { contentNode = elm; }
|
|
else if (sHeaderId.equals(sId)) { headerNode = elm; }
|
|
else if (sFooterId.equals(sId)) { footerNode = elm; }
|
|
else if (sPanelId.equals(sId)) { panelNode = elm; }
|
|
}
|
|
|
|
Node child = elm.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.ELEMENT_NODE) {
|
|
collectNodes((Element)child);
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
}
|
|
|
|
private void collectNodes() {
|
|
headNode = null;
|
|
bodyNode = null;
|
|
titleNode = null;
|
|
contentNode = null;
|
|
headerNode = null;
|
|
footerNode = null;
|
|
panelNode = null;
|
|
|
|
Element elm = getContentDOM().getDocumentElement();
|
|
collectNodes(elm);
|
|
if (contentNode==null) { contentNode = bodyNode!=null ? bodyNode : elm; }
|
|
if (headNode!=null && titleNode==null) {
|
|
titleNode = getContentDOM().createElement("title");
|
|
headNode.appendChild(titleNode);
|
|
}
|
|
}
|
|
|
|
public void setConfig(XhtmlConfig config) {
|
|
sEncoding = config.xhtmlEncoding().toUpperCase();
|
|
if ("UTF-16".equals(sEncoding)) {
|
|
cLimit = 65535;
|
|
}
|
|
else if ("ISO-8859-1".equals(sEncoding)) {
|
|
cLimit = 255;
|
|
}
|
|
else if ("US-ASCII".equals(sEncoding)) {
|
|
cLimit = 127;
|
|
}
|
|
else {
|
|
sEncoding = "UTF-8";
|
|
cLimit = 65535;
|
|
}
|
|
|
|
bAddBOM = config.xhtmlAddBOM() && sEncoding.equals("UTF-8");
|
|
bNoDoctype = config.xhtmlNoDoctype();
|
|
bPrettyPrint = config.prettyPrint();
|
|
bUseNamedEntities = config.useNamedEntities();
|
|
bHexadecimalEntities = config.hexadecimalEntities();
|
|
sXsltPath = config.getXsltPath();
|
|
|
|
String[] sTemplateIds = config.templateIds().split(",");
|
|
int nIdCount = sTemplateIds.length;
|
|
if (nIdCount>0 && sTemplateIds[0].trim().length()>0) sContentId = sTemplateIds[0].trim(); else sContentId = "content";
|
|
if (nIdCount>1) sHeaderId = sTemplateIds[1].trim(); else sHeaderId = "header";
|
|
if (nIdCount>2) sFooterId = sTemplateIds[2].trim(); else sFooterId = "footer";
|
|
if (nIdCount>3) sPanelId = sTemplateIds[3].trim(); else sPanelId = "panel";
|
|
}
|
|
|
|
/** @return the name of the character encoding used when writing this document */
public String getEncoding() {
    return this.sEncoding;
}
|
|
|
|
/** @return the file extension as reported by the superclass */
public String getFileExtension() {
    final String sExt = super.getFileExtension();
    return sExt;
}
|
|
|
|
// Optimize the usage of xml:dir and xml:lang attributes
|
|
private void optimize(Element node, String sLang, String sDir) {
|
|
if (node.hasAttribute("xml:lang")) {
|
|
if (node.getAttribute("xml:lang").equals(sLang)) {
|
|
node.removeAttribute("xml:lang");
|
|
if (node.hasAttribute("lang")) {
|
|
node.removeAttribute("lang");
|
|
}
|
|
}
|
|
else {
|
|
sLang = node.getAttribute("xml:lang");
|
|
}
|
|
}
|
|
if (node.hasAttribute("xml:dir")) {
|
|
if (node.getAttribute("xml:dir").equals(sDir)) {
|
|
node.removeAttribute("xml:dir");
|
|
}
|
|
else {
|
|
sDir = node.getAttribute("xml:dir");
|
|
}
|
|
}
|
|
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.ELEMENT_NODE) {
|
|
optimize((Element)child, sLang, sDir);
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Write out content to the supplied <code>OutputStream</code>.
|
|
* (with pretty printing)
|
|
* @param os XML <code>OutputStream</code>.
|
|
* @throws IOException If any I/O error occurs.
|
|
*/
|
|
public void write(OutputStream os) throws IOException {
|
|
OutputStreamWriter osw = new OutputStreamWriter(os,sEncoding);
|
|
// Add a BOM if the user desires so
|
|
if (bAddBOM) { osw.write("\uFEFF"); }
|
|
|
|
// Omit xml prolog for pure xhtml 1.0 strict documents (to be browser safe)
|
|
if (nType!=XHTML10) {
|
|
osw.write("<?xml version=\"1.0\" encoding=\""+sEncoding+"\" ?>\n");
|
|
}
|
|
// Either specify doctype or xsl transformation (the user may require
|
|
// that no doctype is used; this may be desirable for further transformations)
|
|
if (nType==XHTML_MATHML_XSL) {
|
|
// Original url: http://www.w3.org/Math/XSL/pmathml.xsl
|
|
// Add trailing slash if needed
|
|
String sSlash = sXsltPath.length()>0 && !sXsltPath.endsWith("/") ? "/" : "";
|
|
osw.write("<?xml-stylesheet type=\"text/xsl\" href=\""+sXsltPath+sSlash+"pmathml.xsl\"?>\n");
|
|
}
|
|
else if (!bNoDoctype) {
|
|
DocumentType docType = getContentDOM().getDoctype();
|
|
if (docType!=null) {
|
|
osw.write("<!DOCTYPE html PUBLIC \"");
|
|
osw.write(docType.getPublicId());
|
|
osw.write("\" \"");
|
|
osw.write(docType.getSystemId());
|
|
osw.write("\">\n");
|
|
}
|
|
}
|
|
Element doc = getContentDOM().getDocumentElement();
|
|
optimize(doc,null,null);
|
|
write(doc,bPrettyPrint ? 0 : -1,osw);
|
|
osw.flush();
|
|
osw.close();
|
|
}
|
|
|
|
private static boolean blockThis(Element node) {
|
|
String sTagName = node.getTagName();
|
|
if (blockPrettyPrint.contains(sTagName)) {
|
|
return true;
|
|
}
|
|
else if (conditionalBlockPrettyPrint.contains(sTagName)) {
|
|
// Block pretty printing if the content is anything but elements that block pretty print
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.ELEMENT_NODE && !blockPrettyPrint.contains(child.getNodeName())) {
|
|
return true;
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
return false;
|
|
}
|
|
else {
|
|
// Other elements block pretty printing if they contain text nodes
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
if (child.getNodeType()==Node.TEXT_NODE) {
|
|
return true;
|
|
}
|
|
child = child.getNextSibling();
|
|
}
|
|
return false;
|
|
}
|
|
}
|
|
|
|
private static boolean isEmpty(String sTagName) {
|
|
return emptyElements.contains(sTagName);
|
|
}
|
|
|
|
// Write nodes; we only need element, text and comment nodes
|
|
private void write(Node node, int nLevel, OutputStreamWriter osw) throws IOException {
|
|
short nType = node.getNodeType();
|
|
switch (nType) {
|
|
case Node.ELEMENT_NODE:
|
|
if (isEmpty(node.getNodeName())) {
|
|
// This node must be empty, we ignore child nodes
|
|
if (nLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("<"+node.getNodeName());
|
|
writeAttributes(node,osw);
|
|
osw.write(" />");
|
|
if (nLevel>=0) { osw.write("\n"); }
|
|
}
|
|
else if (node.hasChildNodes()) {
|
|
int nNextLevel = (nLevel<0 || blockThis((Element)node)) ? -1 : nLevel+1;
|
|
// Print start tag
|
|
if (nLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("<"+node.getNodeName());
|
|
writeAttributes(node,osw);
|
|
osw.write(">");
|
|
if (nNextLevel>=0) { osw.write("\n"); }
|
|
// Print children
|
|
Node child = node.getFirstChild();
|
|
while (child!=null) {
|
|
write(child,nNextLevel,osw);
|
|
child = child.getNextSibling();
|
|
}
|
|
// Print end tag
|
|
if (nNextLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("</"+node.getNodeName()+">");
|
|
if (nLevel>=0) { osw.write("\n"); }
|
|
}
|
|
else { // empty element
|
|
if (nLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("<"+node.getNodeName());
|
|
writeAttributes(node,osw);
|
|
// HTML compatibility: use end-tag even if empty
|
|
if (nType<=XHTML11) {
|
|
osw.write("></"+node.getNodeName()+">");
|
|
}
|
|
else {
|
|
osw.write(" />");
|
|
}
|
|
if (nLevel>=0) { osw.write("\n"); }
|
|
}
|
|
break;
|
|
case Node.TEXT_NODE:
|
|
write(node.getNodeValue(),osw);
|
|
break;
|
|
case Node.COMMENT_NODE:
|
|
if (nLevel>=0) { writeSpaces(nLevel,osw); }
|
|
osw.write("<!-- ");
|
|
write(node.getNodeValue(),osw);
|
|
osw.write(" -->");
|
|
if (nLevel>=0) { osw.write("\n"); }
|
|
}
|
|
}
|
|
|
|
private void writeAttributes(Node node, OutputStreamWriter osw) throws IOException {
|
|
NamedNodeMap attr = node.getAttributes();
|
|
int nLen = attr.getLength();
|
|
for (int i=0; i<nLen; i++) {
|
|
Node item = attr.item(i);
|
|
osw.write(" ");
|
|
write(item.getNodeName(),osw);
|
|
osw.write("=\"");
|
|
writeAttribute(item.getNodeValue(),osw);
|
|
osw.write("\"");
|
|
}
|
|
}
|
|
|
|
private void writeSpaces(int nCount, OutputStreamWriter osw) throws IOException {
|
|
for (int i=0; i<nCount; i++) { osw.write(" "); }
|
|
}
|
|
|
|
private void write(String s, OutputStreamWriter osw) throws IOException {
|
|
// Allow null strings, though this means there is a bug somewhere...
|
|
if (s==null) { osw.write("null"); return; }
|
|
int nLen = s.length();
|
|
char c;
|
|
for (int i=0; i<nLen; i++) {
|
|
c = s.charAt(i);
|
|
switch (c) {
|
|
case ('<'): osw.write("<"); break;
|
|
case ('>'): osw.write(">"); break;
|
|
case ('&'): osw.write("&"); break;
|
|
default:
|
|
write(c,osw);
|
|
}
|
|
}
|
|
}
|
|
|
|
private void writeAttribute(String s, OutputStreamWriter osw) throws IOException {
|
|
int nLen = s.length();
|
|
char c;
|
|
for (int i=0; i<nLen; i++) {
|
|
c = s.charAt(i);
|
|
switch (c) {
|
|
case ('<'): osw.write("<"); break;
|
|
case ('>'): osw.write(">"); break;
|
|
case ('&'): osw.write("&"); break;
|
|
case ('"'): osw.write("""); break;
|
|
case ('\''): osw.write( nType == XHTML10 ? "'" : "'"); break;
|
|
default:
|
|
write(c,osw);
|
|
}
|
|
}
|
|
}
|
|
|
|
private void write(char c, OutputStreamWriter osw) throws IOException {
|
|
if (bUseNamedEntities) {
|
|
if (c<256 && entities[c]!=null) {
|
|
// XHTML has a named entity here
|
|
osw.write(entities[c]);
|
|
return;
|
|
}
|
|
String s=getMathMLEntity(c);
|
|
if (s!=null && (nType==XHTML_MATHML || nType==XHTML_MATHML_XSL)) {
|
|
// There's a MathML entity to use
|
|
osw.write(s);
|
|
return;
|
|
}
|
|
}
|
|
if (c>cLimit) {
|
|
if (bHexadecimalEntities) {
|
|
osw.write("&#x"+Integer.toHexString(c).toUpperCase()+";");
|
|
}
|
|
else {
|
|
osw.write("&#"+Integer.toString(c).toUpperCase()+";");
|
|
}
|
|
}
|
|
else {
|
|
osw.write(c);
|
|
}
|
|
}
|
|
|
|
|
|
// Translate character to MathML entity (contributed by Bruno Mascret)
|
|
private String getMathMLEntity(char c) {
|
|
switch (c) {
|
|
case '\u0192': return "ƒ";// lettre minuscule latine f hameon
|
|
case '\u0391': return "Α";// lettre majuscule grecque alpha
|
|
case '\u0392': return "Β";// lettre majuscule grecque beta
|
|
case '\u0393': return "Γ";// lettre majuscule grecque gamma
|
|
case '\u0394': return "Δ";// lettre majuscule grecque delta
|
|
case '\u0395': return "Ε";// lettre majuscule grecque epsilon
|
|
case '\u0396': return "Ζ";// lettre majuscule grecque zeta
|
|
case '\u0397': return "Η";// lettre majuscule grecque eta
|
|
case '\u0398': return "Θ";// lettre majuscule grecque theta
|
|
case '\u0399': return "Ι";// lettre majuscule grecque iota
|
|
case '\u039A': return "Κ";// lettre majuscule grecque kappa
|
|
case '\u039B': return "Λ";// lettre majuscule grecque lambda
|
|
case '\u039C': return "Μ";// lettre majuscule grecque mu
|
|
case '\u039D': return "Ν";// lettre majuscule grecque nu
|
|
case '\u039E': return "Ξ";// lettre majuscule grecque xi
|
|
case '\u039F': return "Ο";// lettre majuscule grecque omicron
|
|
case '\u03A0': return "Π";// lettre majuscule grecque pi
|
|
case '\u03A1': return "Ρ";// lettre majuscule grecque rho
|
|
case '\u03A3': return "Σ";// lettre majuscule grecque sigma (Il n'y pas de caractere Sigmaf ni U+03A2 non plus)
|
|
case '\u03A4': return "Τ";// lettre majuscule grecque tau
|
|
case '\u03A5': return "Υ";// lettre majuscule grecque upsilon
|
|
case '\u03A6': return "Φ";// lettre majuscule grecque phi
|
|
case '\u03A7': return "Χ";// lettre majuscule grecque chi
|
|
case '\u03A8': return "Ψ";// lettre majuscule grecque psi
|
|
case '\u03A9': return "Ω";// lettre majuscule grecque omega
|
|
case '\u03B1': return "α";// lettre minuscule grecque alpha
|
|
case '\u03B2': return "β";// lettre minuscule grecque beta
|
|
case '\u03B3': return "γ";// lettre minuscule grecque gamma
|
|
case '\u03B4': return "δ";// lettre minuscule grecque delta
|
|
//case '\u03B4': return "δ";// lettre minuscule grecque delta
|
|
case '\u03B5': return "ε";// lettre minuscule grecque epsilon
|
|
case '\u03B6': return "ζ";// lettre minuscule grecque zeta
|
|
case '\u03B7': return "η";// lettre minuscule grecque eta
|
|
case '\u03B8': return "θ";// lettre minuscule grecque theta
|
|
case '\u03B9': return "ι";// lettre minuscule grecque iota
|
|
case '\u03BA': return "κ";// lettre minuscule grecque kappa
|
|
case '\u03BB': return "λ";// lettre minuscule grecque lambda
|
|
case '\u03BC': return "μ";// lettre minuscule grecque mu
|
|
case '\u03BD': return "ν";// lettre minuscule grecque nu
|
|
case '\u03BE': return "ξ";// lettre minuscule grecque xi
|
|
case '\u03BF': return "ο";// lettre minuscule grecque omicron
|
|
case '\u03C0': return "π";// lettre minuscule grecque pi
|
|
case '\u03C1': return "ρ";// lettre minuscule grecque rho
|
|
case '\u03C2': return "ς";// lettre minuscule grecque final sigma
|
|
case '\u03C3': return "σ";// lettre minuscule grecque sigma
|
|
case '\u03C4': return "τ";// lettre minuscule grecque tau
|
|
case '\u03C5': return "υ";// lettre minuscule grecque upsilon
|
|
case '\u03C6': return "φ";// lettre minuscule grecque phi
|
|
case '\u03C7': return "χ";// lettre minuscule grecque chi
|
|
case '\u03C8': return "ψ";// lettre minuscule grecque psi
|
|
case '\u03C9': return "ω";// lettre minuscule grecque omega
|
|
case '\u03D1': return "ϑ";// lettre minuscule grecque theta symbol
|
|
case '\u03D2': return "ϒ";// symbole grec upsilon crochet
|
|
case '\u03D6': return "ϖ";// symbole grec pi
|
|
case '\u2022': return "•";// puce (Ce N'EST PAS la meme chose que l'operateur puce, U+2219)
|
|
case '\u2026': return "…";// points de suspension
|
|
case '\u2032': return "′";// prime
|
|
case '\u2033': return "″";// double prime
|
|
case '\u203E': return "‾";// tiret en chef
|
|
case '\u2044': return "⁄";// barre de fraction
|
|
case '\u2118': return "℘";// fonction elliptique de Weierstrass
|
|
case '\u2111': return "ℑ";// majuscule I gothique = partie imaginaire
|
|
case '\u211C': return "ℜ";// majuscule R gothique = partie reelle
|
|
case '\u2122': return "™";// symbole marque commerciale
|
|
case '\u2135': return "ℵ";// symbole alef = premier nombre transfini (Le symbole alef N'EST PAS pareil a la lettre hebreue alef, U+05D0 meme si on pourrait utiliser le meme glyphe pour representer les deux caracteres)
|
|
case '\u2190': return "←";// fleche vers la gauche
|
|
case '\u2191': return "↑";// fleche vers le haut
|
|
case '\u2192': return "→";// fleche vers la droite
|
|
case '\u2193': return "↓";// fleche vers le bas
|
|
case '\u2194': return "↔";// fleche bilaterale
|
|
case '\u21B5': return "↵";// fleche vers le bas avec coin vers la gauche = retour de chariot
|
|
case '\u21D0': return "⇐";// double fleche vers la gauche (ISO 10646 ne dit pas que lArr est la meme chose que la fleche 'est implique par' et n'a pas non plus d'autre caractere pour cette fonction. Alors ? On peut utiliser lArr pour 'est implique par' comme le suggere)
|
|
case '\u21D1': return "⇑";// double fleche vers le haut
|
|
case '\u21D2': return "⇒";// double fleche vers la droite (ISO 10646 ne dit pas qu'il s'agit du caractere 'implique' et n'a pas non plus d'autre caractere avec cette fonction. Alors ? On peut utiliser rArr pour 'implique' comme le suggere)
|
|
case '\u21D3': return "⇓";// double fleche vers le bas
|
|
case '\u21D4': return "⇔";// double fleche bilaterale
|
|
case '\u2200': return "∀";// pour tous
|
|
case '\u2202': return "∂";// derivee partielle
|
|
case '\u2203': return "∃";// il existe
|
|
case '\u2205': return "∅";// ensemble vide = symbole diametre
|
|
case '\u2207': return "∇";// nabla
|
|
case '\u2208': return "∈";// appartient
|
|
case '\u2209': return "∉";// n'appartient pas
|
|
case '\u220B': return "∋";// contient comme element (Est-ce qu'il ne pourrait pas y avoir un nom plus parlant que 'ni' ?)
|
|
case '\u220F': return "∏";// produit de la famille = signe produit (prod N'EST PAS le meme caractere que U+03A0 'lettre capitale grecque pi' meme si le meme glyphe peut s'utiliser pour les deux)
|
|
case '\u2211': return "∑";// sommation de la famille (sum N'EST PAS le meme caractere que U+03A3 'ettre capitale grecque sigma' meme si le meme glyphe peut s'utiliser pour les deux)
|
|
case '\u2212': return "−";// signe moins
|
|
case '\u2217': return "∗";// operateur asterisque
|
|
case '\u221A': return "√";// racine carree = signe radical
|
|
case '\u221D': return "∝";// proportionnel
|
|
case '\u221E': return "∞";// infini
|
|
case '\u2220': return "∠";// angle
|
|
case '\u2227': return "∧";// ET logique
|
|
case '\u2228': return "∨";// OU logique
|
|
case '\u2229': return "∩";// intersection = cap
|
|
case '\u222A': return "∪";// union = cup
|
|
case '\u222B': return "∫";// integrale
|
|
case '\u2234': return "∴";// par consequent
|
|
case '\u223C': return "∼";// operateur tilde = varie avec = similaire (L'operateur tilde N'EST PAS le meme caractere que le tilde U+007E, meme si le meme glyphe peut s'utiliser pour les deux)
|
|
case '\u2245': return "≅";// approximativement egal
|
|
case '\u2248': return "≈";// presque egal = asymptotique
|
|
case '\u2260': return "≠";// pas egal
|
|
case '\u2261': return "≡";// identique
|
|
//case '\u2261': return "≡";// identique
|
|
case '\u2264': return "≤";// plus petit ou egal
|
|
case '\u2265': return "≥";// plus grand ou egal
|
|
case '\u2282': return "⊂";// sous-ensemble de
|
|
case '\u2283': return "⊃";// sur-ensemble de (Remarquez que nsup 'pas un sur-ensemble de' 2285, n'est pas couvert par le codage de la police Symbol. Devrait-il l'etre par symetrie ? Il est dans)
|
|
case '\u2284': return "⊄";// pas un sous-ensemble de
|
|
case '\u2286': return "⊆";// sous-ensemble ou egal
|
|
case '\u2287': return "⊇";// sur-ensemble de ou egal
|
|
case '\u2295': return "⊕";// plus cercle = somme directe
|
|
case '\u2297': return "⊗";// multiplie par cercle = produit vectoriel
|
|
case '\u22A5': return "⊥";// taquet vers le haut = orthogonal = perpendiculaire
|
|
case '\u22C5': return "⋅";// operateur point (L'operateur point N'EST PAS le meme caractere que le 'point median', U+00B7)
|
|
case '\u2308': return "⌈";// plafond gauche = anglet gauche
|
|
case '\u2309': return "⌉";// plafond droite
|
|
case '\u230A': return "⌊";// plancher gauche
|
|
case '\u230B': return "⌋";// plancher droite
|
|
case '\u2329': return "⟨";// chevron vers la gauche (lang N'EST PAS le meme caractere que U+003C 'inferieur' ou U+2039 'guillemet simple vers la gauche')
|
|
case '\u232A': return "⟩";// chevron vers la droite (rang iN'EST PAS le meme caractere que U+003E 'superieur' ou U+203A 'guillemet simple vers la droite')
|
|
case '\u25CA': return "◊";// losange
|
|
case '\u2660': return "♠";// pique noir (Noir semble dire ici rempli par opposition ajoure)
|
|
case '\u2663': return "♣";// trefle noir
|
|
case '\u2665': return "♥";// coeur noir
|
|
case '\u2666': return "♦";// carreau noir
|
|
// truc pas prevus
|
|
case '\u2102': return "ℂ";// ensemble C des complexes
|
|
case '\u2115': return "ℕ";// ensemble N des entiers
|
|
case '\u211A': return "ℚ";// ensemble Q des rationnels
|
|
case '\u211D': return "ℝ";// ensemble R des reels
|
|
case '\u2124': return "ℤ";// ensemble R des entiers relatifs
|
|
case '\u2223': return "∣";// divise
|
|
case '\u2224': return "∤";// ne divise pas
|
|
case '\u2243': return "≃";// asymptotiquement egal
|
|
case '\u2244': return "≄";// asymptotiquement egal
|
|
case '\u2225': return "∥";// parallele
|
|
case '\u00B1': return "±";// plus ou moins
|
|
case '\u2213': return "∓"; // moins ou plus (different de plus ou moins)
|
|
case '\u2494': return "⩽"; // inferieur ou egal incline
|
|
case '\u2270': return "≰"; //non inferieur ou egal incline
|
|
case '\u00AC': return "¬";// signe not
|
|
case '\u00B0': return "ˆ";// petit cercle, operateur concatenation, normalement ° mais on va le considere comme circ
|
|
case '\u224A': return "≊";// approxivativement egal
|
|
case '\u002B': return "+"; // signe plus
|
|
case '\u00D7': return "×"; // signe multiplication (croix)
|
|
case '\u003D': return "="; // signe egal
|
|
case '\u226E': return "≮"; // non inferieur
|
|
case '\u2A7D': return "⩽"; // inferieur incline = leqslant
|
|
case '\u220A': return "∈";// appartient
|
|
case '\u2216': return "∖";// difference d'ensemble
|
|
case '\u2288': return "⊈";// ni un sous-ensemble ni egal
|
|
case '\u2289': return "⊉";// ni un surensemble ni egal
|
|
case '\u2285': return "⊅";// non un surensemble de
|
|
case '\u301A': return "⟦";// crochet gauche avec barre
|
|
case '\u301B': return "⟧";// crochet droit avec barre
|
|
case '\u2210': return "∐";// coproduit (Pi l'envers)
|
|
case '\u222C': return "∬";// integrale double
|
|
case '\u222D': return "∭";// integrale triple
|
|
case '\u222E': return "∮";// integrale de contour
|
|
case '\u222F': return "∯";// integrale de surface
|
|
case '\u2230': return "∰";// integrale de volume
|
|
case '\u210F': return "ℏ";// const de Planck sur 2Pi
|
|
case '\u2253': return "&;";// BUG points suspensions diagonale descendant droite
|
|
case '\u22EE': return "⋮";// points suspensions verticaux
|
|
case '\u22EF': return "⋯";// points suspensions horizontaux medians
|
|
case '\u22F0': return "⋰";// points suspensions diagonale montant droite
|
|
case '\u22F1': return "⋱";// points suspensions diagonale descendant droite
|
|
case '\u02DA': return "˚"; //rond en chef
|
|
case '\u00A8': return "¨"; // double point en chef(trema)
|
|
case '\u02D9': return "˙"; // point en chef
|
|
case '\u2015': return "―"; // barre horizonthale
|
|
case '\u00AF': return "¯"; // barre horizonthale en chef
|
|
case '\u0332': return "_"; // souligne
|
|
case '\u2222': return "∢"; // angle spherique
|
|
case '\u03F1': return "ϱ"; // symbole grec rho final
|
|
case '\u226B': return "≫"; // tres superieur
|
|
case '\u226A': return "≪"; // tres inferieur
|
|
default: return null;
|
|
}
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|