/************************************************************************ * * TextConverter.java * * Copyright: 2002-2015 by Henrik Just * * This file is part of Writer2LaTeX. * * Writer2LaTeX is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Writer2LaTeX is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Writer2LaTeX. If not, see . * * Version 1.6 (2015-07-23) * */ package writer2latex.xhtml.content; import java.util.Hashtable; import java.util.Stack; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import pro.litvinovg.xml.Debug; import org.w3c.dom.Element; import writer2latex.util.Misc; import writer2latex.xhtml.Converter; import writer2latex.xhtml.ODFPageSplitter; import writer2latex.xhtml.Parser; import writer2latex.xhtml.StyleInfo; import writer2latex.xhtml.XhtmlConfig; import writer2latex.xhtml.XhtmlStyleMap; import writer2latex.xhtml.XhtmlStyleMapItem; import writer2latex.office.FontDeclaration; import writer2latex.office.MasterPage; import writer2latex.office.OfficeStyle; import writer2latex.office.PageLayout; import writer2latex.office.ListCounter; import writer2latex.office.ListStyle; import writer2latex.office.StyleWithProperties; import writer2latex.office.OfficeReader; import static writer2latex.office.XMLString.*; /** This class handles text content */ public class TextParser extends Parser { // Data used to handle splitting over several files // TODO: Accessor methods for sections // Some (Sony?) 
// EPUB readers have a limit on the file size of individual files
    // In any case very large files could be a performance problem, hence we do automatic splitting
    // after this number of characters.
    // (This default is replaced in the constructor by 1000*config.splitAfter().)
    private int nSplitAfter = 150000;

    // TODO: Collect soft page breaks between table rows
    private boolean bPendingPageBreak = false; // We have encountered a page break which should be inserted asap
    private int splitHeadingLevel = 0; // The outline level at which to split files (0=no split)
    private int nRepeatLevels = 5; // The number of levels to repeat when splitting (0=no repeat)
    private int nLastSplitLevel = 1; // The outline level at which the last split occurred
    // Indexed by outline level; handleHeading stores and clears entries for levels up to 6
    Element[] currentHeading = new Element[7]; // Last headings (repeated when splitting)
    private int nCharacterCount = 0; // The number of text characters in the current document

    // Counters for generated numbers
    private ListCounter outlineNumbering;
    // NOTE(review): raw Hashtable as it appears in this copy; type arguments may have been lost - confirm
    private Hashtable listCounters = new Hashtable();
    // Label, list style and level that handleParagraph passes on when inserting a hard list label
    private String sCurrentListLabel = null;
    private ListStyle currentListStyle = null;
    private int nCurrentListLevel = 0;

    // Created in the constructor; used to close/open pages and to end the document
    public Separator docSep = null;

    // Mode used to handle floats (depends on source doc type and config)
    private int nFloatMode;

    // Converter helpers used to handle all sorts of indexes
    private TOCParser tocParser;
    private LOFParser lofCv;
    private LOTParser lotCv;
    private AlphabeticalIndexParser indexCv;
    private BibliographyParser bibCv;

    // Converter helpers used to handle footnotes and endnotes
    private FootnoteParser footCv;
    private EndnoteParser endCv;

    // Sometimes we have to create an inlinenode in a block context
    // (labels for footnotes and endnotes)
    // We put it here and insert it in the first paragraph/heading to come:
    private Node asapNode = null;

    // When generating toc, a few things should be done differently
    private boolean bInToc = false;

    // Display hidden text?
private boolean displayHiddenText = false; // Current page number int pageNum = 1; //Current master page name private String currentMasterPage = null; //Current master page name private String nextMasterPage = null; private boolean pagination = config.pagination(); private boolean breakBeforeNextNode = false; private boolean inTable = false; private boolean inList = false; private boolean inFootnote = false; private boolean inEndnote = false; private boolean inHeader = false; private boolean inFooter = false; private String endnotesContext = null; private String footnotesContext = null; PageContainer pageContainer = null; public TextParser(OfficeReader ofr, XhtmlConfig config, Converter converter) { super(ofr,config,converter); tocParser = new TOCParser(ofr, config, converter); lofCv = new LOFParser(ofr, config, converter); lotCv = new LOTParser(ofr, config, converter); bibCv = new BibliographyParser(ofr, config, converter); indexCv = new AlphabeticalIndexParser(ofr, config, converter); footCv = new FootnoteParser(ofr, config, converter); endCv = new EndnoteParser(ofr, config, converter); nSplitAfter = 1000*config.splitAfter(); splitHeadingLevel = config.getXhtmlSplitLevel(); nRepeatLevels = converter.isOPS() ? 0 : config.getXhtmlRepeatLevels(); // never repeat headings in EPUB nFloatMode = ofr.isText() && config.xhtmlFloatObjects() ? 
DrawParser.FLOATING : DrawParser.ABSOLUTE; outlineNumbering = new ListCounter(ofr.getOutlineStyle()); displayHiddenText = config.displayHiddenText(); pageContainer = converter.pageContainer; docSep = new Separator(config, converter); } /** Converts an office node as a complete text document * * @param onode the Office node containing the content to convert */ public void convertDocumentContent(Element onode) { Element hnode = converter.nextOutFile(); // Create form if (splitHeadingLevel==0) { Element form = getDrawParser().createForm(); if (form!=null) { hnode.appendChild(form); hnode = form; } } // Add cover image hnode = getDrawParser().insertCoverImage(hnode); //Extract table:index-body content from TOC if (!config.includeToc()){ extractRealTOC(onode); } //Split pages if (pagination) { onode = (Element) ODFPageSplitter.splitText(onode,ofr); } hnode = (Element)traverseBlockText(onode,hnode); // Add footnotes and endnotes insertFootnotes(hnode,true); addFooter(hnode); insertEndnotes(hnode, null); hnode = (Element) docSep.endDocument(hnode); // Generate all indexes bInToc = true; tocParser.generate(); bInToc = false; bInToc = true; tocParser.generatePanels(splitHeadingLevel); bInToc = false; } private void insertEndnotes(Element hnode, String section) { inEndnote = true; endCv.insertEndnotes(hnode,section); inEndnote = false; } private void extractRealTOC(Element onode) { NodeList tocs = onode.getElementsByTagName(TEXT_TABLE_OF_CONTENT); int i = 0; if (tocs == null){ return; } while (i < tocs.getLength()){ Node toc = tocs.item(i); NodeList indexBody = ((Element)toc).getElementsByTagName(TEXT_INDEX_BODY); if (indexBody == null || indexBody.item(0) == null){ i++; continue; } while (indexBody.item(0).hasChildNodes()){ Node child = indexBody.item(0).getFirstChild(); if (child.getNodeType() == Node.ELEMENT_NODE && ((Element) child).getTagName().equals(TEXT_INDEX_TITLE) ){ while(child.hasChildNodes()){ toc.getParentNode().insertBefore(child.getFirstChild(),toc); } 
//kamikadze child.getParentNode().removeChild(child); continue; } //Move childnodes before text:table-of-content toc.getParentNode().insertBefore(child, toc); } i++; } return; } public int getTocIndex() { return tocParser.getFileIndex(); } public int getAlphabeticalIndex() { return indexCv.getFileIndex(); } protected void setAsapNode(Element node) { asapNode = node; } //////////////////////////////////////////////////////////////////////// // NAVIGATION (fill header, footer and panel with navigation links) //////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// // BLOCK TEXT (returns current html node at end of block) //////////////////////////////////////////////////////////////////////// public Node traverseBlockText(Node onode, Node hnode) { return parseText(onode,0,null,hnode); } public Node parseText(Node onode, int nLevel, String styleName, Node hnode) { if (!onode.hasChildNodes()) { return hnode; } NodeList nList = onode.getChildNodes(); int nLen = nList.getLength(); int i = 0; //hard Break after marker breakBeforeNextNode = false; while (i < nLen) { Node child = nList.item(i); if (child.getNodeType() == Node.ELEMENT_NODE) { String nodeName = child.getNodeName(); // Block splitting if (OfficeReader.isDrawElement(child)) { getDrawParser().handleDrawElement((Element)child,(Element)hnode,null,nFloatMode); } else if (nodeName.equals(TEXT_P)) { StyleWithProperties style = ofr.getParStyle(Misc.getAttribute(child,TEXT_STYLE_NAME)); hnode = processPageBreaks(child, hnode,style); nCharacterCount+=OfficeReader.getCharacterCount(child); // is there a block element, we should use? XhtmlStyleMap xpar = config.getXParStyleMap(); String sDisplayName = style!=null ? 
style.getDisplayName() : null; if (sDisplayName!=null && xpar.contains(sDisplayName)) { Node curHnode = hnode; XhtmlStyleMapItem map = xpar.get(sDisplayName); String sBlockElement = map.sBlockElement; String sBlockCss = map.sBlockCss; if (map.sBlockElement.length()>0) { Element block = converter.createElement(map.sBlockElement); if (!"(none)".equals(map.sBlockCss)) { block.setAttribute("class",map.sBlockCss); } hnode.appendChild(block); curHnode = block; } boolean bMoreParagraphs = true; do { handleParagraph(child,curHnode); bMoreParagraphs = false; if (++i 0){ para = (Element) paras.item(0); } NodeList heads = item.getElementsByTagName(TEXT_H); if (heads != null && heads.getLength() > 0){ head = (Element) heads.item(0); } if (para != null ){ StyleWithProperties paraStyle = ofr.getParStyle(Misc.getAttribute(para,TEXT_STYLE_NAME)); if (paraStyle != null) { newPageNumberProperty = paraStyle.getParProperty(STYLE_PAGE_NUMBER, true); } newPageNumberProperty = paraStyle.getParProperty(STYLE_PAGE_NUMBER, true); if (hasMasterPage(paraStyle) || newPageNumberProperty != null){ style = paraStyle; } } if (head != null && style == null){ StyleWithProperties headStyle = ofr.getParStyle(Misc.getAttribute(head,TEXT_STYLE_NAME)); if (headStyle != null) { newPageNumberProperty = headStyle.getParProperty(STYLE_PAGE_NUMBER, true); } if (hasMasterPage(headStyle) || newPageNumberProperty != null){ style = headStyle; } } } hnode = processPageBreaks(child, hnode,style); inList = true; if (getListParser().listIsOnlyHeadings(child)) { hnode = getListParser().handleFakeList(child,nLevel+1,styleName,hnode); } else { getListParser().handleList(child,nLevel+1,styleName,hnode); } inList = false; } else if (nodeName.equals(TABLE_TABLE)) { StyleWithProperties style = ofr.getTableStyle(Misc.getAttribute(child, TABLE_STYLE_NAME)); hnode = processPageBreaks(child, hnode,style); inTable = true; getTableParser().handleTable(child,hnode); inTable = false; } else if (nodeName.equals(TABLE_SUB_TABLE)) { 
getTableParser().handleTable(child,hnode); } else if (nodeName.equals(TEXT_SECTION)) { hnode = processPageBreaks(child, hnode,null); hnode = handleSection(child,hnode); } else if (nodeName.equals(TEXT_TABLE_OF_CONTENT)) { if (!ofr.getTocReader((Element)child).isByChapter()) { } tocParser.handleIndex((Element)child,(Element)hnode); } else if (nodeName.equals(TEXT_ILLUSTRATION_INDEX)) { lofCv.handleLOF(child,hnode); } else if (nodeName.equals(TEXT_TABLE_INDEX)) { lotCv.handleLOT(child,hnode); } else if (nodeName.equals(TEXT_OBJECT_INDEX)) { // TODO } else if (nodeName.equals(TEXT_USER_INDEX)) { // TODO } else if (nodeName.equals(TEXT_ALPHABETICAL_INDEX)) { indexCv.handleIndex((Element)child,(Element)hnode); } else if (nodeName.equals(TEXT_BIBLIOGRAPHY)) { bibCv.handleIndex((Element)child,(Element)hnode); } else if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)) { breakBeforeNextNode = true; } else if (nodeName.equals(OFFICE_ANNOTATION)) { converter.handleOfficeAnnotation(child,hnode); } else if (nodeName.equals(TEXT_SEQUENCE_DECLS)) { //handleSeqeuenceDecls(child); } //TODO:IS IT NEEDED? 
hnode = getDrawParser().flushFullscreenFrames((Element)hnode); } i++; } return hnode; } /* Process a text:section tag (returns current html node) */ private Node handleSection(Node onode, Node hnode) { // Unlike headings, paragraphs and spans, text:display is not attached to the style: String lastEndnotesContext = endnotesContext; String lastFootnotesContext = footnotesContext; boolean pageWasOpened = docSep.isPageOpened(); String sectionName = Misc.getAttribute(onode,TEXT_NAME); String last = Misc.getAttribute(onode,"last"); boolean isLast = false; if (last != null && last.equals("true")) { isLast = true;} if (!displayHiddenText && "none".equals(Misc.getAttribute(onode,TEXT_DISPLAY))) { return hnode; } if (pageWasOpened) {hnode = docSep.closePage(hnode);} boolean removeStyleAtExit = setSectionStyle(onode); if (pageWasOpened) {hnode = docSep.openPage(hnode, pageNum);} hnode = traverseBlockText(onode, hnode); if (isLast) { insertEndnotes((Element) hnode, sectionName); } endnotesContext = lastEndnotesContext; footnotesContext = lastFootnotesContext; if (removeStyleAtExit) { pageContainer.removeStyle(); } return hnode; } private boolean setSectionStyle(Node onode) { boolean result = false; StyleWithProperties sectionStyle; String styleName = Misc.getAttribute(onode,TEXT_STYLE_NAME); String sectionName = Misc.getAttribute(onode,TEXT_NAME); OfficeStyle ofStyle = getSectionSP().getStyles().getStyle(styleName); if (ofStyle != null) { sectionStyle = (StyleWithProperties) ofStyle; int colCount = sectionStyle.getColCount(); if (colCount > 1 ) { String styleValue = "column-count: " + colCount + ";"; pageContainer.addStyle(styleValue); result = true; } String collectEndnotes = sectionStyle.getSectionProperty("endnote", false); if (collectEndnotes != null && collectEndnotes.equals("true")) { endnotesContext = sectionName; } String collectFootnotes = sectionStyle.getSectionProperty("footnote", false); if (collectFootnotes != null && collectFootnotes.equals("true")) { 
footnotesContext = sectionName;
            }
        }
        return result;
    }

    /*
     * Process a text:h tag using the outline style of the document.
     * The list level is the outline level of the heading; the un-numbered and
     * restart flags and the start value are read from attributes of the heading.
     */
    private void handleHeading(Element onode, Element hnode, boolean bAfterSplit) {
        int nListLevel = getOutlineLevel((Element)onode);
        boolean bUnNumbered = "true".equals(Misc.getAttribute(onode,TEXT_IS_LIST_HEADER));
        boolean bRestart = "true".equals(Misc.getAttribute(onode,TEXT_RESTART_NUMBERING));
        // The start value defaults to 1 when the attribute is not a positive integer; one less is passed on
        int nStartValue = Misc.getPosInteger(Misc.getAttribute(onode,TEXT_START_VALUE),1)-1;
        handleHeading(onode, hnode, bAfterSplit, ofr.getOutlineStyle(),
            nListLevel, bUnNumbered, bRestart, nStartValue);
    }

    /**
     * Process a text:h tag.
     * Headings with an outline level up to 6 are exported as h-elements;
     * anything beyond that is exported as an ordinary paragraph.
     *
     * @param onode       the heading element
     * @param hnode       the html node to which the heading is added
     * @param bAfterSplit if true (and splitting is enabled) the stored headings of
     *                    higher levels are repeated before this heading
     * @param listStyle   the list style used for the numbering
     * @param nListLevel  the list level used for the numbering
     * @param bUnNumbered true if the heading should not be numbered
     * @param bRestart    true if the numbering should be restarted at this heading
     * @param nStartValue the value passed to the counter when restarting
     */
    public void handleHeading(Element onode, Element hnode, boolean bAfterSplit,
            ListStyle listStyle, int nListLevel, boolean bUnNumbered,
            boolean bRestart, int nStartValue) {
        // Note: nListLevel may in theory be different from the outline level,
        // though the ui in OOo does not allow this

        // Numbering: It is possible to define outline numbering in CSS2
        // using counters; but this is not supported in all browsers
        // TODO: Offer CSS2 solution as an alternative later.

        // Note: Conditional styles are not supported
        int nLevel = getOutlineLevel(onode);
        if (nLevel <= 6) { // Export as heading
            String sStyleName = onode.getAttribute(TEXT_STYLE_NAME);
            StyleWithProperties style = ofr.getParStyle(sStyleName);

            // Check for hidden text
            if (!displayHiddenText && style != null && "none".equals(style.getProperty(TEXT_DISPLAY))) {
                return;
            }

            // Numbering
            if (!bUnNumbered) {
                // If the heading uses a paragraph style which sets an explicit empty
                // list style name, it's unnumbered
                if (style != null) {
                    String sListStyleName = style.getListStyleName();
                    if (sListStyleName != null && sListStyleName.length() == 0) {
                        bUnNumbered = true;
                    }
                }
            }

            ListCounter counter = null;
            String sLabel = "";
            if (!bUnNumbered) {
                counter = getListParser().getListCounter(listStyle);
                if (bRestart) {
                    counter.restart(nListLevel, nStartValue);
                }
                // Step the counter for this level and take the resulting label
                sLabel = counter.step(nListLevel).getLabel();
            }

            // In EPUB export, a striked out heading will only appear in the external
            // toc
            boolean bTocOnly = false;
            if (converter.isOPS() && style != null) {
                String sStrikeOut = style.getProperty(STYLE_TEXT_LINE_THROUGH_STYLE, true);
                if (sStrikeOut != null && !"none".equals(sStrikeOut)) {
                    bTocOnly = true;
                }
            }

            // Export the heading
            if (!bTocOnly) {
                // If split output, add headings of higher levels
                if (bAfterSplit && splitHeadingLevel > 0) {
                    int nFirst = nLevel - nRepeatLevels;
                    if (nFirst < 0) {
                        nFirst = 0;
                    }
                    for (int i = nFirst; i < nLevel; i++) {
                        if (currentHeading[i] != null) {
                            hnode.appendChild(converter.importNode(currentHeading[i], true));
                        }
                    }
                }

                // Apply style
                StyleInfo info = new StyleInfo();
                info.sTagName = "h" + nLevel;
                getHeadingSP().readParentStyle(nLevel, sStyleName, info);
                getHeadingSP().applyStyle(nLevel, sStyleName, info);

                // add root element
                Element heading = converter.createElement(info.sTagName);
                hnode.appendChild(heading);
                writeStyle(info, heading);
                traverseFloats(onode, hnode, heading);

                // Apply writing direction
                /*
                 * String sStyleName = Misc.getAttribute(onode,TEXT_STYLE_NAME);
                 * StyleWithProperties style = ofr.getParStyle(sStyleName); if
                 * (style!=null) { StyleInfo headInfo = new StyleInfo();
                 * StyleConverterHelper.applyDirection(style,headInfo);
                 * getParSc().applyStyle(headInfo,heading); }
                 */

                // Prepend asapNode
                prependAsapNode(heading);

                // Prepend numbering
                if (!bUnNumbered) {
                    getListParser().insertListLabel(listStyle, nListLevel, "SectionNumber", null, sLabel, heading);
                }

                // Add to toc
                if (!bInToc) {
                    tocParser.handleHeading(onode, heading, sLabel);
                }

                // Convert content
                StyleInfo innerInfo = new StyleInfo();
                getHeadingSP().applyInnerStyle(nLevel, sStyleName, innerInfo);
                Element content = heading;
                // An inner element is only created when the inner style names a tag
                if (innerInfo.sTagName != null && innerInfo.sTagName.length() > 0) {
                    content = converter.createElement(innerInfo.sTagName);
                    heading.appendChild(content);
                    writeStyle(innerInfo, content);
                }
                traverseInlineText(onode, content);

                // Add before/after text if required
                addBeforeAfter(heading, ofr.getParStyle(getParSP().getRealParStyleName(sStyleName)),
                    config.getXHeadingStyleMap());

                // Keep track of current headings for split output
                currentHeading[nLevel] = heading;
                for (int i = nLevel + 1; i <= 6; i++) {
                    currentHeading[i] = null;
                }
            } else {
                if (!bInToc) {
                    tocParser.handleHeadingExternal(onode, hnode, sLabel);
                }
                // Keep track of current headings for split output
                currentHeading[nLevel] = null;
                for (int i = nLevel + 1; i <= 6; i++) {
                    currentHeading[i] = null;
                }
            }
        } else { // beyond h6 - export as ordinary paragraph
            handleParagraph(onode, hnode);
        }
    }

    /*
     * Process a text:p tag
     */
    private void handleParagraph(Node onode, Node hnode) {
        boolean bIsEmpty = OfficeReader.isWhitespaceContent(onode);
        if (config.ignoreEmptyParagraphs() && bIsEmpty) {
            return;
        }
        String styleName = Misc.getAttribute(onode,TEXT_STYLE_NAME);
        StyleWithProperties style = ofr.getParStyle(styleName);
        if (!displayHiddenText && style!=null && "none".equals(style.getProperty(TEXT_DISPLAY))) {
            return;
        }

        Element par;
        // Hack because createParagraph doesn't work the way we need here :-(
        Element
temp = converter.createElement("temp"); par = createParagraph(temp, styleName); prependAsapNode(par); traverseFloats(onode, hnode, par); hnode.appendChild(temp.getFirstChild()); // Maybe add to toc tocParser.handleParagraph((Element)onode, par, sCurrentListLabel); if (!bIsEmpty) { par = createTextBackground(par, styleName); if (config.listFormatting()==XhtmlConfig.HARD_LABELS) { getListParser().insertListLabel(currentListStyle, nCurrentListLevel, "ItemNumber", null, sCurrentListLabel, par); } sCurrentListLabel = null; traverseInlineText(onode,par); } else { // An empty paragraph (this includes paragraphs that only contains // whitespace) is ignored by the browser, hence we add   par.appendChild( converter.createTextNode("\u00A0") ); sCurrentListLabel = null; } if (converter.isOPS() && !par.hasChildNodes()) { // Finally, in EPUB export, if the exported paragraph turns out to be empty, remove it hnode.removeChild(par); } else { // Otherwise, add before/after text if required addBeforeAfter(par,ofr.getParStyle(getParSP().getRealParStyleName(styleName)),config.getXParStyleMap()); } } private void prependAsapNode(Node node) { if (asapNode!=null) { // May float past a split; check this first if (asapNode.getOwnerDocument()!=node.getOwnerDocument()) { asapNode = converter.importNode(asapNode,true); } node.appendChild(asapNode); asapNode = null; } } //////////////////////////////////////////////////////////////////////// // INLINE TEXT //////////////////////////////////////////////////////////////////////// /* Process floating frames bound to this inline text (ie. 
paragraph) */
    /*
     * Walks the children of onode and hands every draw element that is not
     * anchored as-char (plus text boxes that are) to the draw parser. Other
     * text elements are searched recursively, except footnotes and endnotes.
     * A draw element for which the draw parser reports no real element is
     * skipped, as traverseInlineText does for the same lookup.
     */
    private void traverseFloats(Node onode, Node hnodeBlock, Node hnodeInline) {
        Node child = onode.getFirstChild();
        while (child!=null) {
            if (child.getNodeType()==Node.ELEMENT_NODE) {
                Element elm = (Element) child;
                if (OfficeReader.isDrawElement(elm)) {
                    Element real = getDrawParser().getRealDrawElement(elm);
                    // Guard before reading attributes: the lookup may yield null
                    if (real!=null) {
                        String sAnchor = real.getAttribute(TEXT_ANCHOR_TYPE);
                        // For a frame, the first child element is what gets converted
                        Element target = real;
                        if (Misc.isElement(real, DRAW_FRAME)) {
                            target = Misc.getFirstChildElement(real);
                        }
                        if (target!=null) {
                            String sTag = target.getTagName();
                            // Convert only floating frames; text-boxes must always float
                            if (!"as-char".equals(sAnchor)) {
                                getDrawParser().handleDrawElement(target,(Element)hnodeBlock,
                                        (Element)hnodeInline,nFloatMode);
                            }
                            else if (DRAW_TEXT_BOX.equals(sTag)) {
                                getDrawParser().handleDrawElement(target,(Element)hnodeBlock,
                                        (Element)hnodeInline,DrawParser.INLINE);
                            }
                        }
                    }
                }
                else if (OfficeReader.isTextElement(elm)) {
                    // Do not descend into {foot|end}notes
                    if (!OfficeReader.isNoteElement(elm)) {
                        traverseFloats(elm,hnodeBlock,hnodeInline);
                    }
                }
            }
            child = child.getNextSibling();
        }
    }

    /*
     * Process inline text: text nodes are copied, known inline elements are
     * dispatched to their handlers, any other element whose name starts with
     * "text:" is traversed recursively, and everything else is ignored.
     */
    protected void traverseInlineText(Node onode, Node hnode) {
        // String styleName = Misc.getAttribute(onode, TEXT_STYLE_NAME);

        if (onode.hasChildNodes()) {
            NodeList nList = onode.getChildNodes();
            int nLen = nList.getLength();

            for (int i = 0; i < nLen; i++) {

                Node child = nList.item(i);
                short nodeType = child.getNodeType();

                switch (nodeType) {
                case Node.TEXT_NODE:
                    String s = child.getNodeValue();
                    if (s.length() > 0) {
                        hnode.appendChild(converter.createTextNode(s));
                    }
                    break;

                case Node.ELEMENT_NODE:
                    String sName = child.getNodeName();
                    if (OfficeReader.isDrawElement(child)) {
                        // Only frames anchored as-char are handled here; the others are floats
                        Element elm = getDrawParser().getRealDrawElement((Element) child);
                        if (elm != null) {
                            String sAnchor = (elm.getAttribute(TEXT_ANCHOR_TYPE));
                            if ("as-char".equals(sAnchor)) {
                                getDrawParser().handleDrawElement(elm, null, (Element) hnode, DrawParser.INLINE);
                            }
                        }
                    } else if (sName.equals(TEXT_S)) {
                        if (config.ignoreDoubleSpaces()) {
                            hnode.appendChild(converter.createTextNode(" "));
                        } else {
                            // One non-breaking space per counted space
                            int count = Misc.getPosInteger(Misc.getAttribute(child, TEXT_C), 1);
                            for (; count > 0; count--) {
                                hnode.appendChild(converter.createTextNode("\u00A0"));
                            }
                        }
                    } else if (sName.equals(TEXT_TAB_STOP)) {
                        handleTabStop(child, hnode);
                    } else if (sName.equals(TEXT_TAB)) { // oasis
                        handleTabStop(child, hnode);
                    } else if (sName.equals(TEXT_LINE_BREAK)) {
                        if (!config.ignoreHardLineBreaks()) {
                            hnode.appendChild(converter.createElement("br"));
                        }
                    } else if (sName.equals(TEXT_SPAN)) {
                        handleSpan(child, hnode);
                    } else if (sName.equals(TEXT_A)) {
                        handleAnchor(child, hnode);
                    } else if (sName.equals(TEXT_FOOTNOTE)) {
                        footCv.handleNote(child, hnode, footnotesContext);
                    } else if (sName.equals(TEXT_ENDNOTE)) {
                        endCv.handleNote(child, hnode, endnotesContext);
                    } else if (sName.equals(TEXT_NOTE)) { // oasis
                        if ("endnote".equals(Misc.getAttribute(child, TEXT_NOTE_CLASS))) {
                            endCv.handleNote(child, hnode, endnotesContext);
                        } else {
                            footCv.handleNote(child, hnode, footnotesContext);
                        }
                    } else if (sName.equals(TEXT_SEQUENCE)) {
                        handleSequence(child, hnode);
                    } else if (sName.equals(TEXT_PAGE_NUMBER)) {
                        handlePageNumber(child, hnode);
                    } else if (sName.equals(TEXT_PAGE_COUNT)) {
                        handlePageCount(child, hnode);
                    } else if (sName.equals(TEXT_SEQUENCE_REF)) {
                        handleSequenceRef(child, hnode);
                    } else if (sName.equals(TEXT_FOOTNOTE_REF)) {
                        handleNoteRef(child, hnode);
                    } else if (sName.equals(TEXT_ENDNOTE_REF)) {
                        handleNoteRef(child, hnode);
                    } else if (sName.equals(TEXT_NOTE_REF)) { // oasis
                        handleNoteRef(child, hnode);
                    } else if (sName.equals(TEXT_REFERENCE_MARK)) {
                        handleReferenceMark(child, hnode);
                    } else if (sName.equals(TEXT_REFERENCE_MARK_START)) {
                        handleReferenceMark(child, hnode);
                    } else if (sName.equals(TEXT_REFERENCE_REF)) {
                        handleReferenceRef(child, hnode);
                    } else if (sName.equals(TEXT_BOOKMARK)) {
                        handleBookmark(child, hnode);
                    } else if (sName.equals(TEXT_BOOKMARK_START)) {
                        handleBookmark(child, hnode);
                    } else if (sName.equals(TEXT_BOOKMARK_REF)) {
                        handleBookmarkRef(child, hnode);
                    } else if (sName.equals(TEXT_ALPHABETICAL_INDEX_MARK)) {
                        // Index marks are not collected while the toc is generated
                        if (!bInToc) {
                            indexCv.handleIndexMark(child, hnode);
                        }
                    } else if (sName.equals(TEXT_ALPHABETICAL_INDEX_MARK_START)) {
                        if (!bInToc) {
                            indexCv.handleIndexMarkStart(child, hnode);
                        }
                    } else if (sName.equals(TEXT_TOC_MARK)) {
                        tocParser.handleTocMark(child, hnode);
                    } else if (sName.equals(TEXT_TOC_MARK_START)) {
                        tocParser.handleTocMark(child, hnode);
                    } else if (sName.equals(TEXT_BIBLIOGRAPHY_MARK)) {
                        handleBibliographyMark(child, hnode);
                    } else if (sName.equals(OFFICE_ANNOTATION)) {
                        converter.handleOfficeAnnotation(child, hnode);
                    } else if (sName.startsWith("text:")) {
                        traverseInlineText(child, hnode);
                    }
                    // other tags are ignored;
                    break;
                default:
                    // Do nothing
                }
            }
        }
    }

    /*
     * Export a tab stop: xhtml does not have tab stops, but we export an ASCII
     * TAB character, which the user may choose to format. If a tab stop style
     * is configured, the character is wrapped in a span carrying that class.
     */
    private void handleTabStop(Node onode, Node hnode) {
        String sTabstopStyle = config.getXhtmlTabstopStyle();
        if (sTabstopStyle.length()>0) {
            Element span = converter.createElement("span");
            hnode.appendChild(span);
            span.setAttribute("class",sTabstopStyle);
            span.appendChild(converter.createTextNode("\t"));
        }
        else {
            hnode.appendChild(converter.createTextNode("\t"));
        }
    }

    /*
     * Process a text:span tag. Hidden spans are dropped unless hidden text is
     * displayed. While the toc is generated no inline element is created and
     * the content goes straight into hnode.
     */
    private void handleSpan(Node onode, Node hnode) {
        String sStyleName = Misc.getAttribute(onode, TEXT_STYLE_NAME);
        StyleWithProperties style = ofr.getTextStyle(sStyleName);
        if (!displayHiddenText && style != null && "none".equals(style.getProperty(TEXT_DISPLAY))) {
            return;
        }

        if (!bInToc) {
            Element span = createInline((Element) hnode, sStyleName);
            traverseInlineText(onode, span);
        } else {
            traverseInlineText(onode, hnode);
        }
    }

    protected void traversePCDATA(Node onode, Node hnode) {
        if (onode.hasChildNodes()) {
            NodeList nl = onode.getChildNodes();
            int nLen = nl.getLength();
            // NOTE(review): the text from here on appears truncated in this copy; it is kept unchanged
            for (int i=0; i0) { Node child = elm.getFirstChild(); if (child!=null) {
elm.insertBefore(converter.createTextNode(mapItem.sBefore),child); } else { elm.appendChild(converter.createTextNode(mapItem.sBefore)); } } if (mapItem.sAfter!=null && mapItem.sAfter.length()>0) { elm.appendChild(converter.createTextNode(mapItem.sAfter)); } } } // Methods to query individual formatting properties (no inheritance) // Does this style contain the bold attribute? private boolean isBold(StyleWithProperties style) { String s = style.getProperty(FO_FONT_WEIGHT,false); return s!=null && "bold".equals(s); } // Does this style contain the italics/oblique attribute? private boolean isItalics(StyleWithProperties style) { String s = style.getProperty(FO_FONT_STYLE,false); return s!=null && !"normal".equals(s); } // Does this style contain a fixed pitch font? private boolean isFixed(StyleWithProperties style) { String s = style.getProperty(STYLE_FONT_NAME,false); String s2 = null; String s3 = null; if (s!=null) { FontDeclaration fd = (FontDeclaration) ofr.getFontDeclarations().getStyle(s); if (fd!=null) { s2 = fd.getFontFamilyGeneric(); s3 = fd.getFontPitch(); } } else { s = style.getProperty(FO_FONT_FAMILY,false); s2 = style.getProperty(STYLE_FONT_FAMILY_GENERIC,false); s3 = style.getProperty(STYLE_FONT_PITCH,false); } if ("fixed".equals(s3)) { return true; } if ("modern".equals(s2)) { return true; } return false; } // Does this style specify superscript? private boolean isSuperscript(StyleWithProperties style) { String sPos = style.getProperty(STYLE_TEXT_POSITION,false); if (sPos==null) return false; if (sPos.startsWith("sub")) return false; if (sPos.startsWith("-")) return false; if (sPos.startsWith("0%")) return false; return true; } // Does this style specify subscript? private boolean isSubscript(StyleWithProperties style) { String sPos = style.getProperty(STYLE_TEXT_POSITION,false); if (sPos==null) return false; if (sPos.startsWith("sub")) return true; if (sPos.startsWith("-")) return true; return false; } // Does this style specify underline? 
/**
 * Tells whether the style itself (second argument {@code false} to
 * {@code getProperty}) specifies an underline other than "none".
 * The property consulted depends on {@code ofr.isOpenDocument()}.
 */
private boolean isUnderline(StyleWithProperties style) {
    String s = ofr.isOpenDocument()
        ? style.getProperty(STYLE_TEXT_UNDERLINE_STYLE, false)
        : style.getProperty(STYLE_TEXT_UNDERLINE, false);
    return s != null && !"none".equals(s);
}

/**
 * Tells whether the style itself specifies a line-through/crossing-out
 * other than "none". The property consulted depends on
 * {@code ofr.isOpenDocument()}.
 */
private boolean isOverstrike(StyleWithProperties style) {
    String s = ofr.isOpenDocument()
        ? style.getProperty(STYLE_TEXT_LINE_THROUGH_STYLE, false)
        : style.getProperty(STYLE_TEXT_CROSSING_OUT, false);
    return s != null && !"none".equals(s);
}

/**
 * Applies the hard formatting attribute style maps for all supported
 * attributes (bold, italics, fixed, superscript, subscript, underline,
 * overstrike), nesting one element per matching attribute.
 *
 * @param node  the element to start from
 * @param style the style to inspect; nothing happens if it is null or not automatic
 * @return the innermost element created, or {@code node} if none was created
 */
private Element applyAttributes(Element node, StyleWithProperties style) {
    // Do nothing if we convert hard formatting
    int nFormatting = config.xhtmlFormatting();
    if (nFormatting == XhtmlConfig.CONVERT_ALL || nFormatting == XhtmlConfig.IGNORE_STYLES) {
        return node;
    }
    // Do nothing if this is not an automatic style
    if (style == null || !style.isAutomatic()) {
        return node;
    }
    node = applyAttribute(node, "bold", isBold(style));
    node = applyAttribute(node, "italics", isItalics(style));
    node = applyAttribute(node, "fixed", isFixed(style));
    node = applyAttribute(node, "superscript", isSuperscript(style));
    node = applyAttribute(node, "subscript", isSubscript(style));
    node = applyAttribute(node, "underline", isUnderline(style));
    node = applyAttribute(node, "overstrike", isOverstrike(style));
    return node;
}

/**
 * Applies a single hard formatting attribute style map.
 *
 * @param node   the element that receives the new child
 * @param sAttr  the attribute name used as key in the attribute style map
 * @param bApply whether the attribute is present; if false nothing is done
 * @return the newly appended element if the map names a non-empty element
 *         for {@code sAttr}, otherwise {@code node}
 */
private Element applyAttribute(Element node, String sAttr, boolean bApply) {
    if (!bApply) {
        return node;
    }
    XhtmlStyleMap xattr = config.getXAttrStyleMap();
    if (!xattr.contains(sAttr)) {
        return node;
    }
    // Look the item up once; treat a missing item or element name like an empty one
    XhtmlStyleMapItem map = xattr.get(sAttr);
    if (map == null || map.sElement == null || map.sElement.length() == 0) {
        return node;
    }
    Element attr = converter.createElement(map.sElement);
    if (!"(none)".equals(map.sCss)) {
        attr.setAttribute("class", map.sCss);
    }
    node.appendChild(attr);
    return attr;
}

/**
 * Creates a styled paragraph node and appends it to {@code node}.
 * The tag name and style come from the paragraph style info collected by
 * {@code getParSP()}; for an automatic style the hard formatting attribute
 * maps are applied as well.
 *
 * @param node      the parent element
 * @param styleName the name of the paragraph style
 * @return the element that should receive the paragraph content
 */
protected Element createParagraph(Element node, String styleName) {
    StyleInfo info = new StyleInfo();
    StyleWithProperties style = ofr.getParStyle(styleName);
    getParSP().readStyle(styleName, info);
    getParSP().readParentStyle(styleName, info);
    Element para = converter.createElement(info.sTagName);
    node.appendChild(para);
    writeStyle(info, para);
    if (style != null && style.isAutomatic()) {
        return applyAttributes(para, style);
    }
    return para;
}

/**
 * Creates an inline node with the background style of a paragraph style.
 *
 * @param node       the parent element
 * @param sStyleName the name of the paragraph style
 * @return a new {@code span} appended to {@code node} if the style has a text
 *         background and formatting is not ignored, otherwise {@code node}
 */
private Element createTextBackground(Element node, String sStyleName) {
    int nFormatting = config.xhtmlFormatting();
    if (nFormatting == XhtmlConfig.IGNORE_ALL || nFormatting == XhtmlConfig.IGNORE_HARD) {
        return node;
    }
    String sBack = getParSP().getTextBackground(sStyleName);
    if (sBack == null || sBack.length() == 0) {
        return node;
    }
    Element span = converter.createElement("span");
    span.setAttribute("style", sBack);
    node.appendChild(span);
    return span;
}

/**
 * Creates a styled inline node.
 * A new element is only created if the style info has attributes or maps to
 * a tag other than {@code span}. If the last child of {@code node} is an
 * element of the same type with identical class, style, xml:lang and dir
 * attributes, that element is reused instead of appending a new one.
 *
 * @param node       the parent element
 * @param sStyleName the name of the text style
 * @return the element that should receive the inline content
 */
protected Element createInline(Element node, String sStyleName) {
    StyleInfo info = new StyleInfo();
    getTextSP().readStyle(sStyleName, info);
    getTextSP().readParentStyle(sStyleName, info);
    Element newNode = node;
    if (info.hasAttributes() || !"span".equals(info.sTagName)) {
        // We (probably) need to create a new element
        newNode = converter.createElement(info.sTagName);
        writeStyle(info, newNode);
        // But we may want to merge it with the previous element
        Node prev = node.getLastChild();
        if (prev != null && Misc.isElement(prev, info.sTagName)
                && sameInlineAttributes(newNode, (Element) prev)) {
            // Attribute style mapped elements are *not* merged, we will live with that
            return applyAttributes((Element) prev, ofr.getTextStyle(sStyleName));
        }
        node.appendChild(newNode);
    }
    return applyAttributes(newNode, ofr.getTextStyle(sStyleName));
}

/** Compares the attributes that decide whether two inline elements can be merged. */
private boolean sameInlineAttributes(Element first, Element second) {
    String[] sNames = { "class", "style", "xml:lang", "dir" };
    for (String sName : sNames) {
        if (!first.getAttribute(sName).equals(second.getAttribute(sName))) {
            return false;
        }
    }
    return true;
}

/**
 * Reads the outline level of a node, from TEXT_OUTLINE_LEVEL for
 * OpenDocument and from TEXT_LEVEL otherwise, using 0 as the default passed
 * to {@code Misc.getPosInteger}.
 */
protected int getOutlineLevel(Element node) {
    String sLevel = ofr.isOpenDocument()
        ? node.getAttribute(TEXT_OUTLINE_LEVEL)
        : node.getAttribute(TEXT_LEVEL);
    return Misc.getPosInteger(sLevel, 0);
}

/**
 * Handles document start and page breaks in front of a block node.
 * Nothing is done inside headers, footers, tables, lists and notes. On the
 * first styled node the document is started. On a page break (master page
 * change, break-before, or a pending break-after from the previous node) the
 * footnotes and footer of the old page are written, the master page and page
 * number are updated and the header of the new page is written.
 *
 * @param currentNode the source node about to be converted
 * @param hnode       the current target node
 * @param style       the style of {@code currentNode}, may be null
 * @return the target node to continue with
 */
private Node processPageBreaks(Node currentNode, Node hnode, StyleWithProperties style) {
    // TODO (from the original notes): if currentNode is a table, inspect the first
    // paragraph inside it for master page, page number and break-before, and remember
    // to skip that paragraph when it is met later.
    if (inUnreakableElement()) {
        return hnode;
    }
    Integer newPageNumber = getPageNumber(style, null);
    if (currentMasterPage == null && style != null) {
        hnode = startDocument(hnode, style, newPageNumber);
        hnode = docSep.processOutlineLevel(currentNode, hnode, pageNum);
    } else if (hasMasterPage(style) || hasBreakBefore(style) || breakBeforeNextNode) {
        // Finish the previous page: footnotes first, then the footer of the old master page
        insertFootnotes(hnode, false);
        addFooter(hnode);
        // Update MP
        updateMasterPageWith(style);
        setPageContainerStyle();
        // Set new page number if defined or increment if not
        if (newPageNumber != null) {
            pageNum = newPageNumber;
        } else {
            pageNum++;
            fitPageNumberToMasterPageStyle();
        }
        if (hasOutlineLevel(currentNode)) {
            hnode = docSep.processOutlineLevel(currentNode, hnode, pageNum);
        } else {
            hnode = docSep.processPageBreak(currentNode, hnode, pageNum);
        }
        // Print new header
        addHeader(hnode);
        // NOTE(review): a break-after on this same node is not recorded here - confirm intended
        breakBeforeNextNode = false;
        return hnode;
    } else {
        hnode = docSep.processOutlineLevel(currentNode, hnode, pageNum);
    }
    // Remember a hard break after this node so that the next node starts a new page
    breakBeforeNextNode = checkHardBreakAfter(style);
    return hnode;
}

/** Tells whether we are inside an element in which page breaks are not processed. */
private boolean inUnreakableElement() {
    return inHeader || inFooter || inTable || inList || inFootnote || inEndnote;
}

/**
 * Lets the footnote helper insert the collected footnotes, with the
 * {@code inFootnote} flag raised for the duration of the call.
 */
private void insertFootnotes(Node hnode, boolean lastCall) {
    inFootnote = true;
    try {
        footCv.insertFootnotes(hnode, lastCall);
    } finally {
        // Always reset the flag, otherwise page breaks stay disabled after a failure
        inFootnote = false;
    }
}

/**
 * Reads the page number from the paragraph property STYLE_PAGE_NUMBER.
 * All non-digit characters are removed before parsing, so values without
 * digits (such as "auto") yield no number.
 *
 * @param style         the style to inspect, may be null
 * @param newPageNumber the value to return if the style defines no usable number
 * @return the page number from the style, or {@code newPageNumber}
 */
private Integer getPageNumber(StyleWithProperties style, Integer newPageNumber) {
    if (style == null) {
        return newPageNumber;
    }
    String sPageNumber = style.getParProperty(STYLE_PAGE_NUMBER, true);
    if (sPageNumber == null) {
        return newPageNumber;
    }
    // Truncate auto and other string values
    String sDigits = sPageNumber.replaceAll("[^0-9]", "");
    if (sDigits.isEmpty()) {
        return newPageNumber;
    }
    try {
        return Integer.valueOf(sDigits);
    } catch (NumberFormatException ignored) {
        // Only digits remain, so this means the number does not fit an int: treat as undefined
        return newPageNumber;
    }
}

/**
 * Starts the document: selects the first master page, sets the initial page
 * number if one is given, opens the document in the separator and writes the
 * first header.
 *
 * @return the target node returned by {@code docSep.startDocument}
 */
private Node startDocument(Node hnode, StyleWithProperties style, Integer newPageNumber) {
    setFirstMasterPage(style);
    if (newPageNumber != null) {
        pageNum = newPageNumber;
    }
    // Start tagging
    String sTitle = converter.getTitle();
    hnode = docSep.startDocument(hnode, sTitle, pageNum);
    // Print header
    addHeader(hnode);
    return hnode;
}

/** Selects the master page for the first page, falling back to "Standard". */
private void setFirstMasterPage(StyleWithProperties style) {
    updateMasterPageWith(style);
    if (currentMasterPage == null) {
        currentMasterPage = "Standard";
    }
    setPageContainerStyle();
}

/**
 * Sets the root style of the page container to the column count of the
 * current master page's layout. Nothing is set if the master page or its
 * layout cannot be found.
 */
private void setPageContainerStyle() {
    MasterPage mp = currentMasterPage != null ? ofr.getFullMasterPage(currentMasterPage) : null;
    if (mp == null) {
        return;
    }
    PageLayout layout = ofr.getPageLayout(mp.getPageLayoutName());
    if (layout == null) {
        return;
    }
    String containerStyle = "column-count: " + layout.getColCount() + ";";
    pageContainer.setRootStyle(containerStyle);
}

/**
 * Skips one page number if the page usage of the current master page's
 * layout does not match the parity of the page number: "left" with an odd
 * number, or "right" with an even number.
 */
private void fitPageNumberToMasterPageStyle() {
    // TODO: READ master-page style
    MasterPage masterPage = ofr.getFullMasterPage(currentMasterPage);
    if (masterPage == null) {
        return;
    }
    String pageLayoutName = masterPage.getPageLayoutName();
    if (pageLayoutName == null) {
        return;
    }
    PageLayout pageLayout = ofr.getPageLayout(pageLayoutName);
    if (pageLayout == null) {
        return;
    }
    String pageUsage = pageLayout.getPageUsage();
    if (pageUsage == null) {
        return;
    }
    int parity = pageNum % 2;
    if ((parity == 1 && pageUsage.equals("left")) || (parity == 0 && pageUsage.equals("right"))) {
        pageNum++;
    }
}

/** Tells whether the style names a (non-empty) master page. */
private boolean hasMasterPage(StyleWithProperties style) {
    if (style == null) {
        return false;
    }
    String sMasterPage = style.getMasterPageName();
    return sMasterPage != null && sMasterPage.length() > 0;
}

/**
 * Makes the master page named by the style the current one, or else the
 * pending next master page; does nothing if neither is available. Afterwards
 * the next master page is read from STYLE_NEXT_STYLE_NAME of the new current
 * master page (null if that master page cannot be found).
 */
private void updateMasterPageWith(StyleWithProperties style) {
    if (hasMasterPage(style)) {
        currentMasterPage = style.getMasterPageName();
    } else if (nextMasterPage != null) {
        currentMasterPage = nextMasterPage;
    } else {
        return;
    }
    MasterPage masterPage = ofr.getFullMasterPage(currentMasterPage);
    nextMasterPage = masterPage != null ? masterPage.getProperty(STYLE_NEXT_STYLE_NAME) : null;
}

/** Tells whether the style requests a page break before (FO_BREAK_BEFORE is "page"). */
private boolean hasBreakBefore(StyleWithProperties style) {
    return style != null && "page".equals(style.getProperty(FO_BREAK_BEFORE));
}

/** Tells whether the style requests a page break after (FO_BREAK_AFTER is "page"). */
private boolean checkHardBreakAfter(StyleWithProperties style) {
    return style != null && "page".equals(style.getProperty(FO_BREAK_AFTER));
}

/**
 * Writes the header of the current master page as a {@code header} element
 * inserted before {@code node} in its parent. On even pages the left header
 * is used if there is one, otherwise the regular header. Does nothing unless
 * pagination is enabled.
 *
 * @param node the node in front of which the header is inserted
 * @return {@code node}, unchanged
 */
private Node addHeader(Node node) {
    if (!pagination) {
        return node;
    }
    MasterPage masterPage = currentMasterPage != null ? ofr.getFullMasterPage(currentMasterPage) : null;
    if (masterPage == null) {
        System.err.println("ERROR MP is null");
        return node;
    }
    inHeader = true;
    try {
        PageLayout pageLayout = ofr.getPageLayout(masterPage.getPageLayoutName());
        // NOTE(review): the header margin is read through getFooterProperty - looks like
        // it should come from the header properties; confirm against PageLayout
        String marginBottom = pageLayout != null ? pageLayout.getFooterProperty(FO_MARGIN_BOTTOM) : null;
        Node headerNode = pageNum % 2 == 0 ? masterPage.getHeaderLeft() : null;
        if (headerNode == null) {
            headerNode = masterPage.getHeader();
        }
        if (headerNode != null) {
            // Create header element
            Element headerElement = converter.createElement("header");
            if (marginBottom != null) {
                headerElement.setAttribute("style",
                    "margin-bottom:" + getPageSP().scale(marginBottom) + "; height:auto; width:auto;");
            }
            Node pageNode = node.getParentNode();
            pageNode.insertBefore(headerElement, node);
            traverseBlockText(headerNode, headerElement);
        }
    } finally {
        // Always reset the flag, otherwise page breaks stay disabled after a failure
        inHeader = false;
    }
    return node;
}

/**
 * Writes the footer of the current master page as a {@code footer} element
 * appended to the parent of {@code node}. On even pages the left footer is
 * used if there is one, otherwise the regular footer. Does nothing unless
 * pagination is enabled.
 *
 * @param node a node inside the page that receives the footer
 * @return {@code node}, unchanged
 */
private Node addFooter(Node node) {
    if (!pagination) {
        return node;
    }
    MasterPage masterPage = currentMasterPage != null ? ofr.getFullMasterPage(currentMasterPage) : null;
    if (masterPage == null) {
        System.err.println("ERROR MP is null");
        return node;
    }
    inFooter = true;
    try {
        PageLayout pageLayout = ofr.getPageLayout(masterPage.getPageLayoutName());
        String marginTop = pageLayout != null ? pageLayout.getFooterProperty(FO_MARGIN_TOP) : null;
        Node footerNode = pageNum % 2 == 0 ? masterPage.getFooterLeft() : null;
        if (footerNode == null) {
            footerNode = masterPage.getFooter();
        }
        if (footerNode != null) {
            // Create footer element
            Element footerElement = converter.createElement("footer");
            if (marginTop != null) {
                footerElement.setAttribute("style",
                    "margin-top:" + getPageSP().scale(marginTop) + "; height:auto; width:auto;");
            }
            Node pageNode = node.getParentNode();
            pageNode.appendChild(footerElement);
            traverseBlockText(footerNode, footerElement);
        }
    } finally {
        // Always reset the flag, otherwise page breaks stay disabled after a failure
        inFooter = false;
    }
    return node;
}

/**
 * Tells whether the node is an element with a non-empty TEXT_OUTLINE_LEVEL
 * attribute and a title (as reported by {@code docSep.getTitle}) that is not
 * blank.
 */
private boolean hasOutlineLevel(Node node) {
    if (!Misc.isElement(node)) {
        return false;
    }
    String sLevel = Misc.getAttribute(node, TEXT_OUTLINE_LEVEL);
    if (sLevel == null || sLevel.isEmpty()) {
        return false;
    }
    // Check for null before trimming; a missing title means no usable outline entry
    String title = docSep.getTitle(node);
    return title != null && !title.trim().isEmpty();
}
}