package writer2latex.xhtml; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import writer2latex.office.OfficeReader; import writer2latex.office.StyleWithProperties; import static writer2latex.office.XMLString.*; import writer2latex.util.Misc; public class PageSplitter { static Node truncatedListItemNodeContent = null; static OfficeReader officeReader = null; protected static Node splitSoftPageBreak(Node onode,OfficeReader ofr){ //Find par node with soft page break inside and split it officeReader = ofr; Document document = onode.getOwnerDocument(); Element softPageBreak = document.createElement(TEXT_SOFT_PAGE_BREAK); NodeList nodes = onode.getChildNodes(); int i = 0; //Loop through the content nodes and split nodes with soft page break while (i < nodes.getLength()){ Node child = nodes.item(i); //System.out.println("splitSoftPageBreak "); //Necessary check if node is an Element if (containsSPB(child)){ String nodeName = child.getNodeName(); //Create Duplicate Node! Element childFirstPart = (Element) child.cloneNode(false); StyleWithProperties style = null; if ((nodeName.equals(TEXT_P) || nodeName.equals(TEXT_H))) { //If SPB not the first node Node paraFirstPart = handleParagraph(child); if (paraFirstPart != null){ onode.insertBefore(paraFirstPart, child); style = ofr.getParStyle(Misc.getAttribute(child, TEXT_STYLE_NAME)); } } else if (nodeName.equals(TABLE_TABLE)) { if (handleTableTable(childFirstPart, child)){ onode.insertBefore(childFirstPart, child); style = ofr.getTableStyle(Misc.getAttribute(child, TABLE_STYLE_NAME)); } } else if (nodeName.equals(TEXT_LIST)) { if (handleList(childFirstPart, child)){ onode.insertBefore(childFirstPart, child); } } else if (nodeName.equals(TEXT_SECTION)) { if (handleSection(childFirstPart, child)){ onode.insertBefore(childFirstPart, child); style = ofr.getSectionStyle(Misc.getAttribute(child, TEXT_SECTION)); } } else if (nodeName.equals(TEXT_TABLE_OF_CONTENT)){ //HACK containsSPB(childFirstPart); i++; continue; } else if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)){ //HACK i++; continue; } //TODO: IF fo:break before in original table - don't create SPB if (style == null || !"page".equals(style.getProperty(FO_BREAK_BEFORE))){ onode.insertBefore(softPageBreak.cloneNode(false), child); } //HACK! if (truncatedListItemNodeContent != null){ NodeList itemNodeList= truncatedListItemNodeContent.getChildNodes(); while (itemNodeList.getLength() > 0){ onode.insertBefore(itemNodeList.item(0), child); } truncatedListItemNodeContent.getParentNode().removeChild(truncatedListItemNodeContent); truncatedListItemNodeContent = null; } if (!child.hasChildNodes()){ onode.removeChild(child); } continue; } i++; } return onode; } private static boolean handleList(Node listFirstPart, Node list){ NodeList listNodes = list.getChildNodes(); int i = 0; boolean dataMoved = false; while (listNodes.getLength() > i) { Node listChild = listNodes.item(i); if(listChild.getNodeType() == Node.ELEMENT_NODE){ String nodeName = listChild.getNodeName(); if (nodeName.equals(TEXT_LIST_HEADER)) { if(containsSPB(listChild)){ //Remove inner SPB removeSPB(listChild); //HACK :( break; } listFirstPart.appendChild(listChild.cloneNode(true)); //Get next element i++; } else if (nodeName.equals(TEXT_LIST_ITEM)) { if (containsSPB(listChild)){ Node listItemFirstPart = listChild.cloneNode(false); //remove SPB, move previous nodes to firstPart. if (handleListItem(listItemFirstPart,listChild)){ dataMoved = true; //Add first part of list item to previous list item listFirstPart.appendChild(listItemFirstPart); //Get list parent node and move cutted node //After First Part and SPB but before this list; //TODO!!!!!!!!!! truncatedListItemNodeContent = listChild; listFirstPart.getParentNode(); //If List item is empty - remove it if (!listChild.hasChildNodes()){ list.removeChild(listChild); } } //Add text:continue-numbering="true" if (dataMoved){ ((Element) list).setAttribute(TEXT_CONTINUE_NUMBERING, "true"); } break; } else { // Not with SPB yet, move node, set dataMoved=true listFirstPart.appendChild(listChild); dataMoved = true; } } } } return dataMoved; } //If SPB before first item - return false, remove SPB //Otherwise add childNodes before SPB to firstPart, return true private static boolean handleListItem(Node listItemFirstPart, Node listItem){ int i = 0; boolean dataMoved = false; NodeList listItemNodes = listItem.getChildNodes(); while(listItemNodes.getLength() > i){ Node listItemChild = listItemNodes.item(i); if(listItemChild.getNodeType() == Node.ELEMENT_NODE){ //Node name String nodeName = listItemChild.getNodeName(); if (containsSPB(listItemChild)){ Node listItemChildFirstPart = listItemChild.cloneNode(false); //Break if SPB if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)) { //Remove SPB.Return result listItem.removeChild(listItemChild); } else if (nodeName.equals(TEXT_LIST)) { if (handleList(listItemChildFirstPart, listItemChild)){ listItemFirstPart.appendChild(listItemChildFirstPart); dataMoved=true; } } else if (nodeName.equals(TEXT_H) || nodeName.equals(TEXT_P)) { Node paraFirstPart = handleParagraph(listItemChild); if (paraFirstPart != null){ listItemFirstPart.appendChild(paraFirstPart); dataMoved=true; } } break; //Move to first part } else { listItemFirstPart.appendChild(listItemChild); dataMoved = true; } } else { listItemFirstPart.appendChild(listItemChild); dataMoved = true; } //check internal nodes } return dataMoved; } //Needs finish private static boolean handleTableTable(Node tableFirstPart, Node table) { /* * // TODO: 0.Test if soft-page-break not at start of table // - in that * case just remove it and insert before table // 1.Create new table // * 2.Copy to it table:table-column's and // table:table-header-rows // * 3.Move nodes before soft-page-break to new table //4. IF in one * table:row exist more one Algorithm IF SPB at start - just move it * higher IF SPB between rows - just copy table move row and put SPB * between tables IF SPB inside row, inside cell - copy table, copy * empty row, copy each empty cell and in each cell move every node up * to the first SPB * * */ NodeList tableChildNodes = table.getChildNodes(); // Node counter int i = 0; boolean dataMoved = false; // Loop through the TABLE:TABLE child nodes while (tableChildNodes.getLength() > i) { Node tableChildNode = tableChildNodes.item(i); if (tableChildNode.getNodeType() == Node.ELEMENT_NODE) { //Node name String tableChildNodeName = tableChildNode.getNodeName(); //System.out.println("Table child node " + tableChildNodeName); if (containsSPB(tableChildNode)){ Node tableChildFirstPart = tableChildNode.cloneNode(false); if (tableChildNodeName.equals(TEXT_SOFT_PAGE_BREAK)) { // remove inner soft page break node table.removeChild(tableChildNode); } else if (tableChildNodeName.equals(TABLE_TABLE_ROW_GROUP)) { if (handleTableRowGroup(tableChildFirstPart, tableChildNode)){ dataMoved = true; tableFirstPart.appendChild(tableChildFirstPart); } } else if ( tableChildNodeName.equals(TABLE_TABLE_ROWS)) { if (handleTableRows(tableChildFirstPart, tableChildNode)){ dataMoved = true; tableFirstPart.appendChild(tableChildFirstPart); } } else if ( tableChildNodeName.equals(TABLE_TABLE_ROW)) { if (handleTableRow(tableChildFirstPart, tableChildNode)){ dataMoved = true; tableFirstPart.appendChild(tableChildFirstPart); } } else if (tableChildNodeName.equals(TABLE_TABLE_COLUMN) || tableChildNodeName.equals(TABLE_TABLE_COLUMN_GROUP) || tableChildNodeName.equals(TABLE_TABLE_HEADER_ROWS) || tableChildNodeName.equals(TABLE_TABLE_HEADER_COLUMNS)) { //Remove Soft Page Break removeSPB(tableChildNode); } break; } else { //Before SPB //Description nodes if (tableChildNodeName.equals(TABLE_TABLE_COLUMN) || tableChildNodeName.equals(TABLE_TABLE_COLUMN_GROUP) || tableChildNodeName.equals(TABLE_TABLE_HEADER_ROWS) || tableChildNodeName.equals(TABLE_TABLE_HEADER_COLUMNS)) { //Append to clone table tableFirstPart.appendChild(tableChildNode.cloneNode(true)); //increment counter i++; } else { //Append to clone table tableFirstPart.appendChild(tableChildNode); dataMoved = true; } } } } return dataMoved; } private static boolean handleTableRowGroup(Node tableRowGroupFistPart, Node tableRowGroup) { boolean dataMoved = false; // Node counter int i = 0; NodeList tableRowGroupChildNodes = tableRowGroup.getChildNodes(); while (tableRowGroupChildNodes.getLength() > i) { Node tableRowGroupChildNode = tableRowGroupChildNodes.item(0); if ((tableRowGroupChildNode.getNodeType() == Node.ELEMENT_NODE)) { String nodeName = tableRowGroupChildNode.getNodeName(); if (containsSPB(tableRowGroupChildNode)){ Node tableRowGroupChildFirstPart = tableRowGroupChildNode.cloneNode(false); if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)){ // remove inner soft page break node tableRowGroup.removeChild(tableRowGroupChildNode); } else if (nodeName.equals(TABLE_TABLE_HEADER_ROWS)){ //Nothing IF table-header-rows found - it is description node, //Not needed to set dataMoved = true, not needed to append First part } else if (nodeName.equals(TABLE_TABLE_ROW)){ if (handleTableRow(tableRowGroupChildFirstPart, tableRowGroupChildNode)){ dataMoved = true; tableRowGroupFistPart.appendChild(tableRowGroupChildFirstPart); } } else if (nodeName.equals(TABLE_TABLE_ROW_GROUP)){ if (handleTableRowGroup(tableRowGroupChildFirstPart, tableRowGroupChildNode)){ dataMoved = true; tableRowGroupFistPart.appendChild(tableRowGroupChildFirstPart); } } else if (nodeName.equals(TABLE_TABLE_ROWS)){ if (handleTableRows(tableRowGroupChildFirstPart, tableRowGroupChildNode)){ dataMoved = true; tableRowGroupFistPart.appendChild(tableRowGroupChildFirstPart); } } break; } else { if (nodeName.equals(TABLE_TABLE_HEADER_ROWS)){ tableRowGroupFistPart.appendChild(tableRowGroupChildNode.cloneNode(true)); //increment counter i++; } else { tableRowGroupFistPart.appendChild(tableRowGroupChildNode); dataMoved = true; } } } else { //Append text nodes tableRowGroupFistPart.appendChild(tableRowGroupChildNode); dataMoved = true; } } return dataMoved; } private static boolean handleTableRows(Node tableRowsFistPart, Node tableRows) { boolean dataMoved = false; // Node counter int i = 0; NodeList tableRowsChildNodes = tableRows.getChildNodes(); while (tableRowsChildNodes.getLength() > i) { Node tableRowsChildNode = tableRowsChildNodes.item(0); if ((tableRowsChildNode.getNodeType() == Node.ELEMENT_NODE)) { String nodeName = tableRowsChildNode.getNodeName(); if (containsSPB(tableRowsChildNode)){ Node tableRowGroupChildFirstPart = tableRowsChildNode.cloneNode(false); if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)){ // remove inner soft page break node tableRows.removeChild(tableRowsChildNode); } else if (nodeName.equals(TABLE_TABLE_ROW)){ if (handleTableRow(tableRowGroupChildFirstPart, tableRowsChildNode)){ dataMoved = true; tableRowsFistPart.appendChild(tableRowGroupChildFirstPart); } } break; } else { tableRowsFistPart.appendChild(tableRowsChildNode); dataMoved = true; } } else { System.out.println("ERROR: TEXT NODE FOUND INSIDE tabl:table-rows"); //Append text nodes //tableRowsFistPart.appendChild(tableRowsChildNode); //dataMoved = true; } } return dataMoved; } private static boolean handleTableRow(Node tableRowFistPart, Node tableRow) { boolean dataMoved = false; // Node counter int i = 0; NodeList tableRowChildNodes = tableRow.getChildNodes(); while (tableRowChildNodes.getLength() > i) { Node tableRowChildNode = tableRowChildNodes.item(i); if ((tableRowChildNode.getNodeType() == Node.ELEMENT_NODE)) { String nodeName = tableRowChildNode.getNodeName(); if (containsSPB(tableRowChildNode)){ Node tableRowGroupChildFirstPart = tableRowChildNode.cloneNode(false); if (nodeName.equals(TABLE_TABLE_CELL)){ if (handleCell(tableRowGroupChildFirstPart, tableRowChildNode)){ dataMoved = true; tableRowFistPart.appendChild(tableRowGroupChildFirstPart); } } else if (nodeName.equals(TABLE_COVERED_TABLE_CELL)){ //Implement handleCoveredCell in future if (handleCell(tableRowGroupChildFirstPart, tableRowChildNode)){ dataMoved = true; tableRowFistPart.appendChild(tableRowGroupChildFirstPart); } } } else { //System.out.println("HERE " + nodeName); //Move node without SPB above tableRowFistPart.appendChild(tableRowChildNode.cloneNode(true)); Node emptyCell = tableRowChildNode.cloneNode(false); Document document = tableRow.getOwnerDocument(); Element textP = document.createElement(TEXT_P); emptyCell.appendChild(textP); tableRow.insertBefore(emptyCell, tableRowChildNode); tableRow.removeChild(tableRowChildNode); dataMoved = true; } i++; } else { System.out.println("ERROR: TEXT NODE FOUND INSIDE tabl:table-row"); //Append text nodes //tableRowsFistPart.appendChild(tableRowsChildNode); //dataMoved = true; } } return dataMoved; } private static boolean handleCell(Node cellFirstPart, Node cellNode) { boolean dataMoved = false; // Node counter int i = 0; NodeList cellChildNodes = cellNode.getChildNodes(); while (cellChildNodes.getLength() > i) { Node cellChildNode = cellChildNodes.item(0); if ((cellChildNode.getNodeType() == Node.ELEMENT_NODE)) { String nodeName = cellChildNode.getNodeName(); if (containsSPB(cellChildNode)){ if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)){ // remove inner soft page break node cellNode.removeChild(cellChildNode); } else if (nodeName.equals(TEXT_H) || nodeName.equals(TEXT_P)) { Node paraFirstPart = handleParagraph(cellChildNode); if (paraFirstPart != null){ cellFirstPart.appendChild(paraFirstPart); dataMoved=true; } } break; } else { cellFirstPart.appendChild(cellChildNode); dataMoved = true; } } else { //Append text nodes cellFirstPart.appendChild(cellChildNode); dataMoved = true; } } return dataMoved; } private static boolean handleSection(Node sectionFirstPart, Node section) { boolean dataMoved = false; // Node counter int i = 0; NodeList childs = section.getChildNodes(); while (childs.getLength() > i) { Node child = childs.item(0); if ((child.getNodeType() == Node.ELEMENT_NODE)) { String nodeName = child.getNodeName(); if (containsSPB(child)){ Node childFirstPart = child.cloneNode(false); if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)){ // remove inner soft page break node section.removeChild(child); } else if (nodeName.equals(TEXT_H) || nodeName.equals(TEXT_P)) { Node paraFirstPart = handleParagraph(child); if (paraFirstPart != null){ sectionFirstPart.appendChild(paraFirstPart); dataMoved=true; } } else if (nodeName.equals(TEXT_TABLE_OF_CONTENT)) { //HACK removeSPB(section); i++; continue; } else if (nodeName.equals(TABLE_TABLE)) { if (handleTableTable(childFirstPart, child)){ sectionFirstPart.appendChild(childFirstPart); dataMoved=true; } } else if (nodeName.equals(TEXT_SECTION)) { if (handleSection(childFirstPart, child)){ sectionFirstPart.appendChild(childFirstPart); dataMoved=true; } } else if (nodeName.equals(TEXT_LIST)) { if (handleList(childFirstPart, child)){ sectionFirstPart.appendChild(childFirstPart); dataMoved=true; } } //split node with spb and exit break; } else { sectionFirstPart.appendChild(child); dataMoved = true; } } else { //Append text nodes sectionFirstPart.appendChild(child); dataMoved = true; } } return dataMoved; } private static Node handleParagraph(Node para) { Node paraBefore = para.cloneNode(false); boolean dataMoved = false; int i = 0; NodeList сhilds = para.getChildNodes(); while (сhilds.getLength() > i) { Node child = сhilds.item(i); //NOT TEXT NODES if ((child.getNodeType() == Node.ELEMENT_NODE)) { String childName = child.getNodeName(); //SPB FOUND if (containsSPB(child)){ if (childName.equals(TEXT_SOFT_PAGE_BREAK)){ //removeSPB para.removeChild(child); } else { System.out.println("ERROR: SPB INSIDE Paragraph Element in inner element " + childName); System.exit(1); } break; //Other elements } else if (childName.equals(TEXT_BOOKMARK_START)){ paraBefore.appendChild(child.cloneNode(true)); i++; } else { dataMoved = appendChild(paraBefore, child); } //TEXT NODES } else { dataMoved = appendChild(paraBefore, child); } } addHyphen(para,paraBefore); if (dataMoved) { removeIndent(para); return paraBefore; } return null; } private static boolean appendChild(Node parent, Node child) { boolean dataMoved; parent.appendChild(child); dataMoved = true; return dataMoved; } private static void addHyphen(Node para, Node paraBefore) { //Debug.printNode(paraBefore); //Debug.printNode(para); Character softHyphen = 0x00ad; Character lastChar = getLastChar(paraBefore); Character firstChar = getFirstChar(para); if (lastChar == null || firstChar == null) { return; } Node lastNode = paraBefore; while (lastNode.hasChildNodes()) { lastNode = lastNode.getLastChild(); } String lineEndsWith = lastNode.getTextContent(); if (Character.isLetter(lastChar) && Character.isLetter(firstChar)) { lastNode.setTextContent(lastNode.getTextContent() + "\u2010"); } else if (lastChar.equals(softHyphen)) { lastNode.setTextContent(lineEndsWith.substring(0, lineEndsWith.length()-1) + "\u2010"); } } private static Character getLastChar(Node para) { if (para == null) { return null; } Node lastNode = para; while (lastNode.hasChildNodes()) { lastNode = lastNode.getLastChild(); } String content = lastNode.getTextContent(); if (content != null && !content.isEmpty()) { return content.charAt(content.length()-1); } return null; } private static Character getFirstChar(Node para) { if (para == null) { return null; } Node firstNode = para; while (firstNode.hasChildNodes()) { firstNode = firstNode.getFirstChild(); } String content = firstNode.getTextContent(); if (content != null && !content.isEmpty()) { return content.charAt(0); } return null; } private static void removeIndent(Node paraAfter) { String baseStyleName = Misc.getAttribute(paraAfter, TEXT_STYLE_NAME); String newStyleName = officeReader.cloneParStyle(baseStyleName); Node styleAttr = paraAfter.getAttributes().getNamedItem(TEXT_STYLE_NAME); styleAttr.setTextContent(newStyleName); StyleWithProperties newStyle = officeReader.getParStyle(Misc.getAttribute(paraAfter, TEXT_STYLE_NAME)); newStyle.setParProperty(FO_TEXT_INDENT, "0"); } private static void removeSPB(Node node) { if (node.getNodeType() == Node.ELEMENT_NODE) { if (node.getNodeName().equals(TEXT_SOFT_PAGE_BREAK)) { Node parent = node.getParentNode(); parent.removeChild(node); return; } if (node.hasChildNodes()) { int currentNo = 0; NodeList childNodes = node.getChildNodes(); while (currentNo < childNodes.getLength()) { Node childNode = childNodes.item(currentNo); removeSPB(childNode); currentNo++; } } } } private static boolean containsSPB(Node node) { if (node.getNodeType() == Node.ELEMENT_NODE) { if (node.getNodeName().equals(TEXT_SOFT_PAGE_BREAK)) { return true; } if (node.hasChildNodes()) { int currentNo = 0; NodeList childNodes = node.getChildNodes(); while (currentNo < childNodes.getLength()) { Node childNode = childNodes.item(currentNo); if (containsSPB(childNode)) { return true; } currentNo++; } } } return false; } }