From 96cb7d9a72a19b4500d3eeb12d43b072b24bae28 Mon Sep 17 00:00:00 2001 From: Georgy Litvinov Date: Fri, 24 Jul 2020 16:30:30 +0200 Subject: [PATCH] Refactored page splitter --- .../java/w2phtml/xhtml/ODFPageSplitter.java | 77 ++++++++++--------- .../w2phtml/xhtml/content/TextParser.java | 2 +- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/src/main/java/w2phtml/xhtml/ODFPageSplitter.java b/src/main/java/w2phtml/xhtml/ODFPageSplitter.java index 0d317e4..e7f55aa 100644 --- a/src/main/java/w2phtml/xhtml/ODFPageSplitter.java +++ b/src/main/java/w2phtml/xhtml/ODFPageSplitter.java @@ -16,9 +16,14 @@ public class ODFPageSplitter { static Node truncatedListItemNodeContent = null; static OfficeReader officeReader = null; - public static Node splitText(Node onode,OfficeReader ofr){ - //Find par node with soft page break inside and split it + public static void splitOfficeText(Node onode, OfficeReader ofr) { officeReader = ofr; + splitText(onode); + } + + + public static void splitText(Node onode){ + //Find par node with soft page break inside and split it Document document = onode.getOwnerDocument(); Element softPageBreak = document.createElement(TEXT_SOFT_PAGE_BREAK); NodeList nodes = onode.getChildNodes(); @@ -39,11 +44,11 @@ public class ODFPageSplitter { //If SPB not the first node if (handleParagraph(child)){ - style = ofr.getParStyle(Misc.getAttribute(child, TEXT_STYLE_NAME)); + style = officeReader.getParStyle(Misc.getAttribute(child, TEXT_STYLE_NAME)); } } else if (nodeName.equals(TABLE_TABLE)) { if (handleTableTable(child)){ - style = ofr.getTableStyle(Misc.getAttribute(child, TABLE_STYLE_NAME)); + style = officeReader.getTableStyle(Misc.getAttribute(child, TABLE_STYLE_NAME)); } } else if (nodeName.equals(TEXT_LIST)) { handleList(child); @@ -53,7 +58,7 @@ public class ODFPageSplitter { splitABIndex(child); } else if (nodeName.equals(TEXT_SECTION)) { if (handleSection(child)) { - style = ofr.getSectionStyle(Misc.getAttribute(child, TEXT_SECTION)); + style = officeReader.getSectionStyle(Misc.getAttribute(child, TEXT_SECTION)); } } else if (nodeName.equals(TEXT_TABLE_OF_CONTENT)){ //HACK @@ -89,12 +94,12 @@ public class ODFPageSplitter { i++; } //Debug.printNode(onode); - return onode; } - private static void splitABIndex(Node abIndex) { + private static boolean splitABIndex(Node abIndex) { Node parent = abIndex.getParentNode(); Node abIndexFirstPart = abIndex.cloneNode(false); NodeList childs = abIndex.getChildNodes(); + boolean dataMoved = false; int i = 0; while (childs.getLength() > i) { Node child = childs.item(i); @@ -103,20 +108,24 @@ public class ODFPageSplitter { abIndexFirstPart.appendChild(child.cloneNode(true)); } else if (childName.equals(TEXT_INDEX_BODY)) { - Node indexBodyFirstPart = splitTextIndexBody(child); - abIndexFirstPart.appendChild(indexBodyFirstPart); + if (splitTextIndexBody(child)) { + abIndexFirstPart.appendChild(child.getPreviousSibling()); + dataMoved = true; + } } i++; } - if (abIndexFirstPart != null) { + if (dataMoved) { parent.insertBefore(abIndexFirstPart, abIndex); } + return dataMoved; } - private static void splitTextIllustrationIndex(Node illustrationIndex) { + private static boolean splitTextIllustrationIndex(Node illustrationIndex) { Node parent = illustrationIndex.getParentNode(); Node illustrationIndexFirstPart = illustrationIndex.cloneNode(false); NodeList childs = illustrationIndex.getChildNodes(); + boolean dataMoved = false; int i = 0; while (childs.getLength() > i) { Node child = childs.item(i); @@ -125,19 +134,23 @@ public class ODFPageSplitter { illustrationIndexFirstPart.appendChild(child.cloneNode(true)); } else if (childName.equals(TEXT_INDEX_BODY)) { - Node indexBodyFirstPart = splitTextIndexBody(child); - illustrationIndexFirstPart.appendChild(indexBodyFirstPart); + if(splitTextIndexBody(child)) { + illustrationIndexFirstPart.appendChild(child.getPreviousSibling()); + dataMoved = true; + } } i++; } - if (illustrationIndexFirstPart != null) { + if (dataMoved) { parent.insertBefore(illustrationIndexFirstPart, illustrationIndex); } + return dataMoved; } - private static Node splitTextIndexBody(Node indexBody) { + private static boolean splitTextIndexBody(Node indexBody) { Node indexBodyFirstPart = indexBody.cloneNode(false); NodeList childs = indexBody.getChildNodes(); + boolean dataMoved = false; int i = 0; while (childs.getLength() > i) { Node child = childs.item(i); @@ -147,17 +160,20 @@ public class ODFPageSplitter { String childName = child.getNodeName(); if (childName.equals(TEXT_P)) { if (handleParagraph(child)) { - indexBodyFirstPart.appendChild(child.getPreviousSibling()); + indexBodyFirstPart.appendChild(child.getPreviousSibling()); + dataMoved = true; } - return indexBodyFirstPart; + return dataMoved; } else if (childName.equals(TEXT_SOFT_PAGE_BREAK)) { - indexBody.removeChild(child); - return indexBodyFirstPart; + //indexBody.removeChild(child); + //return dataMoved; + System.out.println("Error. Soft page break inside " + childName); + System.exit(1); } } } - return indexBodyFirstPart; + return dataMoved; } private static boolean handleList(Node list){ Node parent = list.getParentNode(); @@ -633,7 +649,7 @@ public class ODFPageSplitter { if (childName.equals(TEXT_NOTE)){ Element textNote = (Element) child; //System.out.println("handle TextNote in para"); - splitText(textNote.getElementsByTagName(TEXT_NOTE_BODY).item(0), officeReader); + splitText(textNote.getElementsByTagName(TEXT_NOTE_BODY).item(0)); } else if (childName.equals(TEXT_SPAN)){ handleSpan(child); @@ -675,7 +691,7 @@ public class ODFPageSplitter { if (child.getNodeType() == Node.ELEMENT_NODE) { if (containsSPB(child)) { if (childName.equals(TEXT_NOTE)) { - splitText(((Element)child).getElementsByTagName(TEXT_NOTE_BODY).item(0), officeReader); + splitText(((Element)child).getElementsByTagName(TEXT_NOTE_BODY).item(0)); } else if (childName.equals(TEXT_SPAN)){ handleSpan(child); } else { @@ -687,22 +703,7 @@ public class ODFPageSplitter { i++; } } - private static Node handleNote(Node note) { - //Debug.printNode(note); - System.exit(1); - Node parent = note.getParentNode(); - Element NoteElement = (Element) note; - Node noteBody = NoteElement.getElementsByTagName(TEXT_NOTE_BODY).item(0); - NodeList noteBodyNodes = noteBody.getChildNodes(); - int i = 0; - boolean foundSPB = false; - while (noteBodyNodes.getLength() > i) { - Node child = noteBodyNodes.item(i); - - } - - return null; - } + private static boolean appendChild(Node parent, Node child) { boolean dataMoved; parent.appendChild(child); diff --git a/src/main/java/w2phtml/xhtml/content/TextParser.java b/src/main/java/w2phtml/xhtml/content/TextParser.java index 8863bf1..e7e3a01 100644 --- a/src/main/java/w2phtml/xhtml/content/TextParser.java +++ b/src/main/java/w2phtml/xhtml/content/TextParser.java @@ -166,7 +166,7 @@ public class TextParser extends Parser { //Split pages if (pagination) { - onode = (Element) ODFPageSplitter.splitText(onode,ofr); + ODFPageSplitter.splitOfficeText(onode, ofr); //Debug.printNode(onode); } hnode = (Element)traverseBlockText(onode,hnode);