Refactored splitters

This commit is contained in:
Georgy Litvinov 2020-07-24 20:45:18 +02:00
parent 3a453fb046
commit b954415d1a
17 changed files with 1239 additions and 0 deletions

View file

@ -0,0 +1,44 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TEXT_ALPHABETICAL_INDEX_SOURCE;
import static w2phtml.office.XMLString.TEXT_INDEX_BODY;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for the index element that carries a {@code TEXT_ALPHABETICAL_INDEX_SOURCE}
 * child and a {@code TEXT_INDEX_BODY} child.
 */
public class ABIndexSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split the index body
     */
    public ABIndexSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Builds a shallow copy of {@code abIndex}, fills it with a deep copy of every index
     * source child and with the node that precedes the index body after the body has been
     * split, and inserts the copy in front of {@code abIndex} when something was moved.
     *
     * @param abIndex index element to split
     * @return {@code true} when the index body reported a split and its preceding sibling
     *         was moved into the copy
     */
    @Override
    public boolean Split(Node abIndex) {
        final Node container = abIndex.getParentNode();
        final Node leadingIndex = abIndex.cloneNode(false);
        final NodeList entries = abIndex.getChildNodes();
        boolean moved = false;

        // The child list is live, so its length is re-read on every pass.
        for (int pos = 0; pos < entries.getLength(); pos++) {
            final Node entry = entries.item(pos);
            final String entryName = entry.getNodeName();

            if (entryName.equals(TEXT_ALPHABETICAL_INDEX_SOURCE)) {
                // The source description is copied, the original stays where it is.
                leadingIndex.appendChild(entry.cloneNode(true));
                continue;
            }
            if (entryName.equals(TEXT_INDEX_BODY) && factory.split(entry)) {
                // After a successful split the node in front of the body is taken over.
                leadingIndex.appendChild(entry.getPreviousSibling());
                moved = true;
            }
        }

        if (moved) {
            container.insertBefore(leadingIndex, abIndex);
        }
        return moved;
    }
}

View file

@ -0,0 +1,128 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.FO_TEXT_INDENT;
import static w2phtml.office.XMLString.TEXT_NOTE_BODY;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import static w2phtml.office.XMLString.TEXT_STYLE_NAME;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
import w2phtml.office.StyleWithProperties;
import w2phtml.util.Misc;
/**
 * Base class of the splitters: keeps the office reader and the split factory and offers
 * static DOM helpers for hyphenation and for finding / removing soft page breaks.
 */
public class BasicSplitter {

    /** U+00AD, the soft hyphen character. */
    private static final char SOFT_HYPHEN = '\u00ad';

    /** U+2010, the hyphen that is written at the end of the first part. */
    private static final String HYPHEN = "\u2010";

    protected OfficeReader officeReader;
    protected SplitFactory factory;

    /**
     * @param officeReader reader used for style lookups
     * @param factory      factory used to split nested nodes
     */
    public BasicSplitter(OfficeReader officeReader, SplitFactory factory) {
        this.officeReader = officeReader;
        this.factory = factory;
    }

    /**
     * Appends {@code child} to {@code parent}.
     *
     * @return always {@code true}, so callers can assign the result to their "data moved" flag
     */
    protected static boolean appendChild(Node parent, Node child) {
        parent.appendChild(child);
        return true;
    }

    /**
     * Terminates {@code paraBefore} with a hyphen when the split went through a word.
     * <p>
     * If the last character of {@code paraBefore} and the first character of {@code para}
     * are both letters, a hyphen is appended to the last leaf of {@code paraBefore}. If the
     * last character is a soft hyphen, it is replaced by a hyphen. Nothing happens when
     * either character cannot be determined.
     *
     * @param para       node that follows the split position
     * @param paraBefore node that precedes the split position; its last leaf may be modified
     */
    protected static void addHyphen(Node para, Node paraBefore) {
        Character lastChar = getLastChar(paraBefore);
        Character firstChar = getFirstChar(para);
        if (lastChar == null || firstChar == null) {
            return;
        }
        Node lastNode = lastLeaf(paraBefore);
        String lineEndsWith = lastNode.getTextContent();
        if (Character.isLetter(lastChar) && Character.isLetter(firstChar)) {
            lastNode.setTextContent(lineEndsWith + HYPHEN);
        } else if (lastChar == SOFT_HYPHEN) {
            lastNode.setTextContent(lineEndsWith.substring(0, lineEndsWith.length() - 1) + HYPHEN);
        }
    }

    /**
     * Returns the last character of the text content of the last leaf below {@code para}.
     *
     * @return the character, or {@code null} when {@code para} is {@code null} or that leaf
     *         has no text
     */
    protected static Character getLastChar(Node para) {
        if (para == null) {
            return null;
        }
        String content = lastLeaf(para).getTextContent();
        if (content != null && !content.isEmpty()) {
            return content.charAt(content.length() - 1);
        }
        return null;
    }

    /**
     * Returns the first character of the text content of the first leaf below {@code para}.
     *
     * @return the character, or {@code null} when {@code para} is {@code null} or that leaf
     *         has no text
     */
    protected static Character getFirstChar(Node para) {
        if (para == null) {
            return null;
        }
        Node firstNode = para;
        while (firstNode.hasChildNodes()) {
            firstNode = firstNode.getFirstChild();
        }
        String content = firstNode.getTextContent();
        if (content != null && !content.isEmpty()) {
            return content.charAt(0);
        }
        return null;
    }

    /**
     * Points {@code paraAfter} at a clone of its paragraph style and sets
     * {@code FO_TEXT_INDENT} of that clone to {@code "0"}.
     *
     * @param paraAfter paragraph whose style attribute is rewritten
     */
    protected void removeIndent(Node paraAfter) {
        String baseStyleName = Misc.getAttribute(paraAfter, TEXT_STYLE_NAME);
        String newStyleName = officeReader.cloneParStyle(baseStyleName);
        // NOTE(review): assumes the paragraph carries a style-name attribute; confirm callers.
        Node styleAttr = paraAfter.getAttributes().getNamedItem(TEXT_STYLE_NAME);
        styleAttr.setTextContent(newStyleName);
        StyleWithProperties newStyle = officeReader.getParStyle(Misc.getAttribute(paraAfter, TEXT_STYLE_NAME));
        newStyle.setParProperty(FO_TEXT_INDENT, "0");
    }

    /**
     * Tells whether {@code node} is, or contains, a soft page break element. A soft page
     * break whose parent is a {@code TEXT_NOTE_BODY} element is not counted.
     *
     * @param node node to inspect; non-element nodes always yield {@code false}
     */
    protected static boolean containsSPB(Node node) {
        if (node.getNodeType() != Node.ELEMENT_NODE) {
            return false;
        }
        if (node.getNodeName().equals(TEXT_SOFT_PAGE_BREAK)) {
            Node parent = node.getParentNode();
            // Names are compared by value; identity comparison only works by accident.
            boolean insideNoteBody = parent != null && TEXT_NOTE_BODY.equals(parent.getNodeName());
            if (!insideNoteBody) {
                return true;
            }
        }
        for (Node child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            if (containsSPB(child)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes every soft page break element at or below {@code node}.
     *
     * @param node root of the subtree to clean; a detached soft page break is left alone
     */
    protected static void removeSPB(Node node) {
        if (node.getNodeType() != Node.ELEMENT_NODE) {
            return;
        }
        if (node.getNodeName().equals(TEXT_SOFT_PAGE_BREAK)) {
            Node parent = node.getParentNode();
            if (parent != null) {
                parent.removeChild(node);
            }
            return;
        }
        Node child = node.getFirstChild();
        while (child != null) {
            // Remember the successor first: the recursive call may detach the child.
            Node next = child.getNextSibling();
            removeSPB(child);
            child = next;
        }
    }

    /** Follows the last-child chain from {@code start} down to a node without children. */
    private static Node lastLeaf(Node start) {
        Node leaf = start;
        while (leaf.hasChildNodes()) {
            leaf = leaf.getLastChild();
        }
        return leaf;
    }
}

View file

@ -0,0 +1,9 @@
package w2phtml.pageSplitters;
import org.w3c.dom.Node;
/**
 * Contract of a splitter: an object that is handed one DOM node and reports whether it
 * moved data while processing it.
 */
public interface ISplitter {

    /**
     * Processes {@code node}.
     *
     * @param node the DOM node to process
     * @return the implementation's "data moved" result
     */
    boolean Split(Node node);
}

View file

@ -0,0 +1,44 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TEXT_ILLUSTRATION_INDEX_SOURCE;
import static w2phtml.office.XMLString.TEXT_INDEX_BODY;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for the index element that carries a {@code TEXT_ILLUSTRATION_INDEX_SOURCE}
 * child and a {@code TEXT_INDEX_BODY} child.
 */
public class IllustrationIndexSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split the index body
     */
    public IllustrationIndexSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Creates a shallow copy of {@code illustrationIndex}, adds a deep copy of each index
     * source child, adds the node preceding the index body once the body reported a split,
     * and inserts the copy before {@code illustrationIndex} if anything was moved.
     *
     * @param illustrationIndex index element to split
     * @return {@code true} when the index body was split and its preceding sibling was moved
     */
    @Override
    public boolean Split(Node illustrationIndex) {
        final Node owner = illustrationIndex.getParentNode();
        final Node headPart = illustrationIndex.cloneNode(false);
        final NodeList members = illustrationIndex.getChildNodes();
        boolean anythingMoved = false;

        int cursor = 0;
        while (cursor < members.getLength()) {
            final Node member = members.item(cursor);
            cursor++;

            final String memberName = member.getNodeName();
            final boolean isSource = memberName.equals(TEXT_ILLUSTRATION_INDEX_SOURCE);
            if (isSource) {
                // Copy the source description; the original remains in place.
                headPart.appendChild(member.cloneNode(true));
            } else if (memberName.equals(TEXT_INDEX_BODY)) {
                final boolean bodySplit = factory.split(member);
                if (bodySplit) {
                    headPart.appendChild(member.getPreviousSibling());
                    anythingMoved = true;
                }
            }
        }

        if (!anythingMoved) {
            return false;
        }
        owner.insertBefore(headPart, illustrationIndex);
        return true;
    }
}

View file

@ -0,0 +1,47 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TEXT_P;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for an index body: everything in front of the first child that contains a soft
 * page break goes into a new leading copy of the body.
 */
public class IndexBodySplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split the paragraph that contains the break
     */
    public IndexBodySplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Moves the children that do not contain a soft page break into a shallow copy of
     * {@code indexBody}. The first {@code TEXT_P} child that contains a break is split
     * through the factory and its leading part is moved as well. The copy is inserted in
     * front of {@code indexBody} when it received any node, so that callers find it as the
     * previous sibling of {@code indexBody}.
     * <p>
     * Any other child that contains a break (including a break that is a direct child)
     * is reported on standard output and terminates the JVM with status 1.
     *
     * @param indexBody index body element to split
     * @return {@code true} when at least one node was moved into the leading copy
     */
    @Override
    public boolean Split(Node indexBody) {
        Node parent = indexBody.getParentNode();
        Node indexBodyFirstPart = indexBody.cloneNode(false);
        boolean dataMoved = false;

        Node child;
        while ((child = indexBody.getFirstChild()) != null) {
            if (!containsSPB(child)) {
                // Moving the node also removes it from indexBody, so the loop advances.
                indexBodyFirstPart.appendChild(child);
                dataMoved = true;
                continue;
            }
            String childName = child.getNodeName();
            if (childName.equals(TEXT_P)) {
                if (factory.split(child)) {
                    indexBodyFirstPart.appendChild(child.getPreviousSibling());
                    dataMoved = true;
                }
            } else {
                // Previously an unexpected element here made the loop spin forever.
                System.out.println("Error. Soft page break inside " + childName);
                System.exit(1);
            }
            // Only the first break is handled per call.
            break;
        }

        if (dataMoved) {
            // Without this the collected nodes would be dropped from the document.
            parent.insertBefore(indexBodyFirstPart, indexBody);
        }
        return dataMoved;
    }
}

View file

@ -0,0 +1,69 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TEXT_H;
import static w2phtml.office.XMLString.TEXT_LIST;
import static w2phtml.office.XMLString.TEXT_P;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for a list item: content in front of the first child that contains a soft page
 * break is moved into a new leading copy of the item.
 */
public class ListItemSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split nested lists, headings and paragraphs
     */
    public ListItemSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Moves leading children of {@code listItem} into a shallow copy until a child that
     * contains a soft page break is met, handles that child, and inserts the copy before
     * {@code listItem} if anything was moved.
     *
     * @param listItem list item to split
     * @return {@code true} when a node was moved or a nested split reported success
     */
    @Override
    public boolean Split(Node listItem) {
        final Node leadingItem = listItem.cloneNode(false);
        final Node owner = listItem.getParentNode();
        boolean moved = false;

        // Every pass either removes the first child from listItem or leaves the loop.
        while (listItem.hasChildNodes()) {
            final Node head = listItem.getFirstChild();
            final boolean carriesBreak =
                    head.getNodeType() == Node.ELEMENT_NODE && containsSPB(head);

            if (!carriesBreak) {
                leadingItem.appendChild(head);
                moved = true;
                continue;
            }

            final String headName = head.getNodeName();
            if (headName.equals(TEXT_SOFT_PAGE_BREAK)) {
                listItem.removeChild(head);
            } else if (headName.equals(TEXT_LIST)) {
                // A nested list is split in place; its leading part is not taken over here.
                if (factory.split(head)) {
                    moved = true;
                }
            } else if (headName.equals(TEXT_H) || headName.equals(TEXT_P)) {
                if (factory.split(head)) {
                    leadingItem.appendChild(head.getPreviousSibling());
                    moved = true;
                }
            } else {
                System.out.println("Error. SPB in List item child node " + headName);
                System.exit(1);
            }
            break;
        }

        if (moved) {
            owner.insertBefore(leadingItem, listItem);
        }
        return moved;
    }
}

View file

@ -0,0 +1,93 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TEXT_CONTINUE_NUMBERING;
import static w2phtml.office.XMLString.TEXT_LIST_HEADER;
import static w2phtml.office.XMLString.TEXT_LIST_ITEM;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for a list: items in front of the first item that contains a soft page break
 * are moved into a new leading copy of the list.
 */
public class ListSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split the list item that contains the break
     */
    public ListSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Splits {@code list} at the first soft page break.
     * <ul>
     * <li>A list header without a break is copied into the leading list and stays in place;
     *     a header with a break only has its breaks removed and ends the pass.</li>
     * <li>Items without a break are moved into the leading list.</li>
     * <li>The first item with a break is split through the factory; on success its leading
     *     part is moved, and what is left of the item is either removed (when empty) or
     *     unwrapped: its children are placed between the two lists.</li>
     * <li>Non-element children are skipped.</li>
     * <li>Any other element is reported and terminates the JVM with status 1.</li>
     * </ul>
     * When data was moved, the leading list and a new soft page break element are inserted
     * in front of {@code list}, and {@code list} gets {@code TEXT_CONTINUE_NUMBERING="true"}
     * if the break was inside an item.
     *
     * @param list list element to split
     * @return {@code true} when at least one item (or item part) was moved
     */
    @Override
    public boolean Split(Node list) {
        Node parent = list.getParentNode();
        Node listFirstPart = list.cloneNode(false);
        NodeList listNodes = list.getChildNodes();
        Node hangingItem = null;
        int i = 0;
        boolean dataMoved = false;

        while (listNodes.getLength() > i) {
            Node child = listNodes.item(i);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                // Text or comment nodes are neither moved nor split; step over them,
                // otherwise the same node would be examined forever.
                i++;
                continue;
            }
            String nodeName = child.getNodeName();
            if (nodeName.equals(TEXT_LIST_HEADER)) {
                if (containsSPB(child)) {
                    // HACK kept from the original: drop the breaks and stop.
                    removeSPB(child);
                    break;
                }
                listFirstPart.appendChild(child.cloneNode(true));
                // The header itself stays in the list, so step over it.
                i++;
            } else if (nodeName.equals(TEXT_LIST_ITEM)) {
                if (containsSPB(child)) {
                    if (factory.split(child)) {
                        dataMoved = true;
                        listFirstPart.appendChild(child.getPreviousSibling());
                        if (child.hasChildNodes()) {
                            hangingItem = child;
                        } else {
                            list.removeChild(child);
                        }
                    }
                    if (dataMoved) {
                        ((Element) list).setAttribute(TEXT_CONTINUE_NUMBERING, "true");
                    }
                    break;
                }
                // No break yet: the whole item belongs to the leading list.
                listFirstPart.appendChild(child);
                dataMoved = true;
            } else {
                System.out.println("Error. SPB in List child node " + nodeName);
                System.exit(1);
            }
        }

        if (dataMoved) {
            Document document = list.getOwnerDocument();
            Element softPageBreak = document.createElement(TEXT_SOFT_PAGE_BREAK);
            parent.insertBefore(listFirstPart, list);
            parent.insertBefore(softPageBreak, list);
            if (hangingItem != null) {
                // Unwrap the remainder of the split item in front of the trailing list.
                while (hangingItem.hasChildNodes()) {
                    parent.insertBefore(hangingItem.getFirstChild(), list);
                }
                list.removeChild(hangingItem);
            }
        }
        return dataMoved;
    }
}

View file

@ -0,0 +1,80 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TEXT_BOOKMARK_START;
import static w2phtml.office.XMLString.TEXT_NOTE;
import static w2phtml.office.XMLString.TEXT_NOTE_BODY;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import static w2phtml.office.XMLString.TEXT_SPAN;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for paragraph-like nodes: content in front of the first child that contains a
 * soft page break is moved into a new leading copy of the node.
 */
public class PageSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split notes and spans
     */
    public PageSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Walks the children of {@code para}: bookmark starts are copied into the leading part
     * and left in place, other children are moved there, and the first element that
     * contains a soft page break is handled and ends the walk. Afterwards hyphenation is
     * applied and, if anything was moved, the indent of {@code para} is removed and the
     * leading part is inserted in front of it.
     *
     * @param para paragraph-like node to split
     * @return {@code true} when at least one child was moved into the leading part
     */
    @Override
    public boolean Split(Node para) {
        final Node container = para.getParentNode();
        final Node leadingPara = para.cloneNode(false);
        final NodeList content = para.getChildNodes();
        boolean moved = false;
        int cursor = 0;

        while (cursor < content.getLength()) {
            final Node piece = content.item(cursor);
            final boolean isElement = piece.getNodeType() == Node.ELEMENT_NODE;

            if (isElement && containsSPB(piece)) {
                handleBreakCarrier(para, piece);
                break;
            }
            if (isElement && piece.getNodeName().equals(TEXT_BOOKMARK_START)) {
                // Copied, not moved: the cursor has to step over the original.
                leadingPara.appendChild(piece.cloneNode(true));
                cursor++;
                continue;
            }
            // Text nodes and all remaining elements are moved; the live list shrinks.
            leadingPara.appendChild(piece);
            moved = true;
        }

        addHyphen(para, leadingPara);
        if (moved) {
            removeIndent(para);
            container.insertBefore(leadingPara, para);
        }
        return moved;
    }

    /**
     * Deals with the child of {@code para} that contains the soft page break: a break
     * element is removed, the body of a note and a span are passed to the factory, and
     * anything else is reported and terminates the JVM with status 1.
     */
    private void handleBreakCarrier(Node para, Node carrier) {
        final String carrierName = carrier.getNodeName();
        if (carrierName.equals(TEXT_SOFT_PAGE_BREAK)) {
            para.removeChild(carrier);
        } else if (carrierName.equals(TEXT_NOTE)) {
            final Element note = (Element) carrier;
            factory.split(note.getElementsByTagName(TEXT_NOTE_BODY).item(0));
        } else if (carrierName.equals(TEXT_SPAN)) {
            factory.split(carrier);
        } else {
            System.out.println("ERROR: SPB INSIDE Paragraph Element in inner element " + carrierName);
            System.exit(1);
        }
    }
}

View file

@ -0,0 +1,102 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TABLE_TABLE;
import static w2phtml.office.XMLString.TEXT_H;
import static w2phtml.office.XMLString.TEXT_LIST;
import static w2phtml.office.XMLString.TEXT_P;
import static w2phtml.office.XMLString.TEXT_SECTION;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import static w2phtml.office.XMLString.TEXT_TABLE_OF_CONTENT;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for a section: content in front of the first child that contains a soft page
 * break is moved into a new leading copy of the section.
 */
public class SectionSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split nested nodes
     */
    public SectionSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Marks {@code section} with attribute {@code last="true"}, creates a shallow copy
     * marked {@code last="false"}, moves leading children into the copy and splits the
     * first child that contains a soft page break. The copy is inserted in front of
     * {@code section} if anything was moved.
     *
     * @param section section element to split
     * @return {@code true} when a node was moved or a nested list split reported success
     */
    @Override
    public boolean Split(Node section) {
        final Node owner = section.getParentNode();
        ((Element) section).setAttribute("last", "true");
        final Node leadingSection = section.cloneNode(false);
        ((Element) leadingSection).setAttribute("last", "false");

        boolean moved = false;
        // Only ever advanced by the table-of-content branch below.
        int skipped = 0;
        final NodeList content = section.getChildNodes();

        while (content.getLength() > skipped) {
            // Always the first child: handled nodes are moved out of the section.
            final Node head = content.item(0);
            final boolean carriesBreak =
                    head.getNodeType() == Node.ELEMENT_NODE && containsSPB(head);

            if (!carriesBreak) {
                leadingSection.appendChild(head);
                moved = true;
                continue;
            }

            final String headName = head.getNodeName();
            if (headName.equals(TEXT_SOFT_PAGE_BREAK)) {
                section.removeChild(head);
            } else if (headName.equals(TEXT_H) || headName.equals(TEXT_P)) {
                moved |= splitAndCollect(head, leadingSection);
            } else if (headName.equals(TEXT_TABLE_OF_CONTENT)) {
                // HACK kept from the original: all breaks of the section are dropped.
                // NOTE(review): the counter is raised while item(0) is still read, so the
                // loop now ends with one child left in the section - confirm this is wanted.
                removeSPB(section);
                skipped++;
                continue;
            } else if (headName.equals(TABLE_TABLE) || headName.equals(TEXT_SECTION)) {
                moved |= splitAndCollect(head, leadingSection);
            } else if (headName.equals(TEXT_LIST)) {
                // A list is split in place; its leading part is not taken over here.
                if (factory.split(head)) {
                    moved = true;
                }
            } else {
                System.out.println("Error. SPB in Section child node " + headName);
                System.exit(1);
            }
            break;
        }

        if (moved) {
            owner.insertBefore(leadingSection, section);
        }
        return moved;
    }

    /**
     * Splits {@code node} through the factory and, on success, moves the node that now
     * precedes it into {@code target}.
     *
     * @return the result of the factory call
     */
    private boolean splitAndCollect(Node node, Node target) {
        if (!factory.split(node)) {
            return false;
        }
        target.appendChild(node.getPreviousSibling());
        return true;
    }
}

View file

@ -0,0 +1,43 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TEXT_NOTE;
import static w2phtml.office.XMLString.TEXT_NOTE_BODY;
import static w2phtml.office.XMLString.TEXT_SPAN;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for a span: it does not divide the span itself but forwards nested notes and
 * spans that contain a soft page break to the factory.
 */
public class SpanSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used for nested notes and spans
     */
    public SpanSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Visits every child of {@code span}. For element children that contain a soft page
     * break, the body of a note or a nested span is passed to the factory; any other
     * element is reported and terminates the JVM with status 1.
     *
     * @param span span element to inspect
     * @return always {@code false}
     */
    @Override
    public boolean Split(Node span) {
        final NodeList kids = span.getChildNodes();

        for (int idx = 0; idx < kids.getLength(); idx++) {
            final Node kid = kids.item(idx);
            final String kidName = kid.getNodeName();

            if (kid.getNodeType() != Node.ELEMENT_NODE || !containsSPB(kid)) {
                continue;
            }
            if (kidName.equals(TEXT_NOTE)) {
                final Element note = (Element) kid;
                factory.split(note.getElementsByTagName(TEXT_NOTE_BODY).item(0));
            } else if (kidName.equals(TEXT_SPAN)) {
                factory.split(kid);
            } else {
                System.out.println("Error. SPB in unknown element " + kidName);
                System.exit(1);
            }
        }
        return false;
    }
}

View file

@ -0,0 +1,72 @@
package w2phtml.pageSplitters;
import org.w3c.dom.Node;
import static w2phtml.office.XMLString.*;
import pro.litvinovg.xml.Debug;
import w2phtml.office.OfficeReader;
/**
 * Chooses the splitter implementation for a DOM node by the node's name and runs it.
 */
public class SplitFactory {

    private OfficeReader officeReader;

    /**
     * @param ofr reader that is passed to every splitter this factory creates
     */
    public SplitFactory(OfficeReader ofr) {
        this.officeReader = ofr;
    }

    /**
     * Splits {@code node} with the splitter registered for its node name.
     * <p>
     * A {@code null} node or a node name without a splitter is reported on standard output
     * and terminates the JVM with status 1. The node name is also printed on every call.
     *
     * @param node node to split
     * @return the result of the chosen splitter's {@code Split} call
     */
    public boolean split(Node node) {
        if (node == null) {
            System.out.println("Error. Node is null.");
            Debug.printStackTrace();
            System.exit(1);
        }
        String nodeName = node.getNodeName();
        System.out.println(nodeName);
        ISplitter splitter = createSplitter(nodeName);
        if (splitter == null) {
            System.out.println("Error. Splitter for element " + nodeName + " is not implemented, yet. Exit.");
            System.exit(1);
        }
        return splitter.Split(node);
    }

    /**
     * Maps a node name to a new splitter instance.
     *
     * @return the splitter, or {@code null} when no splitter handles {@code nodeName}
     */
    private ISplitter createSplitter(String nodeName) {
        if (nodeName.equals(TEXT_SPAN)) {
            return new SpanSplitter(officeReader, this);
        }
        if (nodeName.equals(TEXT_P) || nodeName.equals(TEXT_H)) {
            return new PageSplitter(officeReader, this);
        }
        // The paragraph and span splitters hand over the note BODY element, so that name
        // needs a mapping as well; the note element itself stays supported.
        if (nodeName.equals(TEXT_NOTE_BODY) || nodeName.equals(TEXT_NOTE) || nodeName.equals(OFFICE_TEXT)) {
            return new TextSplitter(officeReader, this);
        }
        if (nodeName.equals(TEXT_ILLUSTRATION_INDEX)) {
            return new IllustrationIndexSplitter(officeReader, this);
        }
        if (nodeName.equals(TEXT_INDEX_BODY)) {
            return new IndexBodySplitter(officeReader, this);
        }
        if (nodeName.equals(TEXT_ALPHABETICAL_INDEX)) {
            return new ABIndexSplitter(officeReader, this);
        }
        if (nodeName.equals(TABLE_TABLE)) {
            return new TableSplitter(officeReader, this);
        }
        if (nodeName.equals(TABLE_TABLE_ROW)) {
            return new TableRowSplitter(officeReader, this);
        }
        if (nodeName.equals(TABLE_TABLE_ROWS)) {
            return new TableRowsSplitter(officeReader, this);
        }
        if (nodeName.equals(TEXT_LIST)) {
            return new ListSplitter(officeReader, this);
        }
        if (nodeName.equals(TEXT_SECTION)) {
            return new SectionSplitter(officeReader, this);
        }
        if (nodeName.equals(TEXT_LIST_ITEM)) {
            return new ListItemSplitter(officeReader, this);
        }
        if (nodeName.equals(TABLE_TABLE_ROW_GROUP)) {
            return new TableRowGroupSplitter(officeReader, this);
        }
        if (nodeName.equals(TABLE_TABLE_CELL) || nodeName.equals(TABLE_COVERED_TABLE_CELL)) {
            return new TableCellSplitter(officeReader, this);
        }
        return null;
    }
}

View file

@ -0,0 +1,76 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TABLE_TABLE;
import static w2phtml.office.XMLString.TEXT_H;
import static w2phtml.office.XMLString.TEXT_LIST;
import static w2phtml.office.XMLString.TEXT_P;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for a table cell: content in front of the first child that contains a soft page
 * break is moved into a new leading copy of the cell.
 */
public class TableCellSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split nested nodes
     */
    public TableCellSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Moves leading children of {@code cellNode} into a shallow copy, splits the first
     * child that contains a soft page break and inserts the copy in front of
     * {@code cellNode} if anything was moved.
     *
     * @param cellNode cell element to split
     * @return {@code true} when at least one node was moved into the leading copy
     */
    @Override
    public boolean Split(Node cellNode) {
        final Node leadingCell = cellNode.cloneNode(false);
        final Node row = cellNode.getParentNode();
        boolean moved = false;

        // Each pass removes the first child from the cell or leaves the loop.
        while (cellNode.hasChildNodes()) {
            final Node head = cellNode.getFirstChild();
            final boolean carriesBreak =
                    head.getNodeType() == Node.ELEMENT_NODE && containsSPB(head);

            if (!carriesBreak) {
                leadingCell.appendChild(head);
                moved = true;
                continue;
            }

            final String headName = head.getNodeName();
            final boolean splittable = headName.equals(TEXT_H)
                    || headName.equals(TEXT_P)
                    || headName.equals(TEXT_LIST)
                    || headName.equals(TABLE_TABLE);

            if (headName.equals(TEXT_SOFT_PAGE_BREAK)) {
                cellNode.removeChild(head);
            } else if (splittable) {
                if (factory.split(head)) {
                    leadingCell.appendChild(head.getPreviousSibling());
                    moved = true;
                }
            } else {
                System.out.println("Error. SPB in Cell child node " + headName);
                System.exit(1);
            }
            break;
        }

        if (moved) {
            row.insertBefore(leadingCell, cellNode);
        }
        return moved;
    }
}

View file

@ -0,0 +1,86 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TABLE_TABLE_HEADER_ROWS;
import static w2phtml.office.XMLString.TABLE_TABLE_ROW;
import static w2phtml.office.XMLString.TABLE_TABLE_ROWS;
import static w2phtml.office.XMLString.TABLE_TABLE_ROW_GROUP;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for a table row group: rows in front of the first child that contains a soft
 * page break are moved into a new leading copy of the group.
 */
public class TableRowGroupSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split nested rows and row groups
     */
    public TableRowGroupSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Splits {@code tableRowGroup} at the first soft page break.
     * <p>
     * Header rows without a break are copied into the leading group and stay in place;
     * all other children without a break are moved. The first child with a break is
     * removed (break element), split through the factory (row, row group, rows) or left
     * untouched (header rows), and ends the pass. The leading group is inserted in front of
     * {@code tableRowGroup} if anything was moved.
     *
     * @param tableRowGroup row group element to split
     * @return {@code true} when at least one node was moved into the leading group
     */
    @Override
    public boolean Split(Node tableRowGroup) {
        boolean dataMoved = false;
        Node parent = tableRowGroup.getParentNode();
        Node tableRowGroupFistPart = tableRowGroup.cloneNode(false);
        // Index of the next unprocessed child; grows only for children that stay in place.
        int i = 0;
        NodeList tableRowGroupChildNodes = tableRowGroup.getChildNodes();

        while (tableRowGroupChildNodes.getLength() > i) {
            // Must follow the index: reading item(0) would revisit a copied header
            // forever-until-the-counter-runs-out and never reach the rows behind it.
            Node tableRowGroupChildNode = tableRowGroupChildNodes.item(i);
            if (tableRowGroupChildNode.getNodeType() != Node.ELEMENT_NODE) {
                // Non-element nodes are moved along.
                tableRowGroupFistPart.appendChild(tableRowGroupChildNode);
                dataMoved = true;
                continue;
            }
            String nodeName = tableRowGroupChildNode.getNodeName();
            if (containsSPB(tableRowGroupChildNode)) {
                if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)) {
                    tableRowGroup.removeChild(tableRowGroupChildNode);
                } else if (nodeName.equals(TABLE_TABLE_HEADER_ROWS)) {
                    // Header rows are a description node: nothing is moved for them.
                } else if (nodeName.equals(TABLE_TABLE_ROW)
                        || nodeName.equals(TABLE_TABLE_ROW_GROUP)
                        || nodeName.equals(TABLE_TABLE_ROWS)) {
                    if (factory.split(tableRowGroupChildNode)) {
                        dataMoved = true;
                        tableRowGroupFistPart.appendChild(tableRowGroupChildNode.getPreviousSibling());
                    }
                }
                break;
            }
            if (nodeName.equals(TABLE_TABLE_HEADER_ROWS)) {
                tableRowGroupFistPart.appendChild(tableRowGroupChildNode.cloneNode(true));
                // The original header stays, so step over it.
                i++;
            } else {
                tableRowGroupFistPart.appendChild(tableRowGroupChildNode);
                dataMoved = true;
            }
        }

        if (dataMoved) {
            parent.insertBefore(tableRowGroupFistPart, tableRowGroup);
        }
        return dataMoved;
    }
}

View file

@ -0,0 +1,77 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TABLE_COVERED_TABLE_CELL;
import static w2phtml.office.XMLString.TABLE_TABLE_CELL;
import static w2phtml.office.XMLString.TEXT_P;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for a table row: builds a leading row that receives the content in front of the
 * soft page break of every cell.
 */
public class TableRowSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split the cells
     */
    public TableRowSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Processes every child of {@code tableRow}.
     * <ul>
     * <li>A cell or covered cell that contains a soft page break is split through the
     *     factory; on success its leading part is moved into the leading row.</li>
     * <li>An element without a break is deep-copied into the leading row and replaced in
     *     {@code tableRow} by a shallow copy that holds one empty {@code TEXT_P}.</li>
     * <li>A non-element child is reported on standard output and skipped.</li>
     * </ul>
     * The leading row is inserted in front of {@code tableRow} if anything was moved.
     *
     * @param tableRow row element to split
     * @return {@code true} when at least one cell (or cell part) went into the leading row
     */
    @Override
    public boolean Split(Node tableRow) {
        Node tableRowFirstPart = tableRow.cloneNode(false);
        Node parent = tableRow.getParentNode();
        boolean dataMoved = false;
        int i = 0;
        NodeList tableRowChildNodes = tableRow.getChildNodes();

        while (tableRowChildNodes.getLength() > i) {
            Node tableRowChildNode = tableRowChildNodes.item(i);
            if (tableRowChildNode.getNodeType() != Node.ELEMENT_NODE) {
                System.out.println("ERROR: TEXT NODE FOUND INSIDE tabl:table-row");
                // Step over the node; without this the same node is reported forever.
                i++;
                continue;
            }
            String nodeName = tableRowChildNode.getNodeName();
            if (containsSPB(tableRowChildNode)) {
                if (nodeName.equals(TABLE_TABLE_CELL) || nodeName.equals(TABLE_COVERED_TABLE_CELL)) {
                    if (factory.split(tableRowChildNode)) {
                        dataMoved = true;
                        tableRowFirstPart.appendChild(tableRowChildNode.getPreviousSibling());
                    }
                }
            } else {
                // The whole cell belongs in front of the break; leave an empty cell behind
                // so the trailing row keeps a child at this position.
                tableRowFirstPart.appendChild(tableRowChildNode.cloneNode(true));
                Node emptyCell = tableRowChildNode.cloneNode(false);
                Document document = tableRow.getOwnerDocument();
                Element textP = document.createElement(TEXT_P);
                emptyCell.appendChild(textP);
                tableRow.insertBefore(emptyCell, tableRowChildNode);
                tableRow.removeChild(tableRowChildNode);
                dataMoved = true;
            }
            // Every element child leaves exactly one node at this index.
            i++;
        }

        if (dataMoved) {
            parent.insertBefore(tableRowFirstPart, tableRow);
        }
        return dataMoved;
    }
}

View file

@ -0,0 +1,61 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TABLE_TABLE_ROW;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for a rows container: rows in front of the first child that contains a soft
 * page break are moved into a new leading copy of the container.
 */
public class TableRowsSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split the row that contains the break
     */
    public TableRowsSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Moves leading element children of {@code tableRows} into a shallow copy. The first
     * child that contains a soft page break is removed (break element) or split through
     * the factory (row) and ends the pass. Non-element children are reported on standard
     * output and skipped. The copy is inserted in front of {@code tableRows} if anything
     * was moved.
     *
     * @param tableRows rows container to split
     * @return {@code true} when at least one row (or row part) was moved
     */
    @Override
    public boolean Split(Node tableRows) {
        Node tableRowsFirstPart = tableRows.cloneNode(false);
        Node parent = tableRows.getParentNode();
        boolean dataMoved = false;
        // Index of the next unprocessed child; grows only for skipped non-element nodes.
        int i = 0;
        NodeList tableRowsChildNodes = tableRows.getChildNodes();

        while (tableRowsChildNodes.getLength() > i) {
            Node tableRowsChildNode = tableRowsChildNodes.item(i);
            if (tableRowsChildNode.getNodeType() != Node.ELEMENT_NODE) {
                System.out.println("ERROR: TEXT NODE FOUND INSIDE tabl:table-rows");
                // Step over the node; without this the same node is reported forever.
                i++;
                continue;
            }
            String nodeName = tableRowsChildNode.getNodeName();
            if (containsSPB(tableRowsChildNode)) {
                if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)) {
                    tableRows.removeChild(tableRowsChildNode);
                } else if (nodeName.equals(TABLE_TABLE_ROW)) {
                    if (factory.split(tableRowsChildNode)) {
                        dataMoved = true;
                        tableRowsFirstPart.appendChild(tableRowsChildNode.getPreviousSibling());
                    }
                }
                break;
            }
            // Moving the row removes it from the live list, so the index stays put.
            tableRowsFirstPart.appendChild(tableRowsChildNode);
            dataMoved = true;
        }

        if (dataMoved) {
            parent.insertBefore(tableRowsFirstPart, tableRows);
        }
        return dataMoved;
    }
}

View file

@ -0,0 +1,107 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.TABLE_TABLE_COLUMN;
import static w2phtml.office.XMLString.TABLE_TABLE_COLUMN_GROUP;
import static w2phtml.office.XMLString.TABLE_TABLE_HEADER_COLUMNS;
import static w2phtml.office.XMLString.TABLE_TABLE_HEADER_ROWS;
import static w2phtml.office.XMLString.TABLE_TABLE_ROW;
import static w2phtml.office.XMLString.TABLE_TABLE_ROWS;
import static w2phtml.office.XMLString.TABLE_TABLE_ROW_GROUP;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import w2phtml.office.OfficeReader;
/**
 * Splitter for a table: rows in front of the first child that contains a soft page break
 * are moved into a new leading copy of the table.
 */
public class TableSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader handed on to {@link BasicSplitter}
     * @param factory      factory used to split rows, row containers and row groups
     */
    public TableSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Splits {@code table} at the first soft page break.
     * <ul>
     * <li>Description nodes (column, column group, header rows, header columns) without a
     *     break are copied into the leading table and stay in place.</li>
     * <li>Other element children without a break are moved into the leading table.</li>
     * <li>The first child with a break ends the pass: a break element is removed; a row
     *     group, rows container or row is split through the factory and its leading part
     *     is moved; a description node only has its breaks removed.</li>
     * <li>Non-element children are skipped.</li>
     * </ul>
     * The leading table is inserted in front of {@code table} if anything was moved.
     *
     * @param table table element to split
     * @return {@code true} when at least one node was moved into the leading table
     */
    @Override
    public boolean Split(Node table) {
        Node parent = table.getParentNode();
        Node tableFirstPart = table.cloneNode(false);
        NodeList tableChildNodes = table.getChildNodes();
        // Index of the next unprocessed child; grows only for children that stay in place.
        int i = 0;
        boolean dataMoved = false;

        while (tableChildNodes.getLength() > i) {
            Node tableChildNode = tableChildNodes.item(i);
            if (tableChildNode.getNodeType() != Node.ELEMENT_NODE) {
                // Text or comment nodes are left alone; step over them, otherwise the
                // same node would be examined forever.
                i++;
                continue;
            }
            String tableChildNodeName = tableChildNode.getNodeName();
            boolean descriptionNode = isDescriptionNode(tableChildNodeName);

            if (containsSPB(tableChildNode)) {
                if (tableChildNodeName.equals(TEXT_SOFT_PAGE_BREAK)) {
                    table.removeChild(tableChildNode);
                } else if (tableChildNodeName.equals(TABLE_TABLE_ROW_GROUP)
                        || tableChildNodeName.equals(TABLE_TABLE_ROWS)
                        || tableChildNodeName.equals(TABLE_TABLE_ROW)) {
                    if (factory.split(tableChildNode)) {
                        dataMoved = true;
                        tableFirstPart.appendChild(tableChildNode.getPreviousSibling());
                    }
                } else if (descriptionNode) {
                    removeSPB(tableChildNode);
                }
                break;
            }
            if (descriptionNode) {
                // Both tables need the description, so it is copied rather than moved.
                tableFirstPart.appendChild(tableChildNode.cloneNode(true));
                i++;
            } else {
                tableFirstPart.appendChild(tableChildNode);
                dataMoved = true;
            }
        }

        if (dataMoved) {
            parent.insertBefore(tableFirstPart, table);
        }
        return dataMoved;
    }

    /** Tells whether {@code name} is one of the column / header description elements. */
    private static boolean isDescriptionNode(String name) {
        return name.equals(TABLE_TABLE_COLUMN)
                || name.equals(TABLE_TABLE_COLUMN_GROUP)
                || name.equals(TABLE_TABLE_HEADER_ROWS)
                || name.equals(TABLE_TABLE_HEADER_COLUMNS);
    }
}

View file

@ -0,0 +1,101 @@
package w2phtml.pageSplitters;
import static w2phtml.office.XMLString.FO_BREAK_BEFORE;
import static w2phtml.office.XMLString.TABLE_STYLE_NAME;
import static w2phtml.office.XMLString.TABLE_TABLE;
import static w2phtml.office.XMLString.TEXT_ALPHABETICAL_INDEX;
import static w2phtml.office.XMLString.TEXT_H;
import static w2phtml.office.XMLString.TEXT_ILLUSTRATION_INDEX;
import static w2phtml.office.XMLString.TEXT_LIST;
import static w2phtml.office.XMLString.TEXT_P;
import static w2phtml.office.XMLString.TEXT_SECTION;
import static w2phtml.office.XMLString.TEXT_SOFT_PAGE_BREAK;
import static w2phtml.office.XMLString.TEXT_STYLE_NAME;
import static w2phtml.office.XMLString.TEXT_TABLE_OF_CONTENT;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import pro.litvinovg.xml.Debug;
import w2phtml.office.OfficeReader;
import w2phtml.office.StyleWithProperties;
import w2phtml.util.Misc;
/**
 * Splitter for a container of block-level nodes: every child that contains a soft page
 * break is split until none is left, and a soft page break element is placed between the
 * resulting parts.
 */
public class TextSplitter extends BasicSplitter implements ISplitter {

    /**
     * @param officeReader reader used for style lookups
     * @param factory      factory used to split the children
     */
    public TextSplitter(OfficeReader officeReader, SplitFactory factory) {
        super(officeReader, factory);
    }

    /**
     * Walks the children of {@code onode}. A child that contains a soft page break is
     * split through the factory and then examined again, so a child with several breaks
     * is split repeatedly. After each split a soft page break element is inserted in
     * front of the child, unless the child is a list or its style has
     * {@code FO_BREAK_BEFORE} set to {@code "page"}. A child left without children is
     * removed. Tables of content and soft page break elements are stepped over. Any other
     * element that contains a break is printed and terminates the JVM with status 1.
     *
     * @param onode container whose children are split
     * @return always {@code false}
     */
    @Override
    public boolean Split(Node onode) {
        Document document = onode.getOwnerDocument();
        Element softPageBreak = document.createElement(TEXT_SOFT_PAGE_BREAK);
        NodeList nodes = onode.getChildNodes();
        int i = 0;

        while (i < nodes.getLength()) {
            Node child = nodes.item(i);
            // containsSPB is false for non-element nodes, so they are stepped over too.
            if (!containsSPB(child)) {
                i++;
                continue;
            }
            String nodeName = child.getNodeName();
            StyleWithProperties style = null;

            if (nodeName.equals(TEXT_P) || nodeName.equals(TEXT_H)) {
                if (factory.split(child)) {
                    style = officeReader.getParStyle(Misc.getAttribute(child, TEXT_STYLE_NAME));
                }
            } else if (nodeName.equals(TABLE_TABLE)) {
                if (factory.split(child)) {
                    style = officeReader.getTableStyle(Misc.getAttribute(child, TABLE_STYLE_NAME));
                }
            } else if (nodeName.equals(TEXT_LIST)
                    || nodeName.equals(TEXT_ILLUSTRATION_INDEX)
                    || nodeName.equals(TEXT_ALPHABETICAL_INDEX)) {
                factory.split(child);
            } else if (nodeName.equals(TEXT_SECTION)) {
                if (factory.split(child)) {
                    // The style is referenced by the style-name attribute; the element
                    // name is not an attribute of the section.
                    style = officeReader.getSectionStyle(Misc.getAttribute(child, TEXT_STYLE_NAME));
                }
            } else if (nodeName.equals(TEXT_TABLE_OF_CONTENT) || nodeName.equals(TEXT_SOFT_PAGE_BREAK)) {
                // HACK kept from the original: these are not split, only stepped over.
                i++;
                continue;
            } else {
                Debug.printNode(child);
                System.exit(1);
            }

            boolean breaksBefore = style != null && "page".equals(style.getProperty(FO_BREAK_BEFORE));
            if (!breaksBefore && !nodeName.equals(TEXT_LIST)) {
                onode.insertBefore(softPageBreak.cloneNode(false), child);
            }
            if (!child.hasChildNodes()) {
                onode.removeChild(child);
            }
            // No increment: the node now at index i is examined on the next pass.
        }
        return false;
    }
}