/*
 * w2phtml/source/java/writer2latex/xhtml/PageSplitter.java
 *
 * 632 lines
 * 22 KiB
 * Java
 */

package writer2latex.xhtml;
import javax.print.Doc;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import writer2latex.office.OfficeReader;
import writer2latex.office.StyleWithProperties;
import writer2latex.office.XMLString;
import writer2latex.util.Misc;
public class PageSplitter {
static Node truncatedListItemNodeContent = null;
/**
 * Splits the direct children of {@code onode} at the soft page breaks they
 * contain, so that each break ends up as a direct child of {@code onode}.
 *
 * <p>For every element child that contains a soft page break, a shallow clone
 * of the child is filled by the matching handler with the content that
 * precedes the first break. The clone is inserted before the child, followed
 * by a new soft-page-break element. The child is then examined again, so a
 * child holding several breaks is split once per break.
 *
 * <p>Children that are themselves soft page breaks, and tables of content,
 * are skipped unchanged.
 *
 * @param onode the node whose children are processed; it is modified in place
 * @param ofr   reader used to look up the style of the split element
 * @return {@code onode}
 */
protected static Node splitSoftPageBreak(Node onode,OfficeReader ofr){
    Document document = onode.getOwnerDocument();
    Element softPageBreak = document.createElement(XMLString.TEXT_SOFT_PAGE_BREAK);
    // Live list: insertions and removals below are reflected immediately.
    NodeList nodes = onode.getChildNodes();
    int i = 0;
    while (i < nodes.getLength()){
        Node child = nodes.item(i);
        if (child.getNodeType() != Node.ELEMENT_NODE || !containsSPB(child)){
            i++;
            continue;
        }
        String nodeName = child.getNodeName();
        // Breaks that already sit at this level, and tables of content, are left as they are.
        if (nodeName.equals(XMLString.TEXT_SOFT_PAGE_BREAK)
                || nodeName.equals(XMLString.TEXT_TABLE_OF_CONTENT)){
            i++;
            continue;
        }

        Element current = (Element) child;
        int breaksBefore = current.getElementsByTagName(XMLString.TEXT_SOFT_PAGE_BREAK).getLength();
        Element childFirstPart = (Element) child.cloneNode(false);
        StyleWithProperties style = null;

        if (nodeName.equals(XMLString.TEXT_P) || nodeName.equals(XMLString.TEXT_H)) {
            if (handleParagraph(childFirstPart, child)){
                onode.insertBefore(childFirstPart, child);
                // NOTE(review): a paragraph style name is resolved through getTableStyle;
                // confirm this is the intended style family.
                style = ofr.getTableStyle(Misc.getAttribute(child, XMLString.TEXT_STYLE_NAME));
            }
        } else if (nodeName.equals(XMLString.TABLE_TABLE)) {
            if (handleTableTable(childFirstPart, child)){
                onode.insertBefore(childFirstPart, child);
                style = ofr.getTableStyle(Misc.getAttribute(child, XMLString.TABLE_STYLE_NAME));
            }
        } else if (nodeName.equals(XMLString.TEXT_LIST)) {
            if (handleList(childFirstPart, child)){
                onode.insertBefore(childFirstPart, child);
                // NOTE(review): a list style name is resolved through getTableStyle; confirm.
                style = ofr.getTableStyle(Misc.getAttribute(child, XMLString.TEXT_LIST_STYLE));
            }
        } else if (nodeName.equals(XMLString.TEXT_SECTION)) {
            if (handleSection(childFirstPart, child)){
                onode.insertBefore(childFirstPart, child);
                // NOTE(review): TEXT_SECTION is the element name tested above, yet it is
                // passed here as an attribute name; the style-name attribute was
                // presumably meant. Left unchanged pending confirmation.
                style = ofr.getTableStyle(Misc.getAttribute(child, XMLString.TEXT_SECTION));
            }
        }

        // Progress guard. The child is scanned again on the next pass, so the
        // pass must have consumed at least one break. When it did not (element
        // type without a handler, or a break in a position the handler does not
        // support), drop the breaks that are still inside the child; the break
        // inserted below stands in for them. Without this the loop would keep
        // inserting page breaks forever.
        NodeList leftover = current.getElementsByTagName(XMLString.TEXT_SOFT_PAGE_BREAK);
        if (leftover.getLength() >= breaksBefore){
            while (leftover.getLength() > 0){
                Node stale = leftover.item(0);
                stale.getParentNode().removeChild(stale);
            }
        }

        //TODO: IF fo:break before in original table - don't create SPB
        if (style == null || !"page".equals(style.getProperty(XMLString.FO_BREAK_BEFORE))){
            onode.insertBefore(softPageBreak.cloneNode(false), child);
        }

        // HACK: handleList publishes the list item it cut in two through the
        // static field. The remaining content of that item is moved out of the
        // list, between the page break and the rest of the list.
        if (truncatedListItemNodeContent != null){
            NodeList itemNodeList = truncatedListItemNodeContent.getChildNodes();
            while (itemNodeList.getLength() > 0){
                onode.insertBefore(itemNodeList.item(0), child);
            }
            // The item may already have been detached by handleList when it was empty.
            Node itemParent = truncatedListItemNodeContent.getParentNode();
            if (itemParent != null){
                itemParent.removeChild(truncatedListItemNodeContent);
            }
            truncatedListItemNodeContent = null;
        }

        if (!child.hasChildNodes()){
            onode.removeChild(child);
        }
        // i is deliberately not advanced: the nodes now at this index are examined
        // again, which is how a second break in the same child gets handled.
    }
    return onode;
}
/**
 * Moves the content of {@code list} that precedes its first soft page break
 * into {@code listFirstPart}.
 *
 * <p>List headers without a break are copied (they stay in the original list
 * as well); list items without a break are moved. The first list item that
 * contains a break is split with {@link #handleListItem}. When that split
 * moved data, the item is stored in {@code truncatedListItemNodeContent} for
 * the caller and the original list gets the continue-numbering attribute.
 *
 * @param listFirstPart empty shallow clone of {@code list} that receives the leading content
 * @param list          the list element to split; it is modified in place
 * @return {@code true} if any list item content was moved to {@code listFirstPart}
 */
private static boolean handleList(Node listFirstPart, Node list){
    NodeList listNodes = list.getChildNodes();
    int i = 0;
    boolean dataMoved = false;
    while (listNodes.getLength() > i) {
        Node listChild = listNodes.item(i);
        if (listChild.getNodeType() != Node.ELEMENT_NODE){
            // Text or comment node: leave it where it is, but step over it so
            // the loop cannot spin on the same index.
            i++;
            continue;
        }
        String nodeName = listChild.getNodeName();
        if (nodeName.equals(XMLString.TEXT_LIST_HEADER)) {
            if (containsSPB(listChild)){
                // A header is not split; its break is dropped and the caller
                // places the page break in front of the list.
                removeSPB(listChild);
                break;
            }
            listFirstPart.appendChild(listChild.cloneNode(true));
            // The header stays in the original list, so advance past it.
            i++;
        } else if (nodeName.equals(XMLString.TEXT_LIST_ITEM)) {
            if (containsSPB(listChild)){
                Node listItemFirstPart = listChild.cloneNode(false);
                if (handleListItem(listItemFirstPart, listChild)){
                    dataMoved = true;
                    listFirstPart.appendChild(listItemFirstPart);
                    if (listChild.hasChildNodes()){
                        // Hand the rest of the item to splitSoftPageBreak,
                        // which moves it out of the list.
                        truncatedListItemNodeContent = listChild;
                    } else {
                        // Nothing is left in the item: drop it. It is not
                        // published, because the caller would otherwise try to
                        // detach a node that no longer has a parent.
                        list.removeChild(listChild);
                    }
                }
                if (dataMoved){
                    ((Element) list).setAttribute(XMLString.TEXT_CONTINUE_NUMBERING, "true");
                }
                break;
            }
            // Item without a break: it belongs entirely to the first part.
            // appendChild detaches it from the list, so i stays unchanged.
            listFirstPart.appendChild(listChild);
            dataMoved = true;
        } else {
            // Element that is neither header nor item.
            if (containsSPB(listChild)){
                removeSPB(listChild);
                break;
            }
            i++;
        }
    }
    return dataMoved;
}
/**
 * Moves the children of {@code listItem} that precede its first soft page
 * break into {@code listItemFirstPart}.
 *
 * <p>Children are moved one by one until a child containing a break is found:
 * <ul>
 * <li>a soft-page-break element is removed;</li>
 * <li>a nested list is split with {@link #handleList};</li>
 * <li>a paragraph or heading is split with {@link #handleParagraph};</li>
 * <li>any other element is kept whole and the breaks inside it are removed,
 *     so that the page break ends up in front of that element.</li>
 * </ul>
 *
 * @param listItemFirstPart empty shallow clone of {@code listItem} that receives the leading content
 * @param listItem          the list item to split; it is modified in place
 * @return {@code true} if anything was moved to {@code listItemFirstPart}
 */
private static boolean handleListItem(Node listItemFirstPart, Node listItem){
    boolean dataMoved = false;
    NodeList listItemNodes = listItem.getChildNodes();
    // Every pass either moves the first child away or ends the loop, so the
    // node to look at is always the first one.
    while (listItemNodes.getLength() > 0){
        Node listItemChild = listItemNodes.item(0);
        if (listItemChild.getNodeType() == Node.ELEMENT_NODE && containsSPB(listItemChild)){
            String nodeName = listItemChild.getNodeName();
            if (nodeName.equals(XMLString.TEXT_SOFT_PAGE_BREAK)) {
                listItem.removeChild(listItemChild);
            } else if (nodeName.equals(XMLString.TEXT_LIST)) {
                Node nestedFirstPart = listItemChild.cloneNode(false);
                if (handleList(nestedFirstPart, listItemChild)){
                    listItemFirstPart.appendChild(nestedFirstPart);
                    dataMoved = true;
                }
            } else if (nodeName.equals(XMLString.TEXT_H) || nodeName.equals(XMLString.TEXT_P)) {
                Node paragraphFirstPart = listItemChild.cloneNode(false);
                if (handleParagraph(paragraphFirstPart, listItemChild)){
                    listItemFirstPart.appendChild(paragraphFirstPart);
                    dataMoved = true;
                }
            } else {
                // No splitter exists for this element. Its breaks are dropped so
                // the caller, which re-examines the list while it still contains
                // a break, is guaranteed to make progress.
                while (containsSPB(listItemChild)){
                    removeSPB(listItemChild);
                }
            }
            break;
        }
        // Text nodes and break-free elements precede the break: move them.
        listItemFirstPart.appendChild(listItemChild);
        dataMoved = true;
    }
    return dataMoved;
}
/**
 * Moves the content of {@code table} that precedes its first soft page break
 * into {@code tableFirstPart}.
 *
 * <p>Description nodes (column, column group, header rows, header columns)
 * without a break are copied so that both table parts keep them; other
 * break-free elements are moved. The first child containing a break is
 * treated as follows:
 * <ul>
 * <li>a soft-page-break element is removed;</li>
 * <li>a row group, rows container or row is split by its handler and the
 *     leading part is added to {@code tableFirstPart} when data was moved;</li>
 * <li>for any other element the breaks inside it are removed.</li>
 * </ul>
 *
 * @param tableFirstPart empty shallow clone of {@code table} that receives the leading content
 * @param table          the table element to split; it is modified in place
 * @return {@code true} if row content was moved to {@code tableFirstPart}
 */
private static boolean handleTableTable(Node tableFirstPart, Node table) {
    NodeList tableChildNodes = table.getChildNodes();
    int i = 0;
    boolean dataMoved = false;
    while (tableChildNodes.getLength() > i) {
        Node tableChildNode = tableChildNodes.item(i);
        if (tableChildNode.getNodeType() != Node.ELEMENT_NODE) {
            // Text or comment node: step over it so the loop cannot spin on
            // the same index.
            i++;
            continue;
        }
        String tableChildNodeName = tableChildNode.getNodeName();

        if (containsSPB(tableChildNode)) {
            if (tableChildNodeName.equals(XMLString.TEXT_SOFT_PAGE_BREAK)) {
                table.removeChild(tableChildNode);
            } else if (tableChildNodeName.equals(XMLString.TABLE_TABLE_ROW_GROUP)) {
                Node firstPart = tableChildNode.cloneNode(false);
                if (handleTableRowGroup(firstPart, tableChildNode)) {
                    dataMoved = true;
                    tableFirstPart.appendChild(firstPart);
                }
            } else if (tableChildNodeName.equals(XMLString.TABLE_TABLE_ROWS)) {
                Node firstPart = tableChildNode.cloneNode(false);
                if (handleTableRows(firstPart, tableChildNode)) {
                    dataMoved = true;
                    tableFirstPart.appendChild(firstPart);
                }
            } else if (tableChildNodeName.equals(XMLString.TABLE_TABLE_ROW)) {
                Node firstPart = tableChildNode.cloneNode(false);
                if (handleTableRow(firstPart, tableChildNode)) {
                    dataMoved = true;
                    tableFirstPart.appendChild(firstPart);
                }
            } else {
                // Description nodes and elements without a splitter are kept
                // whole. Their breaks are dropped so the caller, which
                // re-examines the table while it still contains a break, is
                // guaranteed to make progress.
                while (containsSPB(tableChildNode)) {
                    removeSPB(tableChildNode);
                }
            }
            break;
        }

        boolean descriptionNode = tableChildNodeName.equals(XMLString.TABLE_TABLE_COLUMN)
                || tableChildNodeName.equals(XMLString.TABLE_TABLE_COLUMN_GROUP)
                || tableChildNodeName.equals(XMLString.TABLE_TABLE_HEADER_ROWS)
                || tableChildNodeName.equals(XMLString.TABLE_TABLE_HEADER_COLUMNS);
        if (descriptionNode) {
            // Both parts need the description: copy it and advance past the original.
            tableFirstPart.appendChild(tableChildNode.cloneNode(true));
            i++;
        } else {
            // appendChild detaches the node from the table, so i stays unchanged.
            tableFirstPart.appendChild(tableChildNode);
            dataMoved = true;
        }
    }
    return dataMoved;
}
/**
 * Moves the content of {@code tableRowGroup} that precedes its first soft
 * page break into {@code tableRowGroupFistPart}.
 *
 * <p>Header rows without a break are copied (they stay in the original group
 * as well); text nodes and other break-free elements are moved. The first
 * child containing a break is treated as follows: a soft-page-break element
 * is removed; a row, nested row group or rows container is split by its
 * handler; for header rows and any other element the breaks inside it are
 * removed.
 *
 * @param tableRowGroupFistPart empty shallow clone of the group that receives the leading content
 * @param tableRowGroup         the row group to split; it is modified in place
 * @return {@code true} if anything was moved to {@code tableRowGroupFistPart}
 */
private static boolean handleTableRowGroup(Node tableRowGroupFistPart, Node tableRowGroup) {
    boolean dataMoved = false;
    int i = 0;
    NodeList tableRowGroupChildNodes = tableRowGroup.getChildNodes();
    while (tableRowGroupChildNodes.getLength() > i) {
        // Index with i, not 0: copied header rows stay in the group and are
        // stepped over by incrementing i. Reading item(0) would clone the same
        // header again on every pass and never reach the rows.
        Node tableRowGroupChildNode = tableRowGroupChildNodes.item(i);
        if (tableRowGroupChildNode.getNodeType() != Node.ELEMENT_NODE) {
            //Append text nodes
            tableRowGroupFistPart.appendChild(tableRowGroupChildNode);
            dataMoved = true;
            continue;
        }
        String nodeName = tableRowGroupChildNode.getNodeName();

        if (containsSPB(tableRowGroupChildNode)) {
            if (nodeName.equals(XMLString.TEXT_SOFT_PAGE_BREAK)) {
                tableRowGroup.removeChild(tableRowGroupChildNode);
            } else if (nodeName.equals(XMLString.TABLE_TABLE_ROW)) {
                Node firstPart = tableRowGroupChildNode.cloneNode(false);
                if (handleTableRow(firstPart, tableRowGroupChildNode)) {
                    dataMoved = true;
                    tableRowGroupFistPart.appendChild(firstPart);
                }
            } else if (nodeName.equals(XMLString.TABLE_TABLE_ROW_GROUP)) {
                Node firstPart = tableRowGroupChildNode.cloneNode(false);
                if (handleTableRowGroup(firstPart, tableRowGroupChildNode)) {
                    dataMoved = true;
                    tableRowGroupFistPart.appendChild(firstPart);
                }
            } else if (nodeName.equals(XMLString.TABLE_TABLE_ROWS)) {
                Node firstPart = tableRowGroupChildNode.cloneNode(false);
                if (handleTableRows(firstPart, tableRowGroupChildNode)) {
                    dataMoved = true;
                    tableRowGroupFistPart.appendChild(firstPart);
                }
            } else {
                // Header rows are a description node and are not split; other
                // elements have no splitter. Either way the breaks are dropped
                // so that the enclosing table does not keep reporting a break
                // that nobody consumes.
                while (containsSPB(tableRowGroupChildNode)) {
                    removeSPB(tableRowGroupChildNode);
                }
            }
            break;
        }

        if (nodeName.equals(XMLString.TABLE_TABLE_HEADER_ROWS)) {
            tableRowGroupFistPart.appendChild(tableRowGroupChildNode.cloneNode(true));
            i++;
        } else {
            // appendChild detaches the node from the group, so i stays unchanged.
            tableRowGroupFistPart.appendChild(tableRowGroupChildNode);
            dataMoved = true;
        }
    }
    return dataMoved;
}
/**
 * Moves the rows of {@code tableRows} that precede its first soft page break
 * into {@code tableRowsFistPart}.
 *
 * <p>Break-free elements are moved. The first element containing a break is
 * treated as follows: a soft-page-break element is removed; a row is split
 * with {@link #handleTableRow}; for any other element the breaks inside it
 * are removed. Non-element children are reported on standard output and left
 * in place.
 *
 * @param tableRowsFistPart empty shallow clone of the container that receives the leading rows
 * @param tableRows         the rows container to split; it is modified in place
 * @return {@code true} if anything was moved to {@code tableRowsFistPart}
 */
private static boolean handleTableRows(Node tableRowsFistPart, Node tableRows) {
    boolean dataMoved = false;
    // Counts the non-element children that were left in place.
    int i = 0;
    NodeList tableRowsChildNodes = tableRows.getChildNodes();
    while (tableRowsChildNodes.getLength() > i) {
        Node tableRowsChildNode = tableRowsChildNodes.item(i);
        if (tableRowsChildNode.getNodeType() != Node.ELEMENT_NODE) {
            System.out.println("ERROR: TEXT NODE FOUND INSIDE tabl:table-rows");
            // The node is not moved, so it must be stepped over; otherwise the
            // loop would report the same node forever.
            i++;
            continue;
        }
        String nodeName = tableRowsChildNode.getNodeName();

        if (containsSPB(tableRowsChildNode)) {
            if (nodeName.equals(XMLString.TEXT_SOFT_PAGE_BREAK)) {
                tableRows.removeChild(tableRowsChildNode);
            } else if (nodeName.equals(XMLString.TABLE_TABLE_ROW)) {
                Node rowFirstPart = tableRowsChildNode.cloneNode(false);
                if (handleTableRow(rowFirstPart, tableRowsChildNode)) {
                    dataMoved = true;
                    tableRowsFistPart.appendChild(rowFirstPart);
                }
            } else {
                // No splitter for this element: drop its breaks so that the
                // enclosing table does not keep reporting a break that nobody
                // consumes.
                while (containsSPB(tableRowsChildNode)) {
                    removeSPB(tableRowsChildNode);
                }
            }
            break;
        }

        // appendChild detaches the node from the container, so i stays unchanged.
        tableRowsFistPart.appendChild(tableRowsChildNode);
        dataMoved = true;
    }
    return dataMoved;
}
/**
 * Splits {@code tableRow} cell by cell at the soft page breaks inside it.
 *
 * <p>Unlike the other handlers this one visits every child of the row,
 * because each cell can carry its own break:
 * <ul>
 * <li>a cell (or covered cell) containing a break is split with
 *     {@link #handleCell}; its leading part is added to
 *     {@code tableRowFistPart};</li>
 * <li>an element without a break is copied to {@code tableRowFistPart} and
 *     replaced in the original row by an empty clone holding an empty
 *     paragraph.</li>
 * </ul>
 * Both rows therefore keep one cell per original cell.
 *
 * @param tableRowFistPart empty shallow clone of {@code tableRow} that receives the leading content
 * @param tableRow         the row to split; it is modified in place
 * @return {@code true} if cell content was placed in {@code tableRowFistPart}
 */
private static boolean handleTableRow(Node tableRowFistPart, Node tableRow) {
    boolean dataMoved = false;
    int i = 0;
    Document document = tableRow.getOwnerDocument();
    NodeList tableRowChildNodes = tableRow.getChildNodes();
    while (tableRowChildNodes.getLength() > i) {
        Node tableRowChildNode = tableRowChildNodes.item(i);
        if (tableRowChildNode.getNodeType() != Node.ELEMENT_NODE) {
            System.out.println("ERROR: TEXT NODE FOUND INSIDE tabl:table-row");
            // Step over the node; otherwise the loop would report it forever.
            i++;
            continue;
        }
        String nodeName = tableRowChildNode.getNodeName();

        if (containsSPB(tableRowChildNode)) {
            if (nodeName.equals(XMLString.TABLE_TABLE_CELL)
                    || nodeName.equals(XMLString.TABLE_COVERED_TABLE_CELL)) {
                //Implement handleCoveredCell in future
                Node cellFirstPart = tableRowChildNode.cloneNode(false);
                if (handleCell(cellFirstPart, tableRowChildNode)) {
                    dataMoved = true;
                } else {
                    // The break came before any content of this cell. The
                    // first-part row still needs a cell in this position, or
                    // every following cell would shift one column to the left.
                    cellFirstPart.appendChild(document.createElement(XMLString.TEXT_P));
                }
                tableRowFistPart.appendChild(cellFirstPart);
            } else {
                // No splitter for this element: drop its breaks so that the
                // enclosing table does not keep reporting a break that nobody
                // consumes.
                while (containsSPB(tableRowChildNode)) {
                    removeSPB(tableRowChildNode);
                }
            }
        } else {
            // The whole cell belongs to the first part. An empty stand-in with
            // an empty paragraph keeps its place in the original row.
            tableRowFistPart.appendChild(tableRowChildNode.cloneNode(true));
            Node emptyCell = tableRowChildNode.cloneNode(false);
            emptyCell.appendChild(document.createElement(XMLString.TEXT_P));
            tableRow.insertBefore(emptyCell, tableRowChildNode);
            tableRow.removeChild(tableRowChildNode);
            dataMoved = true;
        }
        i++;
    }
    return dataMoved;
}
/**
 * Moves the children of {@code cellNode} that precede its first soft page
 * break into {@code cellFirstPart}.
 *
 * <p>Text nodes and break-free elements are moved. The first element
 * containing a break is treated as follows: a soft-page-break element is
 * removed; a paragraph or heading is split with {@link #handleParagraph};
 * any other element is kept whole and the breaks inside it are removed.
 *
 * @param cellFirstPart empty shallow clone of {@code cellNode} that receives the leading content
 * @param cellNode      the cell to split; it is modified in place
 * @return {@code true} if anything was moved to {@code cellFirstPart}
 */
private static boolean handleCell(Node cellFirstPart, Node cellNode) {
    boolean dataMoved = false;
    NodeList cellChildNodes = cellNode.getChildNodes();
    // Every pass either moves the first child away or ends the loop, so the
    // node to look at is always the first one.
    while (cellChildNodes.getLength() > 0) {
        Node cellChildNode = cellChildNodes.item(0);
        if (cellChildNode.getNodeType() == Node.ELEMENT_NODE && containsSPB(cellChildNode)) {
            String nodeName = cellChildNode.getNodeName();
            if (nodeName.equals(XMLString.TEXT_SOFT_PAGE_BREAK)) {
                cellNode.removeChild(cellChildNode);
            } else if (nodeName.equals(XMLString.TEXT_H) || nodeName.equals(XMLString.TEXT_P)) {
                Node cellChildFirstPart = cellChildNode.cloneNode(false);
                if (handleParagraph(cellChildFirstPart, cellChildNode)) {
                    cellFirstPart.appendChild(cellChildFirstPart);
                    dataMoved = true;
                }
            } else {
                // No splitter for this element inside a cell. Its breaks are
                // dropped, which puts the split point in front of the element
                // and keeps the enclosing table from reporting a break that
                // nobody consumes.
                while (containsSPB(cellChildNode)) {
                    removeSPB(cellChildNode);
                }
            }
            break;
        }
        cellFirstPart.appendChild(cellChildNode);
        dataMoved = true;
    }
    return dataMoved;
}
/**
 * Moves the children of {@code sectionNode} that precede its first soft page
 * break into {@code sectionFirstPart}.
 *
 * <p>Text nodes and break-free elements are moved. An element containing a
 * break is treated as follows:
 * <ul>
 * <li>a table of content is never split: the breaks inside it are removed
 *     and it is then moved like any other break-free element, after which
 *     the scan continues;</li>
 * <li>a soft-page-break element is removed and the scan stops;</li>
 * <li>a paragraph or heading is split with {@link #handleParagraph} and the
 *     scan stops;</li>
 * <li>any other element is kept whole, the breaks inside it are removed and
 *     the scan stops.</li>
 * </ul>
 *
 * @param sectionFirstPart empty shallow clone of {@code sectionNode} that receives the leading content
 * @param sectionNode      the section to split; it is modified in place
 * @return {@code true} if anything was moved to {@code sectionFirstPart}
 */
private static boolean handleSection(Node sectionFirstPart, Node sectionNode) {
    boolean dataMoved = false;
    NodeList sectionChildNodes = sectionNode.getChildNodes();
    // Every pass either moves the first child away or ends the loop, so the
    // node to look at is always the first one. No counter is kept: advancing
    // one while reading the first child would end the loop early and strand
    // the last children in the original section.
    while (sectionChildNodes.getLength() > 0) {
        Node sectionChildNode = sectionChildNodes.item(0);
        if (sectionChildNode.getNodeType() == Node.ELEMENT_NODE && containsSPB(sectionChildNode)) {
            String nodeName = sectionChildNode.getNodeName();
            if (nodeName.equals(XMLString.TEXT_TABLE_OF_CONTENT)) {
                //HACK
                // Only the breaks of the table of content itself are dropped;
                // breaks further down in the section are still found by this scan.
                while (containsSPB(sectionChildNode)) {
                    removeSPB(sectionChildNode);
                }
                // Falls through to the move below.
            } else {
                if (nodeName.equals(XMLString.TEXT_SOFT_PAGE_BREAK)) {
                    sectionNode.removeChild(sectionChildNode);
                } else if (nodeName.equals(XMLString.TEXT_H) || nodeName.equals(XMLString.TEXT_P)) {
                    Node sectionChildFirstPart = sectionChildNode.cloneNode(false);
                    if (handleParagraph(sectionChildFirstPart, sectionChildNode)) {
                        sectionFirstPart.appendChild(sectionChildFirstPart);
                        dataMoved = true;
                    }
                } else {
                    // No splitter for this element inside a section. Its breaks
                    // are dropped so the caller, which re-examines the section
                    // while it still contains a break, is guaranteed to make
                    // progress.
                    while (containsSPB(sectionChildNode)) {
                        removeSPB(sectionChildNode);
                    }
                }
                break;
            }
        }
        sectionFirstPart.appendChild(sectionChildNode);
        dataMoved = true;
    }
    return dataMoved;
}
/**
 * Moves the content of {@code paraNode} that precedes its first soft page
 * break into {@code paraFirstPart} and removes that break.
 *
 * <p>Text nodes and break-free elements are moved; bookmark-start elements
 * are copied instead, so they stay in the original paragraph as well. When
 * the break is a direct child of the paragraph:
 * <ul>
 * <li>if the text before the break ends with a letter or a soft hyphen and
 *     the text after it starts with a letter, a hyphen (U+2010) is appended
 *     to the text before the break;</li>
 * <li>if there is nothing before and nothing after the break, a single space
 *     is put into the paragraph so that it is not empty.</li>
 * </ul>
 * When the break is nested inside a child element, a message is printed, the
 * nested breaks are removed and the element stays in the original paragraph.
 *
 * @param paraFirstPart empty shallow clone of {@code paraNode} that receives the leading content
 * @param paraNode      the paragraph or heading to split; it is modified in place
 * @return {@code true} if anything other than bookmark copies was moved to {@code paraFirstPart}
 */
private static boolean handleParagraph(Node paraFirstPart, Node paraNode) {
    final char softHyphen = 0x00AD;
    boolean dataMoved = false;
    int i = 0;
    NodeList paraChildNodes = paraNode.getChildNodes();
    while (paraChildNodes.getLength() > i) {
        Node paraChildNode = paraChildNodes.item(i);
        if (paraChildNode.getNodeType() != Node.ELEMENT_NODE) {
            // Text before the break. appendChild detaches it, so i stays unchanged.
            paraFirstPart.appendChild(paraChildNode);
            dataMoved = true;
            continue;
        }
        String nodeName = paraChildNode.getNodeName();

        if (containsSPB(paraChildNode)) {
            if (nodeName.equals(XMLString.TEXT_SOFT_PAGE_BREAK)) {
                Node paraNextNode = paraChildNodes.item(i + 1);
                Node paraPrevNode = paraFirstPart.getLastChild();
                if (paraNextNode != null && paraPrevNode != null) {
                    // Text that follows the break: the next node itself, or the
                    // first child of the next element when that is a text node.
                    String nextText = null;
                    if (paraNextNode.getNodeType() == Node.TEXT_NODE) {
                        nextText = paraNextNode.getTextContent();
                    } else if (paraNextNode.getNodeType() == Node.ELEMENT_NODE) {
                        Node nextNodeFirstChild = paraNextNode.getFirstChild();
                        if (nextNodeFirstChild != null && nextNodeFirstChild.getNodeType() == Node.TEXT_NODE) {
                            nextText = nextNodeFirstChild.getTextContent();
                        }
                    }
                    // Text node that precedes the break: the previous node itself,
                    // or the last child of the previous element when that is a
                    // text node.
                    Node prevTextNode = null;
                    if (paraPrevNode.getNodeType() == Node.TEXT_NODE) {
                        prevTextNode = paraPrevNode;
                    } else if (paraPrevNode.getNodeType() == Node.ELEMENT_NODE) {
                        Node prevNodeLastChild = paraPrevNode.getLastChild();
                        if (prevNodeLastChild != null && prevNodeLastChild.getNodeType() == Node.TEXT_NODE) {
                            prevTextNode = prevNodeLastChild;
                        }
                    }
                    String prevText = (prevTextNode == null) ? null : prevTextNode.getTextContent();
                    // Empty strings are excluded because charAt would throw on them.
                    if (nextText != null && nextText.length() > 0
                            && prevText != null && prevText.length() > 0) {
                        char lastBefore = prevText.charAt(prevText.length() - 1);
                        if (Character.isLetter(nextText.charAt(0))
                                && (Character.isLetter(lastBefore) || lastBefore == softHyphen)) {
                            // The break falls inside a word. The hyphen is written
                            // to the text node itself: setting the text content of
                            // an enclosing element would replace all its children.
                            prevTextNode.setTextContent(prevText + "\u2010");
                        }
                    }
                }
                // In case paragraph is empty add space to prevent its removal
                if (paraNextNode == null && paraPrevNode == null) {
                    Document doc = paraNode.getOwnerDocument();
                    Node space = doc.createTextNode(" ");
                    paraNode.insertBefore(space, paraChildNode);
                }
                paraNode.removeChild(paraChildNode);
            } else {
                System.out.println("ERROR: SPB INSIDE Paragraph Element in inner element " + nodeName);
                // The inner element is not split. Its breaks are dropped so the
                // caller, which re-examines the paragraph while it still contains
                // a break, is guaranteed to make progress.
                while (containsSPB(paraChildNode)) {
                    removeSPB(paraChildNode);
                }
            }
            break;
        }

        if (nodeName.equals(XMLString.TEXT_BOOKMARK_START)) {
            // Copied, not moved: the original stays, so advance past it.
            paraFirstPart.appendChild(paraChildNode.cloneNode(true));
            i++;
        } else {
            paraFirstPart.appendChild(paraChildNode);
            dataMoved = true;
        }
    }
    return dataMoved;
}
/**
 * Removes soft page breaks from the subtree rooted at {@code node}.
 *
 * <p>If {@code node} itself is a soft-page-break element it is detached from
 * its parent. Otherwise every soft-page-break element below it is detached.
 * Nodes that are not elements are ignored.
 *
 * @param node root of the subtree to clean
 */
private static void removeSPB(Node node) {
    if (node.getNodeType() != Node.ELEMENT_NODE) {
        return;
    }
    if (node.getNodeName().equals(XMLString.TEXT_SOFT_PAGE_BREAK)) {
        Node parent = node.getParentNode();
        // A detached break has no parent and nothing to be removed from.
        if (parent != null) {
            parent.removeChild(node);
        }
        return;
    }
    // The next sibling is read before the recursive call because that call may
    // detach the current child. Walking the live child list by index instead
    // would skip the node that slides into the freed position.
    Node child = node.getFirstChild();
    while (child != null) {
        Node next = child.getNextSibling();
        removeSPB(child);
        child = next;
    }
}
/**
 * Tells whether {@code node} is a soft-page-break element or has one among
 * its descendants.
 *
 * @param node the node to inspect
 * @return {@code true} if a soft page break was found; {@code false} for
 *         nodes that are not elements and for elements without any break
 */
private static boolean containsSPB(Node node) {
    if (node.getNodeType() != Node.ELEMENT_NODE) {
        return false;
    }
    if (node.getNodeName().equals(XMLString.TEXT_SOFT_PAGE_BREAK)) {
        return true;
    }
    // Depth-first search through the children, stopping at the first hit.
    for (Node descendant = node.getFirstChild(); descendant != null; descendant = descendant.getNextSibling()) {
        if (containsSPB(descendant)) {
            return true;
        }
    }
    return false;
}
}