w2phtml/src/main/java/writer2latex/xhtml/PageSplitter.java

669 lines
21 KiB
Java
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package writer2latex.xhtml;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import writer2latex.office.OfficeReader;
import writer2latex.office.StyleWithProperties;
import static writer2latex.office.XMLString.*;
import writer2latex.util.Misc;
/**
 * Static helpers that look for {@code text:soft-page-break} elements nested inside
 * paragraphs, tables, lists and sections and split the enclosing element at the break.
 *
 * NOTE(review): the two fields below are static and mutable, so every call shares them;
 * presumably the class is only used from one thread at a time - confirm.
 */
public class PageSplitter {
// Remainder of a list item that was cut by a soft page break. It is assigned in
// handleList and consumed (then reset to null) in splitSoftPageBreak.
static Node truncatedListItemNodeContent = null;
// Reader passed to the latest splitSoftPageBreak call; removeIndent uses it for style lookups.
static OfficeReader officeReader = null;
/**
 * Lifts soft page breaks that are nested inside the children of {@code onode} up to
 * the level of those children.
 *
 * Every child that contains a break is split: the content in front of the break is
 * inserted before the child as a new sibling, followed by a fresh soft page break
 * element, and the child keeps the remainder. The child is then examined again, so
 * several breaks inside one child are handled one after another.
 *
 * @param onode parent node whose children are processed in place
 * @param ofr   reader used for style lookups; also stored in {@code officeReader}
 * @return {@code onode}, modified in place
 */
protected static Node splitSoftPageBreak(Node onode,OfficeReader ofr){
    officeReader = ofr;
    Document document = onode.getOwnerDocument();
    Element softPageBreak = document.createElement(TEXT_SOFT_PAGE_BREAK);
    // getChildNodes() is live: the insertions below shift the indices of later nodes.
    NodeList nodes = onode.getChildNodes();
    int i = 0;
    while (i < nodes.getLength()) {
        Node child = nodes.item(i);
        if (!containsSPB(child)) {
            i++;
            continue;
        }
        String nodeName = child.getNodeName();
        if (nodeName.equals(TEXT_SOFT_PAGE_BREAK) || nodeName.equals(TEXT_TABLE_OF_CONTENT)) {
            // HACK kept from the original: a break that is already at this level, and a
            // table of content holding breaks, are stepped over untouched.
            i++;
            continue;
        }
        int breaksBefore = countSoftPageBreaks(child);
        // Shallow copy that receives the content located in front of the break.
        Element childFirstPart = (Element) child.cloneNode(false);
        StyleWithProperties style = null;
        if (nodeName.equals(TEXT_P) || nodeName.equals(TEXT_H)) {
            Node paraFirstPart = handleParagraph(child);
            // null means the break was the first thing in the paragraph
            if (paraFirstPart != null) {
                onode.insertBefore(paraFirstPart, child);
                style = ofr.getParStyle(Misc.getAttribute(child, TEXT_STYLE_NAME));
            }
        } else if (nodeName.equals(TABLE_TABLE)) {
            if (handleTableTable(childFirstPart, child)) {
                onode.insertBefore(childFirstPart, child);
                style = ofr.getTableStyle(Misc.getAttribute(child, TABLE_STYLE_NAME));
            }
        } else if (nodeName.equals(TEXT_LIST)) {
            if (handleList(childFirstPart, child)) {
                onode.insertBefore(childFirstPart, child);
            }
        } else if (nodeName.equals(TEXT_SECTION)) {
            if (handleSection(childFirstPart, child)) {
                onode.insertBefore(childFirstPart, child);
                // The style is referenced by the style-name attribute; the original
                // passed the element name (TEXT_SECTION) as the attribute name.
                style = ofr.getSectionStyle(Misc.getAttribute(child, TEXT_STYLE_NAME));
            }
        }
        // Progress guard: a handler that could not consume a break (or an element type
        // without a handler) would make this loop insert breaks forever, because the
        // child is examined again below. In that case the remaining nested breaks are
        // dropped; the break inserted in front of the child stands in for them.
        if (countSoftPageBreaks(child) >= breaksBefore) {
            while (containsSPB(child)) {
                removeSPB(child);
            }
        }
        //TODO: IF fo:break before in original table - don't create SPB
        if (style == null || !"page".equals(style.getProperty(FO_BREAK_BEFORE))) {
            onode.insertBefore(softPageBreak.cloneNode(false), child);
        }
        // HACK kept from the original: the remainder of a list item that was cut by
        // the break is moved out of the list and placed right after the new break.
        if (truncatedListItemNodeContent != null) {
            NodeList itemNodeList = truncatedListItemNodeContent.getChildNodes();
            while (itemNodeList.getLength() > 0) {
                onode.insertBefore(itemNodeList.item(0), child);
            }
            // The list item may already have been detached from its list.
            Node truncatedParent = truncatedListItemNodeContent.getParentNode();
            if (truncatedParent != null) {
                truncatedParent.removeChild(truncatedListItemNodeContent);
            }
            truncatedListItemNodeContent = null;
        }
        if (!child.hasChildNodes()) {
            onode.removeChild(child);
        }
        // i is deliberately not advanced: index i now holds the first inserted node
        // and the child itself is reached (and re-checked) again further on.
    }
    return onode;
}

/**
 * Counts the soft page break elements in the subtree of {@code node}. Like
 * {@code containsSPB}, only element nodes are inspected.
 */
private static int countSoftPageBreaks(Node node) {
    if (node.getNodeType() != Node.ELEMENT_NODE) {
        return 0;
    }
    if (node.getNodeName().equals(TEXT_SOFT_PAGE_BREAK)) {
        return 1;
    }
    int total = 0;
    for (Node current = node.getFirstChild(); current != null; current = current.getNextSibling()) {
        total += countSoftPageBreaks(current);
    }
    return total;
}
/**
 * Splits a list at its first soft page break.
 *
 * List items located in front of the break are moved into {@code listFirstPart};
 * a list header without a break is copied there and also stays in {@code list}.
 * The item that contains the break is split by {@code handleListItem}; what is left
 * of it is recorded in {@code truncatedListItemNodeContent}.
 *
 * @param listFirstPart empty shallow copy of the list that receives the first part
 * @param list          list to split; keeps everything after the break
 * @return true if at least one list item (or part of one) was moved to the first part
 */
private static boolean handleList(Node listFirstPart, Node list){
    NodeList listNodes = list.getChildNodes();
    int i = 0;
    boolean dataMoved = false;
    while (listNodes.getLength() > i) {
        Node listChild = listNodes.item(i);
        String nodeName = listChild.getNodeName();
        if (listChild.getNodeType() != Node.ELEMENT_NODE) {
            // Text and comment nodes stay where they are. The original never advanced
            // past them and so never left this loop.
            i++;
        } else if (nodeName.equals(TEXT_LIST_ITEM)) {
            if (!containsSPB(listChild)) {
                // Item entirely in front of the break: move it (the live list shrinks).
                listFirstPart.appendChild(listChild);
                dataMoved = true;
                continue;
            }
            Node listItemFirstPart = listChild.cloneNode(false);
            if (handleListItem(listItemFirstPart, listChild)) {
                dataMoved = true;
                listFirstPart.appendChild(listItemFirstPart);
                if (listChild.hasChildNodes()) {
                    // The caller moves this remainder behind the page break.
                    truncatedListItemNodeContent = listChild;
                } else {
                    // Nothing is left of the item: drop it and do not record a node
                    // that is no longer attached to the list.
                    list.removeChild(listChild);
                    truncatedListItemNodeContent = null;
                }
            }
            if (dataMoved) {
                // The remaining list continues the numbering of the first part.
                ((Element) list).setAttribute(TEXT_CONTINUE_NUMBERING, "true");
            }
            break;
        } else if (containsSPB(listChild)) {
            // HACK kept from the original for list headers, extended to any other
            // element: the nested break is dropped and the split ends here.
            removeSPB(listChild);
            break;
        } else {
            if (nodeName.equals(TEXT_LIST_HEADER)) {
                // The header is repeated in the first part and also stays in the list.
                listFirstPart.appendChild(listChild.cloneNode(true));
            }
            // Unrecognised elements stay in place; the original looped forever on them.
            i++;
        }
    }
    return dataMoved;
}
/**
 * Splits a list item at its first soft page break.
 *
 * Children in front of the break are moved into {@code listItemFirstPart}. If the
 * break is the very first child it is only removed and nothing is moved.
 *
 * @param listItemFirstPart empty shallow copy of the item that receives the first part
 * @param listItem          item to split; keeps everything after the break
 * @return true if any node was moved to {@code listItemFirstPart}
 */
private static boolean handleListItem(Node listItemFirstPart, Node listItem){
    boolean moved = false;
    NodeList remaining = listItem.getChildNodes();
    // Always look at the first remaining child: each pass either moves it away
    // or ends the loop.
    while (remaining.getLength() > 0) {
        Node current = remaining.item(0);
        boolean isElement = current.getNodeType() == Node.ELEMENT_NODE;
        if (!isElement || !containsSPB(current)) {
            // Located in front of the break: goes to the first part.
            listItemFirstPart.appendChild(current);
            moved = true;
            continue;
        }
        String name = current.getNodeName();
        if (name.equals(TEXT_SOFT_PAGE_BREAK)) {
            // The break itself: drop it.
            listItem.removeChild(current);
        } else if (name.equals(TEXT_LIST)) {
            // Nested list: split it recursively.
            Node nestedFirstPart = current.cloneNode(false);
            if (handleList(nestedFirstPart, current)) {
                listItemFirstPart.appendChild(nestedFirstPart);
                moved = true;
            }
        } else if (name.equals(TEXT_H) || name.equals(TEXT_P)) {
            Node paragraphFirstPart = handleParagraph(current);
            if (paragraphFirstPart != null) {
                listItemFirstPart.appendChild(paragraphFirstPart);
                moved = true;
            }
        }
        // The first break has been reached; everything after it stays in the item.
        break;
    }
    return moved;
}
/**
 * Splits a table at its first soft page break (marked "needs finish" by the author).
 *
 * Column, column-group, header-rows and header-columns children that come before the
 * break are copied into {@code tableFirstPart} and also stay in {@code table}. Any
 * other element in front of the break is moved to the first part. The child that
 * contains the break is split by the matching row/row-group handler.
 *
 * @param tableFirstPart empty shallow copy of the table that receives the first part
 * @param table          table to split; keeps everything after the break
 * @return true if table content other than the copied description nodes was moved
 */
private static boolean handleTableTable(Node tableFirstPart, Node table) {
    NodeList tableChildNodes = table.getChildNodes();
    int i = 0;
    boolean dataMoved = false;
    while (tableChildNodes.getLength() > i) {
        Node tableChildNode = tableChildNodes.item(i);
        if (tableChildNode.getNodeType() != Node.ELEMENT_NODE) {
            // Text and comment nodes stay in place. The original never advanced past
            // them and so never left this loop.
            i++;
            continue;
        }
        String tableChildNodeName = tableChildNode.getNodeName();
        // Nodes that describe the table layout rather than carry row data.
        boolean descriptionNode = tableChildNodeName.equals(TABLE_TABLE_COLUMN)
                || tableChildNodeName.equals(TABLE_TABLE_COLUMN_GROUP)
                || tableChildNodeName.equals(TABLE_TABLE_HEADER_ROWS)
                || tableChildNodeName.equals(TABLE_TABLE_HEADER_COLUMNS);
        if (containsSPB(tableChildNode)) {
            if (tableChildNodeName.equals(TEXT_SOFT_PAGE_BREAK)) {
                // Break directly between table children: drop it.
                table.removeChild(tableChildNode);
            } else if (tableChildNodeName.equals(TABLE_TABLE_ROW_GROUP)) {
                Node tableChildFirstPart = tableChildNode.cloneNode(false);
                if (handleTableRowGroup(tableChildFirstPart, tableChildNode)) {
                    dataMoved = true;
                    tableFirstPart.appendChild(tableChildFirstPart);
                }
            } else if (tableChildNodeName.equals(TABLE_TABLE_ROWS)) {
                Node tableChildFirstPart = tableChildNode.cloneNode(false);
                if (handleTableRows(tableChildFirstPart, tableChildNode)) {
                    dataMoved = true;
                    tableFirstPart.appendChild(tableChildFirstPart);
                }
            } else if (tableChildNodeName.equals(TABLE_TABLE_ROW)) {
                Node tableChildFirstPart = tableChildNode.cloneNode(false);
                if (handleTableRow(tableChildFirstPart, tableChildNode)) {
                    dataMoved = true;
                    tableFirstPart.appendChild(tableChildFirstPart);
                }
            } else {
                // Description nodes, and any element without a dedicated handler, are
                // not split: their nested break is dropped. The original left the break
                // in unrecognised elements, so the caller found it again and again.
                removeSPB(tableChildNode);
            }
            // Only the first break is handled per call.
            break;
        }
        if (descriptionNode) {
            // Needed by both halves: copy it and step over the original.
            tableFirstPart.appendChild(tableChildNode.cloneNode(true));
            i++;
        } else {
            // Content in front of the break: move it (the live list shrinks).
            tableFirstPart.appendChild(tableChildNode);
            dataMoved = true;
        }
    }
    return dataMoved;
}
/**
 * Splits a table row group at its first soft page break.
 *
 * Header rows in front of the break are copied into the first part and also stay in
 * the group; every other node in front of the break is moved to the first part.
 *
 * @param tableRowGroupFistPart empty shallow copy of the group that receives the first part
 * @param tableRowGroup         group to split; keeps everything after the break
 * @return true if any node other than copied header rows was moved
 */
private static boolean handleTableRowGroup(Node tableRowGroupFistPart, Node tableRowGroup) {
    boolean dataMoved = false;
    // Index of the first child not yet dealt with; copied header rows stay in the
    // group and are stepped over with i++.
    int i = 0;
    NodeList tableRowGroupChildNodes = tableRowGroup.getChildNodes();
    while (tableRowGroupChildNodes.getLength() > i) {
        // The original read item(0) here, so after copying header rows it kept
        // copying the same header rows and never reached the rows behind them.
        Node tableRowGroupChildNode = tableRowGroupChildNodes.item(i);
        if (tableRowGroupChildNode.getNodeType() != Node.ELEMENT_NODE) {
            // Text nodes are moved to the first part, as in the original.
            tableRowGroupFistPart.appendChild(tableRowGroupChildNode);
            dataMoved = true;
            continue;
        }
        String nodeName = tableRowGroupChildNode.getNodeName();
        if (containsSPB(tableRowGroupChildNode)) {
            if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)) {
                // Break directly between rows: drop it.
                tableRowGroup.removeChild(tableRowGroupChildNode);
            } else if (nodeName.equals(TABLE_TABLE_ROW)) {
                Node firstPart = tableRowGroupChildNode.cloneNode(false);
                if (handleTableRow(firstPart, tableRowGroupChildNode)) {
                    dataMoved = true;
                    tableRowGroupFistPart.appendChild(firstPart);
                }
            } else if (nodeName.equals(TABLE_TABLE_ROW_GROUP)) {
                Node firstPart = tableRowGroupChildNode.cloneNode(false);
                if (handleTableRowGroup(firstPart, tableRowGroupChildNode)) {
                    dataMoved = true;
                    tableRowGroupFistPart.appendChild(firstPart);
                }
            } else if (nodeName.equals(TABLE_TABLE_ROWS)) {
                Node firstPart = tableRowGroupChildNode.cloneNode(false);
                if (handleTableRows(firstPart, tableRowGroupChildNode)) {
                    dataMoved = true;
                    tableRowGroupFistPart.appendChild(firstPart);
                }
            } else {
                // Header rows are description nodes and are not split (nothing is
                // moved for them); the same goes for unrecognised elements. Their
                // nested break is dropped, as handleTableTable does for header rows,
                // so that the caller does not find the same break again.
                removeSPB(tableRowGroupChildNode);
            }
            // Only the first break is handled per call.
            break;
        }
        if (nodeName.equals(TABLE_TABLE_HEADER_ROWS)) {
            // Needed by both halves: copy it and step over the original.
            tableRowGroupFistPart.appendChild(tableRowGroupChildNode.cloneNode(true));
            i++;
        } else {
            tableRowGroupFistPart.appendChild(tableRowGroupChildNode);
            dataMoved = true;
        }
    }
    return dataMoved;
}
/**
 * Splits a table-rows element at its first soft page break.
 *
 * Element children in front of the break are moved into {@code tableRowsFistPart};
 * the row that contains the break is split by {@code handleTableRow}.
 *
 * @param tableRowsFistPart empty shallow copy that receives the first part
 * @param tableRows         element to split; keeps everything after the break
 * @return true if any node was moved to the first part
 */
private static boolean handleTableRows(Node tableRowsFistPart, Node tableRows) {
    boolean dataMoved = false;
    // Index of the first child not yet dealt with; only skipped non-element nodes
    // advance it, everything else is moved away or ends the loop.
    int i = 0;
    NodeList tableRowsChildNodes = tableRows.getChildNodes();
    while (tableRowsChildNodes.getLength() > i) {
        Node tableRowsChildNode = tableRowsChildNodes.item(i);
        if (tableRowsChildNode.getNodeType() != Node.ELEMENT_NODE) {
            // Reported once and left in place. The original printed this message
            // forever because it never moved past the node.
            System.out.println("ERROR: TEXT NODE FOUND INSIDE tabl:table-rows");
            i++;
            continue;
        }
        if (!containsSPB(tableRowsChildNode)) {
            // Row in front of the break: move it (the live list shrinks).
            tableRowsFistPart.appendChild(tableRowsChildNode);
            dataMoved = true;
            continue;
        }
        String nodeName = tableRowsChildNode.getNodeName();
        if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)) {
            // Break directly between rows: drop it.
            tableRows.removeChild(tableRowsChildNode);
        } else if (nodeName.equals(TABLE_TABLE_ROW)) {
            Node rowFirstPart = tableRowsChildNode.cloneNode(false);
            if (handleTableRow(rowFirstPart, tableRowsChildNode)) {
                dataMoved = true;
                tableRowsFistPart.appendChild(rowFirstPart);
            }
        } else {
            // No handler for this element: drop its nested break so that the caller
            // does not find the same break again.
            removeSPB(tableRowsChildNode);
        }
        // Only the first break is handled per call.
        break;
    }
    return dataMoved;
}
/**
 * Splits a table row at the soft page breaks found in its cells.
 *
 * Unlike the other handlers this one visits every child of the row. A cell with a
 * break is split by {@code handleCell}. A child without a break is copied whole into
 * the first-part row and replaced in the original row by an empty copy holding one
 * empty paragraph. Both rows keep one entry per column.
 *
 * @param tableRowFistPart empty shallow copy of the row that receives the first part
 * @param tableRow         row to split; keeps the content after the break
 * @return true if any cell content ended up in the first-part row
 */
private static boolean handleTableRow(Node tableRowFistPart, Node tableRow) {
    boolean dataMoved = false;
    int i = 0;
    NodeList tableRowChildNodes = tableRow.getChildNodes();
    Document document = tableRow.getOwnerDocument();
    while (tableRowChildNodes.getLength() > i) {
        Node tableRowChildNode = tableRowChildNodes.item(i);
        if (tableRowChildNode.getNodeType() != Node.ELEMENT_NODE) {
            // Reported once and left in place. The original never advanced past the
            // node and printed this message forever.
            System.out.println("ERROR: TEXT NODE FOUND INSIDE tabl:table-row");
            i++;
            continue;
        }
        String nodeName = tableRowChildNode.getNodeName();
        if (containsSPB(tableRowChildNode)) {
            // Covered cells are treated like ordinary cells for now.
            if (nodeName.equals(TABLE_TABLE_CELL) || nodeName.equals(TABLE_COVERED_TABLE_CELL)) {
                Node cellFirstPart = tableRowChildNode.cloneNode(false);
                if (handleCell(cellFirstPart, tableRowChildNode)) {
                    dataMoved = true;
                } else {
                    // The break came first in this cell, so nothing belongs in front
                    // of it. The original left the cell out of the first-part row,
                    // which shifted the following cells one column to the left; keep
                    // an empty placeholder instead (same shape as the one used below).
                    cellFirstPart.appendChild(document.createElement(TEXT_P));
                }
                tableRowFistPart.appendChild(cellFirstPart);
            } else {
                // Not a cell: drop the nested break so that the caller does not find
                // the same break again.
                removeSPB(tableRowChildNode);
            }
        } else {
            // No break in this cell: the whole content goes to the first part and an
            // empty cell with one empty paragraph takes its place in this row.
            tableRowFistPart.appendChild(tableRowChildNode.cloneNode(true));
            Node emptyCell = tableRowChildNode.cloneNode(false);
            Element textP = document.createElement(TEXT_P);
            emptyCell.appendChild(textP);
            tableRow.insertBefore(emptyCell, tableRowChildNode);
            tableRow.removeChild(tableRowChildNode);
            dataMoved = true;
        }
        i++;
    }
    return dataMoved;
}
/**
 * Splits the content of a table cell at its first soft page break.
 *
 * Nodes in front of the break are moved into {@code cellFirstPart}. A paragraph or
 * heading that contains the break is split by {@code handleParagraph}.
 *
 * @param cellFirstPart empty shallow copy of the cell that receives the first part
 * @param cellNode      cell to split; keeps everything after the break
 * @return true if any node was moved to {@code cellFirstPart}
 */
private static boolean handleCell(Node cellFirstPart, Node cellNode) {
    boolean dataMoved = false;
    NodeList cellChildNodes = cellNode.getChildNodes();
    // Always look at the first remaining child: each pass either moves it away
    // or ends the loop.
    while (cellChildNodes.getLength() > 0) {
        Node cellChildNode = cellChildNodes.item(0);
        boolean isElement = cellChildNode.getNodeType() == Node.ELEMENT_NODE;
        if (!isElement || !containsSPB(cellChildNode)) {
            // Located in front of the break (text nodes included): move it.
            cellFirstPart.appendChild(cellChildNode);
            dataMoved = true;
            continue;
        }
        String nodeName = cellChildNode.getNodeName();
        if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)) {
            // Break directly inside the cell: drop it.
            cellNode.removeChild(cellChildNode);
        } else if (nodeName.equals(TEXT_H) || nodeName.equals(TEXT_P)) {
            Node paraFirstPart = handleParagraph(cellChildNode);
            if (paraFirstPart != null) {
                cellFirstPart.appendChild(paraFirstPart);
                dataMoved = true;
            }
        } else {
            // Lists, nested tables, sections and so on are not split inside a cell.
            // The original left their break untouched, so the table still contained
            // it afterwards and the caller could not make progress. Drop the nested
            // break: the cell is then split in front of this element.
            removeSPB(cellChildNode);
        }
        // Only the first break is handled per call.
        break;
    }
    return dataMoved;
}
/**
 * Splits a section at its first soft page break.
 *
 * Nodes in front of the break are moved into {@code sectionFirstPart}; the child that
 * contains the break is split by the handler for its element type.
 *
 * @param sectionFirstPart empty shallow copy of the section that receives the first part
 * @param section          section to split; keeps everything after the break
 * @return true if any node was moved to {@code sectionFirstPart}
 */
private static boolean handleSection(Node sectionFirstPart, Node section) {
    boolean dataMoved = false;
    NodeList childs = section.getChildNodes();
    // Always look at the first remaining child: each pass either moves it away,
    // strips its breaks, or ends the loop.
    while (childs.getLength() > 0) {
        Node child = childs.item(0);
        boolean isElement = child.getNodeType() == Node.ELEMENT_NODE;
        if (!isElement || !containsSPB(child)) {
            // Located in front of the break (text nodes included): move it.
            sectionFirstPart.appendChild(child);
            dataMoved = true;
            continue;
        }
        String nodeName = child.getNodeName();
        if (nodeName.equals(TEXT_TABLE_OF_CONTENT)) {
            // HACK: a table of content is never split; its breaks are dropped and the
            // next pass moves it like any other break-free child. The original
            // stripped the breaks of the WHOLE section here and bumped a counter that
            // was only used in the loop condition, which ended the loop one child
            // early.
            removeSPB(child);
            continue;
        }
        if (nodeName.equals(TEXT_SOFT_PAGE_BREAK)) {
            // Break directly inside the section: drop it.
            section.removeChild(child);
        } else if (nodeName.equals(TEXT_H) || nodeName.equals(TEXT_P)) {
            Node paraFirstPart = handleParagraph(child);
            if (paraFirstPart != null) {
                sectionFirstPart.appendChild(paraFirstPart);
                dataMoved = true;
            }
        } else if (nodeName.equals(TABLE_TABLE)) {
            Node childFirstPart = child.cloneNode(false);
            if (handleTableTable(childFirstPart, child)) {
                sectionFirstPart.appendChild(childFirstPart);
                dataMoved = true;
            }
        } else if (nodeName.equals(TEXT_SECTION)) {
            Node childFirstPart = child.cloneNode(false);
            if (handleSection(childFirstPart, child)) {
                sectionFirstPart.appendChild(childFirstPart);
                dataMoved = true;
            }
        } else if (nodeName.equals(TEXT_LIST)) {
            Node childFirstPart = child.cloneNode(false);
            if (handleList(childFirstPart, child)) {
                sectionFirstPart.appendChild(childFirstPart);
                dataMoved = true;
            }
        } else {
            // No handler for this element: drop its nested break so that the caller
            // does not find the same break again.
            removeSPB(child);
        }
        // Only the first break is handled per call.
        break;
    }
    return dataMoved;
}
/**
 * Splits a paragraph or heading at the first soft page break among its direct children.
 *
 * Children in front of the break are moved into a shallow copy of the paragraph.
 * Bookmark-start elements in front of the break are copied instead, so they stay in
 * the original paragraph as well. The break itself is removed.
 *
 * @param para paragraph to split; keeps everything after the break
 * @return the new paragraph holding the content in front of the break, or null if
 *         nothing was moved (the break came first)
 * @throws IllegalStateException if the break is nested inside an inline element of
 *         the paragraph, which this splitter cannot handle
 */
private static Node handleParagraph(Node para) {
    Node paraBefore = para.cloneNode(false);
    boolean dataMoved = false;
    // Counts the bookmark-start elements that were copied and left in place.
    int i = 0;
    // Plain ASCII name; the original identifier started with a Cyrillic letter that
    // only looks like a Latin "c".
    NodeList children = para.getChildNodes();
    while (children.getLength() > i) {
        Node child = children.item(i);
        if (child.getNodeType() == Node.ELEMENT_NODE) {
            String childName = child.getNodeName();
            if (containsSPB(child)) {
                if (!childName.equals(TEXT_SOFT_PAGE_BREAK)) {
                    // The original printed this message and called System.exit(1),
                    // terminating the whole JVM from library code. Failing with an
                    // exception leaves the decision to the caller.
                    throw new IllegalStateException(
                            "ERROR: SPB INSIDE Paragraph Element in inner element " + childName);
                }
                para.removeChild(child);
                break;
            }
            if (childName.equals(TEXT_BOOKMARK_START)) {
                paraBefore.appendChild(child.cloneNode(true));
                i++;
                continue;
            }
        }
        // Text nodes and break-free elements belong to the first part.
        paraBefore.appendChild(child);
        dataMoved = true;
    }
    addHyphen(para, paraBefore);
    if (dataMoved) {
        // The continuation of a split paragraph gets its own style without indent.
        removeIndent(para);
        return paraBefore;
    }
    return null;
}
/**
 * Appends {@code child} to {@code parent}.
 *
 * @return always true, so callers can assign the result to their "data moved" flag
 */
private static boolean appendChild(Node parent, Node child) {
    parent.appendChild(child);
    return true;
}
/**
 * Puts a hyphen (U+2010) at the end of {@code paraBefore} when the split fell inside
 * a word.
 *
 * If the last character in front of the split and the first character after it are
 * both letters, a hyphen is appended. Otherwise, if the text in front of the split
 * ends with a soft hyphen (U+00AD), that character is replaced by a hyphen.
 *
 * @param para       paragraph holding the content after the split
 * @param paraBefore paragraph holding the content in front of the split
 */
private static void addHyphen(Node para, Node paraBefore) {
    Character endOfFirstPart = getLastChar(paraBefore);
    Character startOfSecondPart = getFirstChar(para);
    if (endOfFirstPart == null || startOfSecondPart == null) {
        return;
    }
    // Walk down to the deepest last descendant, the node whose text ends the part.
    Node tail = paraBefore;
    for (Node next = tail.getLastChild(); next != null; next = tail.getLastChild()) {
        tail = next;
    }
    String tailText = tail.getTextContent();
    char last = endOfFirstPart;
    char first = startOfSecondPart;
    if (Character.isLetter(last) && Character.isLetter(first)) {
        tail.setTextContent(tailText + "\u2010");
        return;
    }
    if (last == '\u00ad') {
        String withoutSoftHyphen = tailText.substring(0, tailText.length() - 1);
        tail.setTextContent(withoutSoftHyphen + "\u2010");
    }
}
/**
 * Returns the last character of the text content of the deepest last descendant of
 * {@code para}.
 *
 * @return that character, or null if {@code para} is null or the text is null or empty
 */
private static Character getLastChar(Node para) {
    if (para == null) {
        return null;
    }
    Node deepest = para;
    for (Node next = deepest.getLastChild(); next != null; next = deepest.getLastChild()) {
        deepest = next;
    }
    String text = deepest.getTextContent();
    if (text == null || text.isEmpty()) {
        return null;
    }
    return text.charAt(text.length() - 1);
}
/**
 * Returns the first character of the text content of the deepest first descendant of
 * {@code para}.
 *
 * @return that character, or null if {@code para} is null or the text is null or empty
 */
private static Character getFirstChar(Node para) {
    if (para == null) {
        return null;
    }
    Node deepest = para;
    for (Node next = deepest.getFirstChild(); next != null; next = deepest.getFirstChild()) {
        deepest = next;
    }
    String text = deepest.getTextContent();
    if (text == null || text.isEmpty()) {
        return null;
    }
    return text.charAt(0);
}
/**
 * Points the paragraph at a clone of its current paragraph style and sets the text
 * indent of that clone to "0".
 *
 * NOTE(review): cloneParStyle presumably registers a copy of the style and returns
 * the copy's name - confirm against OfficeReader.
 *
 * @param paraAfter paragraph holding the content after a split
 */
private static void removeIndent(Node paraAfter) {
    String originalStyleName = Misc.getAttribute(paraAfter, TEXT_STYLE_NAME);
    String clonedStyleName = officeReader.cloneParStyle(originalStyleName);
    // Rewrite the style-name attribute in place.
    paraAfter.getAttributes().getNamedItem(TEXT_STYLE_NAME).setTextContent(clonedStyleName);
    String currentStyleName = Misc.getAttribute(paraAfter, TEXT_STYLE_NAME);
    StyleWithProperties clonedStyle = officeReader.getParStyle(currentStyleName);
    clonedStyle.setParProperty(FO_TEXT_INDENT, "0");
}
/**
 * Removes every soft page break element found in the subtree of {@code node},
 * including {@code node} itself if it is one. Only element nodes are inspected.
 *
 * @param node root of the subtree to clean
 */
private static void removeSPB(Node node) {
    if (node.getNodeType() != Node.ELEMENT_NODE) {
        return;
    }
    if (node.getNodeName().equals(TEXT_SOFT_PAGE_BREAK)) {
        Node parent = node.getParentNode();
        // A detached break has nothing to be removed from.
        if (parent != null) {
            parent.removeChild(node);
        }
        return;
    }
    // Remember the next sibling before recursing: the recursion may detach the
    // current child. The original walked the live child list by index and, after
    // every removal, skipped the node that slid into the freed position.
    Node child = node.getFirstChild();
    while (child != null) {
        Node next = child.getNextSibling();
        removeSPB(child);
        child = next;
    }
}
/**
 * Tells whether {@code node} is a soft page break element or has one anywhere in its
 * subtree. Only element nodes are inspected.
 *
 * @return true if a soft page break was found, false otherwise (always false for
 *         non-element nodes)
 */
private static boolean containsSPB(Node node) {
    if (node.getNodeType() != Node.ELEMENT_NODE) {
        return false;
    }
    if (node.getNodeName().equals(TEXT_SOFT_PAGE_BREAK)) {
        return true;
    }
    // Depth-first search through the children, stopping at the first hit.
    for (Node current = node.getFirstChild(); current != null; current = current.getNextSibling()) {
        if (containsSPB(current)) {
            return true;
        }
    }
    return false;
}
}