/************************************************************************
*
* OfficeReader.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2008 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2008-09-30)
*
*/
package writer2latex.office;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.HashSet;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Document;
import writer2latex.xmerge.OfficeDocument;
import writer2latex.util.Misc;
/**
This class reads and collects global information about an OOo document.
* This includes styles, forms, information about indexes and references etc.
*
*/
public class OfficeReader {
///////////////////////////////////////////////////////////////////////////
// Static methods
/**
 * Tests whether a node is an element whose qualified name starts with
 * the text namespace prefix (XMLString.TEXT_).
 * @param node the node to examine
 * @return true if the node is an element node with a text-prefixed name
 */
public static boolean isTextElement(Node node) {
    if (node.getNodeType()!=Node.ELEMENT_NODE) {
        return false;
    }
    return node.getNodeName().startsWith(XMLString.TEXT_);
}
/**
 * Tests whether a node is an element whose qualified name starts with
 * the table namespace prefix (XMLString.TABLE_).
 * @param node the node to examine
 * @return true if the node is an element node with a table-prefixed name
 */
public static boolean isTableElement(Node node) {
    if (node.getNodeType()!=Node.ELEMENT_NODE) {
        return false;
    }
    return node.getNodeName().startsWith(XMLString.TABLE_);
}
/**
 * Tests whether a node is an element whose qualified name starts with
 * the draw namespace prefix (XMLString.DRAW_).
 * @param node the node to examine
 * @return true if the node is an element node with a draw-prefixed name
 */
public static boolean isDrawElement(Node node) {
    if (node.getNodeType()!=Node.ELEMENT_NODE) {
        return false;
    }
    return node.getNodeName().startsWith(XMLString.DRAW_);
}
/**
 * Tests whether a node is a note element, i.e. an element named
 * XMLString.TEXT_NOTE, XMLString.TEXT_FOOTNOTE or XMLString.TEXT_ENDNOTE.
 * @param node the node to examine
 * @return true if the node is an element carrying one of these names
 */
public static boolean isNoteElement(Node node) {
    if (node.getNodeType()!=Node.ELEMENT_NODE) {
        return false;
    }
    String sName = node.getNodeName();
    return sName.equals(XMLString.TEXT_NOTE)
        || sName.equals(XMLString.TEXT_FOOTNOTE)
        || sName.equals(XMLString.TEXT_ENDNOTE);
}
/**
 * Tests whether the element children of a node consist of exactly one
 * paragraph (an element named XMLString.TEXT_P). Children that are not
 * elements (e.g. text nodes) are ignored.
 * @param node the node whose children are examined
 * @return true if there is exactly one paragraph child and no other
 * element child; false if there is no paragraph, more than one
 * paragraph, or any other element
 */
public static boolean isSingleParagraph(Node node) {
    int nParagraphs = 0;
    for (Node child = node.getFirstChild(); child!=null; child = child.getNextSibling()) {
        if (child.getNodeType()!=Node.ELEMENT_NODE) {
            continue; // only element children are relevant
        }
        if (!child.getNodeName().equals(XMLString.TEXT_P)) {
            return false; // an element which is not a paragraph
        }
        if (++nParagraphs>1) {
            return false; // a second paragraph
        }
    }
    return nParagraphs==1;
}
/**
 * Tests whether a node has no content apart from whitespace. Text nodes
 * are tested with isWhitespace, text elements (see isTextElement) are
 * examined recursively, and any other element counts as content.
 * @param node the node to examine (should be a paragraph node or a child
 * of a paragraph node)
 * @return true if nothing but whitespace was found (this includes a node
 * without children)
 */
public static boolean isWhitespaceContent(Node node) {
    for (Node child = node.getFirstChild(); child!=null; child = child.getNextSibling()) {
        switch (child.getNodeType()) {
            case Node.ELEMENT_NODE:
                // Only text elements are tolerated, and they must be blank themselves
                if (!isTextElement(child) || !isWhitespaceContent(child)) {
                    return false;
                }
                break;
            case Node.TEXT_NODE:
                if (!isWhitespace(child.getNodeValue())) {
                    return false;
                }
                break;
            default:
                // Other node types are not regarded as content
                break;
        }
    }
    return true;
}
/**
Checks, if this text is whitespace
* @param s the String to check
* @return true if the String contains whitespace only
*/
public static boolean isWhitespace(String s) {
int nLen = s.length();
for (int i=0; i0) {
// Found the next character!
return next.getNodeValue().charAt(0);
}
//else if (next!=null && next.getNodeType()==Node.ELEMENT_NODE &&
//XMLString.TEXT_S.equals(next.getNodeName())) {
// Next character is a space (first of several)
//return ' ';
//}
} while (next!=null);
// No more text in this paragraph!
return '\u0000';
}
/**
 * Returns the node following a given node in logical order (parents
 * before children, siblings from left to right). The children of draw
 * elements and of footnotes/endnotes are skipped. When the upward search
 * reaches a paragraph element (XMLString.TEXT_P) it stops, so the
 * paragraph is not left that way.
 * @param node the current node
 * @return the next node, or null if a paragraph element or the top of the
 * tree is reached without finding one
 */
private static Node getNextNode(Node node) {
    // If element node: Next node is first child
    if (node.getNodeType()==Node.ELEMENT_NODE && node.hasChildNodes() &&
        !isDrawElement(node) && !isNoteElement(node)) {
        return node.getFirstChild();
    }
    // else iterate for next node, but don't leave this paragraph
    Node next = node;
    while (next!=null) {
        // First look for next sibling
        Node sibling = next.getNextSibling();
        if (sibling!=null) { return sibling; }
        // Then move to parent, if this is the text:p node, we are done
        next = next.getParentNode();
        // The parent is null at the top of the tree (node outside any
        // paragraph); it must be tested before it is dereferenced
        if (next!=null && next.getNodeType()==Node.ELEMENT_NODE &&
            next.getNodeName().equals(XMLString.TEXT_P)) {
            return null;
        }
    }
    return null;
}
///////////////////////////////////////////////////////////////////////////
// Data
// The Document (isPackageFormat and isInPackage delegate to this object)
private OfficeDocument oooDoc = null;
// Font declarations (exposed by getFontDeclarations/getFontDeclaration)
private OfficeStyleFamily font = new OfficeStyleFamily(FontDeclaration.class);
// Styles: one OfficeStyleFamily per kind of style, each exposed by the
// corresponding get...Styles()/get...Style(String) accessors of this class
private OfficeStyleFamily text = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily par = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily section = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily table = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily column = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily row = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily cell = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily frame = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily presentation = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily drawingPage = new OfficeStyleFamily(StyleWithProperties.class);
private OfficeStyleFamily list = new OfficeStyleFamily(ListStyle.class);
private OfficeStyleFamily pageLayout = new OfficeStyleFamily(PageLayout.class);
private OfficeStyleFamily masterPage = new OfficeStyleFamily(MasterPage.class);
// Document-wide styles (exposed by getOutlineStyle,
// getFootnotesConfiguration and getEndnotesConfiguration)
private ListStyle outline = new ListStyle();
private PropertySet footnotes = null;
private PropertySet endnotes = null;
// Special styles
// Heading styles indexed by heading level; getHeadingStyle only reads the
// entries 1 to 10, so entry 0 is not used by that accessor
private StyleWithProperties[] heading = new StyleWithProperties[11];
// Returned by getFirstMasterPage; stays null until assigned elsewhere
private MasterPage firstMasterPage = null;
//private String sFirstMasterPageName = null;
// All indexes
// NOTE(review): key and element types of the following collections are not
// visible in this part of the file - confirm where they are filled
private Hashtable indexes = new Hashtable();
private HashSet indexSourceStyles = new HashSet();
private HashSet figureSequenceNames = new HashSet();
private HashSet tableSequenceNames = new HashSet();
private String sAutoFigureSequenceName = null;
private String sAutoTableSequenceName = null;
// Map paragraphs to sequence names (caption helper)
private Hashtable sequenceNames = new Hashtable();
// Map sequence reference names to sequence names
private Hashtable seqrefNames = new Hashtable();
// All references
private HashSet footnoteRef = new HashSet();
private HashSet endnoteRef = new HashSet();
private HashSet referenceRef = new HashSet();
private HashSet bookmarkRef = new HashSet();
private HashSet sequenceRef = new HashSet();
// Reference marks and bookmarks contained in headings
private HashSet referenceHeading = new HashSet();
private HashSet bookmarkHeading = new HashSet();
// All internal hyperlinks
private HashSet links = new HashSet();
// Forms
private FormsReader forms = new FormsReader();
// The main content element
private Element content = null;
// Identify OASIS OpenDocument format (when false, isInPackage treats an
// url starting with # as internal: the "old format")
private boolean bOpenDocument = false;
// Identify individual genres
private boolean bText = false;
private boolean bSpreadsheet = false;
private boolean bPresentation = false;
///////////////////////////////////////////////////////////////////////////
// Various methods
/**
 * Tells whether the document is in package format. The question is
 * passed on to the underlying OfficeDocument.
 * @return true if the document is in package format
 */
public boolean isPackageFormat() {
    return this.oooDoc.isPackageFormat();
}
/**
 * Tells whether an url refers to something inside the package. In the old
 * (non-OpenDocument) format every url starting with # is regarded as
 * internal; otherwise the url (with a leading "./" removed) is internal
 * if the document has an embedded object of that name.
 * @param sUrl the url to check
 * @return true if the url is internal to the package
 */
public boolean isInPackage(String sUrl) {
    if (!bOpenDocument && sUrl.startsWith("#")) {
        return true; // old format
    }
    String sName = sUrl.startsWith("./") ? sUrl.substring(2) : sUrl;
    return oooDoc.getEmbeddedObject(sName)!=null;
}
///////////////////////////////////////////////////////////////////////////
// Accessor methods
/**
 * Gets the family holding all font declarations of this reader.
 * @return the OfficeStyleFamily of font declarations
 */
public OfficeStyleFamily getFontDeclarations() {
    return this.font;
}
/**
 * Looks up a single font declaration by name in the font family.
 * @param sName the name of the font declaration
 * @return the result of the lookup, cast to FontDeclaration
 */
public FontDeclaration getFontDeclaration(String sName) {
    return (FontDeclaration) this.font.getStyle(sName);
}
// Accessor methods for styles
/**
 * Gets the family of text styles.
 * @return the OfficeStyleFamily of text styles
 */
public OfficeStyleFamily getTextStyles() {
    return this.text;
}
/**
 * Looks up a text style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the text style family
 */
public StyleWithProperties getTextStyle(String sName) {
    return (StyleWithProperties) this.text.getStyle(sName);
}
/**
 * Gets the family of paragraph styles.
 * @return the OfficeStyleFamily of paragraph styles
 */
public OfficeStyleFamily getParStyles() {
    return this.par;
}
/**
 * Looks up a paragraph style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the paragraph style family
 */
public StyleWithProperties getParStyle(String sName) {
    return (StyleWithProperties) this.par.getStyle(sName);
}
/**
 * Gets the default style of the paragraph style family.
 * @return the default style as reported by the family
 */
public StyleWithProperties getDefaultParStyle() {
    return (StyleWithProperties) this.par.getDefaultStyle();
}
/**
 * Gets the family of section styles.
 * @return the OfficeStyleFamily of section styles
 */
public OfficeStyleFamily getSectionStyles() {
    return this.section;
}
/**
 * Looks up a section style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the section style family
 */
public StyleWithProperties getSectionStyle(String sName) {
    return (StyleWithProperties) this.section.getStyle(sName);
}
/**
 * Gets the family of table styles.
 * @return the OfficeStyleFamily of table styles
 */
public OfficeStyleFamily getTableStyles() {
    return this.table;
}
/**
 * Looks up a table style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the table style family
 */
public StyleWithProperties getTableStyle(String sName) {
    return (StyleWithProperties) this.table.getStyle(sName);
}
/**
 * Gets the family of column styles.
 * @return the OfficeStyleFamily of column styles
 */
public OfficeStyleFamily getColumnStyles() {
    return this.column;
}
/**
 * Looks up a column style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the column style family
 */
public StyleWithProperties getColumnStyle(String sName) {
    return (StyleWithProperties) this.column.getStyle(sName);
}
/**
 * Gets the family of row styles.
 * @return the OfficeStyleFamily of row styles
 */
public OfficeStyleFamily getRowStyles() {
    return this.row;
}
/**
 * Looks up a row style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the row style family
 */
public StyleWithProperties getRowStyle(String sName) {
    return (StyleWithProperties) this.row.getStyle(sName);
}
/**
 * Gets the family of cell styles.
 * @return the OfficeStyleFamily of cell styles
 */
public OfficeStyleFamily getCellStyles() {
    return this.cell;
}
/**
 * Looks up a cell style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the cell style family
 */
public StyleWithProperties getCellStyle(String sName) {
    return (StyleWithProperties) this.cell.getStyle(sName);
}
/**
 * Gets the default style of the cell style family.
 * @return the default style as reported by the family
 */
public StyleWithProperties getDefaultCellStyle() {
    return (StyleWithProperties) this.cell.getDefaultStyle();
}
/**
 * Gets the family of frame styles.
 * @return the OfficeStyleFamily of frame styles
 */
public OfficeStyleFamily getFrameStyles() {
    return this.frame;
}
/**
 * Looks up a frame style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the frame style family
 */
public StyleWithProperties getFrameStyle(String sName) {
    return (StyleWithProperties) this.frame.getStyle(sName);
}
/**
 * Gets the default style of the frame style family.
 * @return the default style as reported by the family
 */
public StyleWithProperties getDefaultFrameStyle() {
    return (StyleWithProperties) this.frame.getDefaultStyle();
}
/**
 * Gets the family of presentation styles.
 * @return the OfficeStyleFamily of presentation styles
 */
public OfficeStyleFamily getPresentationStyles() {
    return this.presentation;
}
/**
 * Looks up a presentation style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the presentation style family
 */
public StyleWithProperties getPresentationStyle(String sName) {
    return (StyleWithProperties) this.presentation.getStyle(sName);
}
/**
 * Gets the default style of the presentation style family.
 * @return the default style as reported by the family
 */
public StyleWithProperties getDefaultPresentationStyle() {
    return (StyleWithProperties) this.presentation.getDefaultStyle();
}
/**
 * Gets the family of drawing page styles.
 * @return the OfficeStyleFamily of drawing page styles
 */
public OfficeStyleFamily getDrawingPageStyles() {
    return this.drawingPage;
}
/**
 * Looks up a drawing page style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the drawing page style family
 */
public StyleWithProperties getDrawingPageStyle(String sName) {
    return (StyleWithProperties) this.drawingPage.getStyle(sName);
}
/**
 * Gets the default style of the drawing page style family.
 * @return the default style as reported by the family
 */
public StyleWithProperties getDefaultDrawingPageStyle() {
    return (StyleWithProperties) this.drawingPage.getDefaultStyle();
}
/**
 * Gets the family of list styles.
 * @return the OfficeStyleFamily of list styles
 */
public OfficeStyleFamily getListStyles() {
    return this.list;
}
/**
 * Looks up a list style by name.
 * @param sName the name of the style
 * @return the result of the lookup in the list style family
 */
public ListStyle getListStyle(String sName) {
    return (ListStyle) this.list.getStyle(sName);
}
/**
 * Gets the family of page layouts.
 * @return the OfficeStyleFamily of page layouts
 */
public OfficeStyleFamily getPageLayouts() {
    return this.pageLayout;
}
/**
 * Looks up a page layout by name.
 * @param sName the name of the page layout
 * @return the result of the lookup in the page layout family
 */
public PageLayout getPageLayout(String sName) {
    return (PageLayout) this.pageLayout.getStyle(sName);
}
/**
 * Gets the family of master pages.
 * @return the OfficeStyleFamily of master pages
 */
public OfficeStyleFamily getMasterPages() {
    return this.masterPage;
}
/**
 * Looks up a master page by name.
 * @param sName the name of the master page
 * @return the result of the lookup in the master page family
 */
public MasterPage getMasterPage(String sName) {
    return (MasterPage) this.masterPage.getStyle(sName);
}
/**
 * Gets the document-wide outline style.
 * @return the ListStyle held as outline style
 */
public ListStyle getOutlineStyle() {
    return this.outline;
}
/**
 * Gets the document-wide footnotes configuration.
 * @return the PropertySet held for footnotes (null while none is set)
 */
public PropertySet getFootnotesConfiguration() {
    return this.footnotes;
}
/**
 * Gets the document-wide endnotes configuration.
 * @return the PropertySet held for endnotes (null while none is set)
 */
public PropertySet getEndnotesConfiguration() {
    return this.endnotes;
}
/**
 * Gets the paragraph style recorded for headings of a given level.
 * Only the levels 1 to 10 are supported; for any other level, and for a
 * level without a recorded style, the result is null.
 * In principle each heading could use its own style, but in practice
 * the same (soft) style is used for all headings of a level.
 * @param nLevel the level of the heading
 * @return a StyleWithProperties object representing the style, or null
 */
public StyleWithProperties getHeadingStyle(int nLevel) {
    if (nLevel<1 || nLevel>10) {
        return null;
    }
    return heading[nLevel];
}
/**
 * Gets the first master page used in the document. If the document uses
 * no master page explicitly, this is the first master page found in the
 * styles; if there are no master pages at all, the result is null.
 * @return a MasterPage object representing the master page, or null
 */
public MasterPage getFirstMasterPage() {
    return this.firstMasterPage;
}
/** Return the iso language used in most paragraph styles (in a well-structured
* document this will be the default language)
* TODO: Base on content rather than style
* @return the iso language
*/
public String getMajorityLanguage() {
Hashtable