w2phtml/source/java/writer2latex/xmerge/OfficeDocument.java
henrikjust ce61f7bc3b Writer2xhtml custom config ui + EPUB export
git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@55 f0f2a975-2e09-46c8-9428-3b39399b9f3c
2010-03-29 11:07:24 +00:00

1296 lines
44 KiB
Java

/************************************************************************
*
* The Contents of this file are made available subject to the terms of
*
* - GNU Lesser General Public License Version 2.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
// This version is adapted for Writer2LaTeX
// Version 1.2 (2010-03-28)
package writer2latex.xmerge;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.BufferedReader;
import java.io.StringReader;
import java.io.InputStreamReader;
//import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
//import java.util.zip.ZipEntry;
//import java.util.zip.ZipInputStream;
import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.DocumentType;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.w3c.dom.NamedNodeMap;
import org.xml.sax.SAXException;
//import org.xml.sax.SAXParseException;
import writer2latex.office.MIMETypes;
import writer2latex.util.Misc;
//import org.openoffice.xmerge.util.Resources;
//import org.openoffice.xmerge.util.Debug;
/**
* An implementation of <code>Document</code> for
* StarOffice documents.
*/
public class OfficeDocument
implements writer2latex.xmerge.Document, OfficeConstants {
/**
 * Factory for <code>DocumentBuilder</code> objects. The factory is static,
 * so it is shared by every instance; the constructor reconfigures its
 * validating and namespace-aware flags.
 */
private static DocumentBuilderFactory factory =
DocumentBuilderFactory.newInstance();
/** DOM <code>Document</code> of content.xml. */
private Document contentDoc = null;
/** DOM <code>Document</code> of meta.xml. */
private Document metaDoc = null;
/** DOM <code>Document</code> of settings.xml. */
private Document settingsDoc = null;
/** DOM <code>Document</code> of styles.xml. */
private Document styleDoc = null;
/** DOM <code>Document</code> of META-INF/manifest.xml. */
private Document manifestDoc = null;
/** Document name with the file extension stripped. */
private String documentName = null;
/** Document name followed by the extension reported by <code>getFileExtension</code>. */
private String fileName = null;
// Resources object (disabled in this adaptation).
//private Resources res = null;
/**
 * <code>OfficeZip</code> object to store zip contents from
 * read <code>InputStream</code>. Note that this member
 * will still be null if it was initialized using a template
 * file instead of reading from a StarOffice zipped
 * XML file. <code>isPackageFormat</code> tests this member against null.
 */
private OfficeZip zip = null;
/**
 * Collection to keep track of the embedded objects in the document,
 * keyed by object name. Stays null until objects are added or the
 * manifest is scanned.
 */
private Map<String, EmbeddedObject> embeddedObjects = null;
/**
 * Creates a document with the given name, using a namespace-aware,
 * non-validating parser configuration.
 *
 * @param name <code>Document</code> name.
 */
public OfficeDocument(String name) {
    this(name, true, false);
}
/**
* Constructor with arguments to set <code>namespaceAware</code>
* and <code>validating</code> flags.
*
* @param name <code>Document</code> name (may or may not
* contain extension).
* @param namespaceAware Value for <code>namespaceAware</code> flag.
* @param validating Value for <code>validating</code> flag.
*/
public OfficeDocument(String name, boolean namespaceAware, boolean validating) {
//res = Resources.getInstance();
factory.setValidating(validating);
factory.setNamespaceAware(namespaceAware);
this.documentName = trimDocumentName(name);
this.fileName = documentName + getFileExtension();
}
/**
 * Removes the file extension from the <code>Document</code>
 * name. The extension is the one reported by
 * <code>getFileExtension</code> and is matched without regard to case.
 *
 * @param name Full <code>Document</code> name, with or without extension.
 *
 * @return Name of <code>Document</code> without the extension; the
 * unchanged name if it does not end with the extension.
 */
private String trimDocumentName(String name) {
    String ext = getFileExtension();
    int endIndex = name.length() - ext.length();
    // regionMatches with ignoreCase compares character by character and does
    // not depend on the default locale (String.toLowerCase() does, e.g. a
    // Turkish locale maps 'I' to a dotless i). It returns false when the
    // name is shorter than the extension (negative offset).
    if (name.regionMatches(true, endIndex, ext, 0, ext.length())) {
        // strip the extension
        return name.substring(0, endIndex);
    }
    return name;
}
// FIX2 (HJ): Determine whether this is package or flat format
/**
 * Tells whether the document is in package format.
 * The answer is based on whether an <code>OfficeZip</code> object is present.
 *
 * @return true if the document is in package format, false if it's flat xml
 */
public boolean isPackageFormat() {
    return this.zip != null;
}
/**
 * Returns the DOM <code>Document</code> that holds content.xml.
 * The constructor does not create a content DOM, so this is
 * <code>null</code> until <code>read</code>, <code>initContentDOM</code>
 * or <code>setContentDOM</code> has been called on this object.
 *
 * @return DOM <code>Document</code> object, possibly <code>null</code>.
 */
public Document getContentDOM() {
    return this.contentDoc;
}
/**
 * Returns the DOM <code>Document</code> that holds meta.xml.
 * The constructor does not create a meta DOM, so this is
 * <code>null</code> until <code>read</code> or <code>setMetaDOM</code>
 * has supplied one.
 *
 * @return DOM <code>Document</code> object, possibly <code>null</code>.
 */
public Document getMetaDOM() {
    return this.metaDoc;
}
/**
 * Returns the DOM <code>Document</code> that holds settings.xml.
 * The constructor does not create a settings DOM, so this is
 * <code>null</code> until <code>read</code>, <code>initSettingsDOM</code>
 * or <code>setSettingsDOM</code> has supplied one.
 *
 * @return DOM <code>Document</code> object, possibly <code>null</code>.
 */
public Document getSettingsDOM() {
    return this.settingsDoc;
}
/**
 * Replaces the content tree of the document.
 *
 * @param newDom <code>Node</code> containing the new content tree; it is
 * cast to <code>Document</code>, so any other node type fails with a
 * <code>ClassCastException</code>.
 */
public void setContentDOM(Node newDom) {
    this.contentDoc = (Document) newDom;
}
/**
 * Replaces the meta tree of the document.
 *
 * @param newDom <code>Node</code> containing the new meta tree; it is
 * cast to <code>Document</code>, so any other node type fails with a
 * <code>ClassCastException</code>.
 */
public void setMetaDOM(Node newDom) {
    this.metaDoc = (Document) newDom;
}
/**
 * Replaces the settings tree of the document.
 *
 * @param newDom <code>Node</code> containing the new settings tree; it is
 * cast to <code>Document</code>, so any other node type fails with a
 * <code>ClassCastException</code>.
 */
public void setSettingsDOM(Node newDom) {
    this.settingsDoc = (Document) newDom;
}
/**
 * Replaces the style tree of the document.
 *
 * @param newDom <code>Node</code> containing the new style tree; it is
 * cast to <code>Document</code>, so any other node type fails with a
 * <code>ClassCastException</code>.
 */
public void setStyleDOM(Node newDom) {
    this.styleDoc = (Document) newDom;
}
/**
 * Returns the DOM <code>Document</code> that holds the styles.
 * The constructor does not create a style DOM, and depending on the
 * <code>InputStream</code> a <code>read</code> method may or may not
 * build one, so the result can be <code>null</code>. To start a new
 * style DOM, call <code>initStyleDOM</code> first.
 *
 * @return DOM <code>Document</code> object, possibly <code>null</code>.
 */
public Document getStyleDOM() {
    return this.styleDoc;
}
/**
 * Returns the name of the <code>Document</code>, i.e. the name given to
 * the constructor with the file extension stripped.
 *
 * @return The name of <code>Document</code>.
 */
public String getName() {
    return this.documentName;
}
/**
 * Returns the file name of the <code>Document</code>: the document name
 * followed by the extension (which may be empty).
 *
 * @return The file name of <code>Document</code>.
 */
public String getFileName() {
    return this.fileName;
}
/**
 * Returns the file extension for this type of
 * <code>Document</code>. This base implementation has no extension.
 *
 * @return The file extension of <code>Document</code>; the empty string here.
 */
// Used by the constructor and by trimDocumentName.
protected String getFileExtension() {
    return "";
}
/**
 * Returns all the embedded objects (graphics, formulae, etc.) present in
 * this document. On the first call the manifest, if there is one, is
 * scanned and the result is cached. If there is neither a manifest nor
 * any added object, an empty iterator is returned.
 *
 * @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects,
 * never <code>null</code>.
 */
public Iterator<EmbeddedObject> getEmbeddedObjects() {
    if (embeddedObjects == null) {
        if (manifestDoc == null) {
            // Nothing to scan and nothing added yet. Return an empty iterator
            // rather than dereferencing null. The empty map is not cached, so
            // a manifest loaded later is still scanned.
            return new HashMap<String, EmbeddedObject>().values().iterator();
        }
        embeddedObjects = new HashMap<String, EmbeddedObject>();
        // Need to read the manifest file and construct a list of objects
        NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE);
        int len = nl.getLength();
        for (int i = 0; i < len; i++) {
            NamedNodeMap attrs = nl.item(i).getAttributes();
            Node pathAttr = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH);
            if (pathAttr == null) {
                // An entry without a path cannot be addressed; skip it
                continue;
            }
            Node typeAttr = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE);
            String path = pathAttr.getNodeValue();
            // A missing media type is treated like an empty one
            String type = typeAttr == null ? "" : typeAttr.getNodeValue();
            /*
             * According to OpenOffice.org XML File Format document (ver. 1)
             * there are only two types of embedded object:
             *
             * Objects with an XML representation.
             * Objects without an XML representation.
             *
             * The former are represented by one or more XML files.
             * The latter are in binary form.
             */
            // FIX2 (HJ): Allow either OOo 1.x or OpenDocument embedded objects
            if (type.startsWith("application/vnd.sun.xml") || type.startsWith("application/vnd.oasis.opendocument")) {
                if (path.equals("/")) {
                    // Exclude the main document entries
                    continue;
                }
                // Take off the trailing '/', but only if there is one
                String name = path.endsWith("/") ? path.substring(0, path.length() - 1) : path;
                embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip));
            }
            else if (type.equals("text/xml")) {
                // XML entries are either embedded StarOffice doc entries or main
                // document entries
                continue;
            }
            else { // FIX (HJ): allows empty MIME type
                embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip));
            }
        }
    }
    return embeddedObjects.values().iterator();
}
/**
 * Looks up an embedded object by name.
 * The name should be stripped of any preceding path characters, such as
 * '/', '.' or '#'. If the objects have not been collected yet, the
 * manifest is scanned first.
 *
 * @param name The name of the embedded object to retrieve.
 *
 * @return An <code>EmbeddedObject</code> instance representing the named
 * object, or <code>null</code> if the name is <code>null</code>, the
 * object is unknown, or there is no manifest to scan.
 */
public EmbeddedObject getEmbeddedObject(String name) {
    if (name == null) {
        return null;
    }
    if (embeddedObjects == null) {
        // FIX2 (HJ): Return null if there's no manifest
        if (manifestDoc == null) {
            return null;
        }
        // Populates embeddedObjects as a side effect
        getEmbeddedObjects();
    }
    // The map never stores null values, so a plain lookup is sufficient
    return embeddedObjects.get(name);
}
/**
 * Registers a new embedded object with the document under the object's
 * own name. A <code>null</code> argument is ignored.
 *
 * @param embObj An instance of <code>EmbeddedObject</code>.
 */
public void addEmbeddedObject(EmbeddedObject embObj) {
    if (embObj != null) {
        if (embeddedObjects == null) {
            // Create the collection lazily on first use
            embeddedObjects = new HashMap<String, EmbeddedObject>();
        }
        embeddedObjects.put(embObj.getName(), embObj);
    }
}
/**
 * Reads the Office <code>Document</code> from the given
 * <code>InputStream</code>. The whole stream is buffered in memory first.
 * FIX3 (HJ): Perform simple type detection to determine package or flat format
 *
 * @param is Office document <code>InputStream</code>.
 *
 * @throws IOException If any I/O error occurs.
 */
public void read(InputStream is) throws IOException {
    final byte[] data = Misc.inputStreamToByteArray(is);
    // if it's zip, assume package - otherwise assume flat
    final boolean isPackage = MIMETypes.ZIP.equals(MIMETypes.getMagicMIMEType(data));
    read(new ByteArrayInputStream(data), isPackage);
}
/**
 * Reads a document in package (zip) format. content.xml is mandatory;
 * styles.xml, meta.xml, settings.xml and META-INF/manifest.xml are parsed
 * only when the package contains them.
 *
 * @param is Office document <code>InputStream</code> in zip format.
 *
 * @throws IOException If any I/O error occurs, if content.xml is
 * missing, or if one of the entries cannot be parsed.
 */
private void readZip(InputStream is) throws IOException {
    final DocumentBuilder parser;
    try {
        parser = factory.newDocumentBuilder();
    } catch (ParserConfigurationException ex) {
        throw new OfficeDocumentException(ex);
    }
    // read in Office zip file format
    zip = new OfficeZip();
    zip.read(is);

    // content.xml is the only entry that must be present
    byte[] content = zip.getContentXMLBytes();
    if (content == null) {
        throw new OfficeDocumentException("Entry content.xml not found in file");
    }
    contentDoc = parseZipEntry(parser, content);

    byte[] styles = zip.getStyleXMLBytes();
    if (styles != null) {
        styleDoc = parseZipEntry(parser, styles);
    }
    byte[] meta = zip.getMetaXMLBytes();
    if (meta != null) {
        metaDoc = parseZipEntry(parser, meta);
    }
    byte[] settings = zip.getSettingsXMLBytes();
    if (settings != null) {
        settingsDoc = parseZipEntry(parser, settings);
    }
    // Read in the META-INF/manifest.xml file
    byte[] manifest = zip.getManifestXMLBytes();
    if (manifest != null) {
        manifestDoc = parseZipEntry(parser, manifest);
    }
}

/** Parses one zip entry, reporting a parse failure as an OfficeDocumentException. */
private static Document parseZipEntry(DocumentBuilder parser, byte[] xml) throws IOException {
    try {
        return parse(parser, xml);
    } catch (SAXException ex) {
        throw new OfficeDocumentException(ex);
    }
}
/**
 * Read the Office <code>Document</code> from the given
 * <code>InputStream</code>.
 *
 * <p>A zipped document is delegated to <code>readZip</code>. A flat XML
 * document is parsed as a whole and then split into separate content,
 * styles, settings and meta DOM trees. For each section, the first
 * element with the relevant tag name (if any) is deep-copied into the
 * new tree.</p>
 *
 * @param is Office document <code>InputStream</code>.
 * @param isZip <code>boolean</code> Identifies whether
 * a file is zipped or not
 *
 * @throws IOException If any I/O error occurs or the XML cannot be parsed.
 */
public void read(InputStream is, boolean isZip) throws IOException {
    if (isZip) {
        // readZip creates its own DocumentBuilder
        readZip(is);
        return;
    }
    DocumentBuilder builder;
    try {
        builder = factory.newDocumentBuilder();
    } catch (ParserConfigurationException ex) {
        throw new OfficeDocumentException(ex);
    }
    try {
        Reader r = secondHack(is);
        // DocumentBuilder.parse either returns a document or throws, so no
        // null check is needed on the result
        org.w3c.dom.Document flatDoc = builder.parse(new InputSource(r));

        /*content*/
        contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
        // FIX (HJ): Include office:font-decls in content DOM
        copyFirstElement(flatDoc, contentDoc, TAG_OFFICE_FONT_DECLS);
        // FIX2 (HJ): Include office:font-face-decls (OpenDocument) in content DOM
        copyFirstElement(flatDoc, contentDoc, TAG_OFFICE_FONT_FACE_DECLS);
        copyFirstElement(flatDoc, contentDoc, TAG_OFFICE_AUTOMATIC_STYLES);
        copyFirstElement(flatDoc, contentDoc, TAG_OFFICE_BODY);

        /*Styles*/
        styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
        // FIX (HJ): Include office:font-decls in styles DOM
        copyFirstElement(flatDoc, styleDoc, TAG_OFFICE_FONT_DECLS);
        // FIX2 (HJ): Include office:font-face-decls in styles DOM
        copyFirstElement(flatDoc, styleDoc, TAG_OFFICE_FONT_FACE_DECLS);
        copyFirstElement(flatDoc, styleDoc, TAG_OFFICE_STYLES);
        // FIX (HJ): Include office:automatic-styles in styles DOM
        copyFirstElement(flatDoc, styleDoc, TAG_OFFICE_AUTOMATIC_STYLES);
        // FIX (HJ): Include office:master-styles in styles DOM
        copyFirstElement(flatDoc, styleDoc, TAG_OFFICE_MASTER_STYLES);

        /*Settings*/
        settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
        copyFirstElement(flatDoc, settingsDoc, TAG_OFFICE_SETTINGS);

        /*Meta*/
        metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META);
        copyFirstElement(flatDoc, metaDoc, TAG_OFFICE_META);
    }
    catch (SAXException ex) {
        throw new OfficeDocumentException(ex);
    }
}

/**
 * Deep-copies the first element named tagName from one document into the
 * root element of another; does nothing if there is no such element.
 */
private static void copyFirstElement(Document from, Document to, String tagName) {
    NodeList matches = from.getElementsByTagName(tagName);
    if (matches.getLength() > 0) {
        Node copy = to.importNode(matches.item(0), true);
        to.getDocumentElement().appendChild(copy);
    }
}
/**
 * Parses the given <code>byte</code> array into a DOM
 * <code>Document</code> object using the supplied
 * <code>DocumentBuilder</code>. The bytes are first passed through
 * <code>hack</code>, which filters out the DOCTYPE declaration.
 *
 * @param builder <code>DocumentBuilder</code> object for parsing.
 * @param bytes <code>byte</code> array for parsing.
 *
 * @return Resulting DOM <code>Document</code> object.
 *
 * @throws SAXException If any parsing error occurs.
 * @throws IOException If any I/O error occurs.
 */
static Document parse(DocumentBuilder builder, byte bytes[])
        throws SAXException, IOException {
    // TODO: replace hack with a more appropriate fix.
    Reader filtered = hack(new ByteArrayInputStream(bytes));
    return builder.parse(new InputSource(filtered));
}
/**
 * Returns the MIME type of the document. This base implementation
 * reports no MIME type.
 *
 * @return String The document's MIME type; the empty string here.
 */
// Consulted when the manifest is initialized and when flat XML is written.
protected String getDocumentMimeType() {
    return "";
}
/**
 * Writes the document in Office ZIP file format. A fresh manifest is
 * built on every call. It lists the embedded objects first, then the
 * Pictures directory, content.xml and whichever of styles.xml, meta.xml
 * and settings.xml exist.
 *
 * @param os XML <code>OutputStream</code>.
 *
 * @throws IOException If any I/O error occurs.
 */
public void write(OutputStream os) throws IOException {
    if (zip == null) {
        zip = new OfficeZip();
    }
    initManifestDOM();
    final Element root = manifestDoc.getDocumentElement();

    // The EmbeddedObjects come first.
    for (Iterator<EmbeddedObject> it = getEmbeddedObjects(); it.hasNext(); ) {
        EmbeddedObject embedded = it.next();
        embedded.writeManifestData(manifestDoc);
        embedded.write(zip);
    }

    // Add in the entry for the Pictures directory. Always present.
    appendManifestFileEntry(root, "Pictures/", "");

    // Write content to the Zip file and then write any of the optional
    // data, if it exists.
    zip.setContentXMLBytes(docToBytes(contentDoc));
    appendManifestFileEntry(root, "content.xml", "text/xml");
    if (styleDoc != null) {
        zip.setStyleXMLBytes(docToBytes(styleDoc));
        appendManifestFileEntry(root, "styles.xml", "text/xml");
    }
    if (metaDoc != null) {
        zip.setMetaXMLBytes(docToBytes(metaDoc));
        appendManifestFileEntry(root, "meta.xml", "text/xml");
    }
    if (settingsDoc != null) {
        zip.setSettingsXMLBytes(docToBytes(settingsDoc));
        appendManifestFileEntry(root, "settings.xml", "text/xml");
    }

    // The manifest is serialized last so that it lists every entry above
    zip.setManifestXMLBytes(docToBytes(manifestDoc));
    zip.write(os);
}

/** Appends one file entry (path and media type) to the manifest root element. */
private void appendManifestFileEntry(Element manifestRoot, String path, String mediaType) {
    Element entry = manifestDoc.createElement(TAG_MANIFEST_FILE);
    entry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, path);
    entry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, mediaType);
    manifestRoot.appendChild(entry);
}
/**
 * Write out the document either in Office ZIP file format or as a single
 * flat XML file.
 *
 * <p>For flat XML a new <code>office:document</code> is built and the
 * first meta, styles, settings, automatic-styles and body elements found
 * in the respective DOM trees are copied into it, in that order.</p>
 *
 * <p>Failures are reported to the caller: they are no longer printed to
 * standard error and ignored, which made a failed write look successful.</p>
 *
 * @param os XML <code>OutputStream</code>.
 * @param isZip <code>boolean</code> true for ZIP format, false for flat XML.
 *
 * @throws IOException If any I/O error occurs, or if no XML parser can be
 * configured.
 */
public void write(OutputStream os, boolean isZip) throws IOException {
    if (isZip) {
        write(os);
        return;
    }
    DOMImplementation domImpl;
    try {
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        domImpl = builderFactory.newDocumentBuilder().getDOMImplementation();
    } catch (ParserConfigurationException ex) {
        throw new OfficeDocumentException(ex);
    }
    // The document is created without a DOCTYPE
    org.w3c.dom.Document newDoc = domImpl.createDocument("http://openoffice.org/2000/office","office:document",null);
    Element rootElement = newDoc.getDocumentElement();
    rootElement.setAttribute("xmlns:office","http://openoffice.org/2000/office");
    rootElement.setAttribute("xmlns:style","http://openoffice.org/2000/style" );
    rootElement.setAttribute("xmlns:text","http://openoffice.org/2000/text");
    rootElement.setAttribute("xmlns:table","http://openoffice.org/2000/table");
    rootElement.setAttribute("xmlns:draw","http://openoffice.org/2000/drawing");
    rootElement.setAttribute("xmlns:fo","http://www.w3.org/1999/XSL/Format" );
    rootElement.setAttribute("xmlns:xlink","http://www.w3.org/1999/xlink" );
    rootElement.setAttribute("xmlns:dc","http://purl.org/dc/elements/1.1/" );
    rootElement.setAttribute("xmlns:meta","http://openoffice.org/2000/meta" );
    rootElement.setAttribute("xmlns:number","http://openoffice.org/2000/datastyle" );
    rootElement.setAttribute("xmlns:svg","http://www.w3.org/2000/svg" );
    rootElement.setAttribute("xmlns:chart","http://openoffice.org/2000/chart" );
    rootElement.setAttribute("xmlns:dr3d","http://openoffice.org/2000/dr3d" );
    rootElement.setAttribute("xmlns:math","http://www.w3.org/1998/Math/MathML" );
    rootElement.setAttribute("xmlns:form","http://openoffice.org/2000/form" );
    rootElement.setAttribute("xmlns:script","http://openoffice.org/2000/script" );
    rootElement.setAttribute("xmlns:config","http://openoffice.org/2001/config" );
    // #i41033# OASIS format needs the "office:class" set.
    // Compare by value: == on strings only tests object identity.
    String mimeType = getDocumentMimeType();
    if (SXC_MIME_TYPE.equals(mimeType)) {
        rootElement.setAttribute("office:class","spreadsheet" );
    }
    else if (SXW_MIME_TYPE.equals(mimeType)) {
        rootElement.setAttribute("office:class","text" );
    }
    rootElement.setAttribute("office:version","1.0");

    appendFirstMatch(metaDoc, newDoc, TAG_OFFICE_META);
    appendFirstMatch(styleDoc, newDoc, TAG_OFFICE_STYLES);
    appendFirstMatch(settingsDoc, newDoc, TAG_OFFICE_SETTINGS);
    appendFirstMatch(contentDoc, newDoc, TAG_OFFICE_AUTOMATIC_STYLES);
    appendFirstMatch(contentDoc, newDoc, TAG_OFFICE_BODY);

    os.write(docToBytes(newDoc));
}

/**
 * Deep-copies the first element named tagName from the source document
 * into the root element of the target; does nothing if the source is
 * null or has no such element.
 */
private static void appendFirstMatch(Document source, Document target, String tagName) {
    if (source == null) {
        return;
    }
    NodeList matches = source.getElementsByTagName(tagName);
    if (matches.getLength() > 0) {
        Node copy = target.importNode(matches.item(0), true);
        target.getDocumentElement().appendChild(copy);
    }
}
/**
 * <p>Write out a <code>org.w3c.dom.Document</code> object into a
 * <code>byte</code> array.</p>
 *
 * <p>We may have multiple XML parsers in the classpath, so the actual type
 * of <code>doc</code> may vary. The legacy JAXP 1.0, Crimson and Xerces
 * implementations are serialized with their own APIs, called via
 * reflection so there is no compile-time dependency on them. Any other
 * implementation is serialized with the standard
 * <code>javax.xml.transform</code> identity transformer; previously such
 * documents were rejected with an exception.</p>
 *
 * @param doc DOM <code>Document</code> object.
 *
 * @return <code>byte</code> array of DOM <code>Document</code>
 * object.
 *
 * @throws IOException If the document cannot be serialized; the
 * underlying exception is attached as the cause.
 */
static byte[] docToBytes(Document doc)
        throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    String domImpl = doc.getClass().getName();
    try {
        if (domImpl.equals("com.sun.xml.tree.XmlDocument")
                || domImpl.equals("org.apache.crimson.tree.XmlDocument")) {
            // JAXP 1.0 and Crimson: the write method is in the XmlDocument
            // class itself, not a helper
            java.lang.reflect.Method write = doc.getClass().getMethod("write",
                new Class<?>[] { java.io.OutputStream.class });
            write.invoke(doc, new Object[] { baos });
        }
        else if (domImpl.equals("org.apache.xerces.dom.DocumentImpl")
                || domImpl.equals("org.apache.xerces.dom.DeferredDocumentImpl")) {
            // Xerces: use its XMLSerializer helper
            Class<?> serializerClass = Class.forName("org.apache.xml.serialize.XMLSerializer");
            Class<?> formatClass = Class.forName("org.apache.xml.serialize.OutputFormat");
            // Get the OutputStream constructor
            // May want to use the OutputFormat parameter at some stage too
            java.lang.reflect.Constructor<?> con = serializerClass.getConstructor(
                new Class<?>[] { java.io.OutputStream.class, formatClass });
            java.lang.reflect.Method serialize = serializerClass.getMethod("serialize",
                new Class<?>[] { Document.class });
            Object serializer = con.newInstance(new Object[] { baos, null });
            // Now call serialize to write the document
            serialize.invoke(serializer, new Object[] { doc });
        }
        else {
            // Any other DOM implementation (including the one bundled with
            // the JDK): use the standard identity transformer
            javax.xml.transform.Transformer transformer =
                javax.xml.transform.TransformerFactory.newInstance().newTransformer();
            // The transformer only emits a DOCTYPE if told the identifiers
            DocumentType docType = doc.getDoctype();
            if (docType != null) {
                if (docType.getPublicId() != null) {
                    transformer.setOutputProperty(
                        javax.xml.transform.OutputKeys.DOCTYPE_PUBLIC, docType.getPublicId());
                }
                if (docType.getSystemId() != null) {
                    transformer.setOutputProperty(
                        javax.xml.transform.OutputKeys.DOCTYPE_SYSTEM, docType.getSystemId());
                }
            }
            transformer.transform(new javax.xml.transform.dom.DOMSource(doc),
                new javax.xml.transform.stream.StreamResult(baos));
        }
    }
    catch (Exception e) {
        // We may get various reflection or serializer errors, but the bottom
        // line is that the steps being executed did not work. Keep the
        // original exception as the cause for diagnosis.
        IOException ioe = new IOException(e.toString());
        ioe.initCause(e);
        throw ioe;
    }
    return baos.toByteArray();
}
/**
 * Creates a fresh content DOM <code>Document</code> holding the minimum
 * OpenOffice XML tags: empty font declarations, automatic styles and body
 * elements under the content root.
 *
 * @throws IOException If any I/O error occurs.
 */
public final void initContentDOM() throws IOException {
    contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
    final Element root = contentDoc.getDocumentElement();
    // this is a work-around for a bug in Office6.0 - not really
    // needed but StarCalc 6.0 will crash without this tag.
    final String[] skeleton = {
        TAG_OFFICE_FONT_DECLS,
        TAG_OFFICE_AUTOMATIC_STYLES,
        TAG_OFFICE_BODY
    };
    for (String tag : skeleton) {
        root.appendChild(contentDoc.createElement(tag));
    }
}
/**
 * Creates a fresh settings DOM <code>Document</code> holding the minimum
 * OpenOffice XML tags: an empty settings element under the settings root.
 *
 * @throws IOException If any I/O error occurs.
 */
public final void initSettingsDOM() throws IOException {
    settingsDoc = createSettingsDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
    // this is a work-around for a bug in Office6.0 - not really
    // needed but StarCalc 6.0 will crash without this tag.
    final Element settings = settingsDoc.createElement(TAG_OFFICE_SETTINGS);
    settingsDoc.getDocumentElement().appendChild(settings);
}
/**
 * Creates a fresh styles DOM <code>Document</code> consisting of the
 * styles root element only.
 *
 * @throws IOException If any I/O error occurs.
 */
public final void initStyleDOM() throws IOException {
    this.styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
}
/**
 * <p>Creates a new settings DOM <code>Document</code> containing only the
 * root element with the namespace declarations and version attribute
 * needed for settings.</p>
 *
 * <p>Unlike <code>createDOM</code>, no <i>office:class</i> attribute
 * is set.</p>
 *
 * @param rootName root name of <code>Document</code>.
 *
 * @return The new DOM <code>Document</code>.
 *
 * @throws IOException If no XML parser can be configured.
 */
private final Document createSettingsDOM(String rootName) throws IOException {
    Document doc = null;
    try {
        DocumentBuilder builder = factory.newDocumentBuilder();
        doc = builder.newDocument();
    } catch (ParserConfigurationException ex) {
        throw new OfficeDocumentException(ex);
    }
    Element root = doc.createElement(rootName);
    doc.appendChild(root);
    root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
    // XLink is a W3C namespace, as declared by the other DOM factories in
    // this class; it previously pointed at an openoffice.org URI here
    root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
    root.setAttribute("xmlns:config", "http://openoffice.org/2001/config");
    root.setAttribute("office:version", "1.0");
    return doc;
}
/**
 * <p>Builds a new DOM <code>Document</code> consisting of a root element
 * that carries the OpenOffice namespace declarations.</p>
 *
 * <p>The <i>office:class</i> attribute is taken from
 * <code>getOfficeClassAttribute</code>, which subclasses may
 * override.</p>
 *
 * @param rootName root name of <code>Document</code>.
 *
 * @return The new DOM <code>Document</code>.
 *
 * @throws IOException If no XML parser can be configured.
 */
private final Document createDOM(String rootName) throws IOException {
    final Document dom;
    try {
        dom = factory.newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException ex) {
        throw new OfficeDocumentException(ex);
    }
    final Element root = dom.createElement(rootName);
    dom.appendChild(root);

    // Namespace declarations, in the order they are set on the root
    final String[][] namespaces = {
        { "xmlns:office", "http://openoffice.org/2000/office" },
        { "xmlns:style", "http://openoffice.org/2000/style" },
        { "xmlns:text", "http://openoffice.org/2000/text" },
        { "xmlns:table", "http://openoffice.org/2000/table" },
        { "xmlns:draw", "http://openoffice.org/2000/drawing" },
        { "xmlns:fo", "http://www.w3.org/1999/XSL/Format" },
        { "xmlns:xlink", "http://www.w3.org/1999/xlink" },
        { "xmlns:number", "http://openoffice.org/2000/datastyle" },
        { "xmlns:svg", "http://www.w3.org/2000/svg" },
        { "xmlns:chart", "http://openoffice.org/2000/chart" },
        { "xmlns:dr3d", "http://openoffice.org/2000/dr3d" },
        { "xmlns:math", "http://www.w3.org/1998/Math/MathML" },
        { "xmlns:form", "http://openoffice.org/2000/form" },
        { "xmlns:script", "http://openoffice.org/2000/script" },
    };
    for (String[] declaration : namespaces) {
        root.setAttribute(declaration[0], declaration[1]);
    }
    root.setAttribute("office:class", getOfficeClassAttribute());
    root.setAttribute("office:version", "1.0");
    return dom;
}
/**
 * Returns the value for the <i>office:class</i> attribute. This base
 * implementation supplies an empty value.
 *
 * @return The attribute value; the empty string here.
 */
// Consulted by createDOM when it sets up the root element.
protected String getOfficeClassAttribute() {
    return "";
}
/**
 * <p>Hacked code to filter the &lt;!DOCTYPE&gt; tag before
 * sending the stream to the parser.</p>
 *
 * <p>This hacked code needs to be changed later on.</p>
 *
 * <p>Issue: using the jaxp1.0 parser, there is no way
 * to turn off processing of dtds. The current set of dtds
 * have bugs, processing them will throw exceptions.</p>
 *
 * <p>The input is decoded as UTF-8 and processed line by line. On a line
 * containing <code>&lt;!DOCTYPE</code>, everything from there up to and
 * including the next <code>'&gt;'</code> is dropped. If that character is
 * not on the same line, the following lines are discarded until one
 * contains it. Every emitted line is terminated with a single
 * <code>'\n'</code>.</p>
 * FIX (HJ): Removed requirement for DOCTYPE to be in one line
 * FIX (HJ): No longer removes newlines
 *
 * @param is <code>InputStream</code> to be filtered.
 *
 * @return Reader value without the &lt;!DOCTYPE&gt; tag.
 *
 * @throws IOException If any I/O error occurs, or if the input ends
 * inside a DOCTYPE declaration.
 */
private static Reader hack(InputStream is) throws IOException {
    BufferedReader in = new BufferedReader(new InputStreamReader(is, "UTF-8"));
    StringBuilder out = new StringBuilder();
    String line;
    while ((line = in.readLine()) != null) {
        int start = line.indexOf("<!DOCTYPE");
        if (start < 0) {
            // Ordinary line: copy it through, preserving the newline
            out.append(line).append("\n");
            continue;
        }
        // Keep whatever precedes the declaration
        out.append(line, 0, start);
        int end = line.indexOf('>', start + 8);
        // FIX (HJ): More than one line. Search for '>' in following lines
        while (end < 0) {
            line = in.readLine();
            if (line == null) {
                throw new IOException("Invalid XML");
            }
            end = line.indexOf('>');
        }
        // Keep whatever follows the declaration, preserving the newline
        out.append(line, end + 1, line.length()).append("\n");
    }
    return new StringReader(out.toString());
}
/**
 * <p>Transform the InputStream to a Reader Stream.</p>
 *
 * <p>This hacked code needs to be changed later on.</p>
 *
 * <p>Issue: the new oasis input file stream means
 * that the old input stream fails. see #i33702# </p>
 *
 * <p>The input is decoded as UTF-8 and everything after the last
 * <code>&lt;/office:document&gt;</code> end tag is discarded. If the
 * input does not contain that end tag, it is passed through unchanged so
 * that the XML parser can report the problem.</p>
 *
 * @param is <code>InputStream</code> to be filtered.
 *
 * @return Reader value of the InputStream().
 *
 * @throws IOException If any I/O error occurs.
 */
private static Reader secondHack(InputStream is) throws IOException {
    BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
    char[] charArray = new char[4096];
    StringBuilder sBuf = new StringBuilder();
    int n = 0;
    while ((n = br.read(charArray, 0, charArray.length)) > 0) {
        sBuf.append(charArray, 0, n);
    }
    // ensure there is no trailing garbage after the end of the stream.
    final String endTag = "</office:document>";
    int sIndex = sBuf.lastIndexOf(endTag);
    if (sIndex > -1) {
        // Truncate right after the end tag. When the tag is missing,
        // lastIndexOf yields -1 and there is nothing sensible to cut.
        sBuf.setLength(sIndex + endTag.length());
    }
    return new StringReader(sBuf.toString());
}
/**
 * Creates a fresh manifest DOM for the manifest.xml file stored
 * in an SX? file. The manifest starts out with its root element and a
 * single file entry for the path "/" whose type is the document's MIME type.
 *
 * @throws IOException If no XML parser can be configured.
 */
private void initManifestDOM() throws IOException {
    final DOMImplementation impl;
    try {
        impl = factory.newDocumentBuilder().getDOMImplementation();
    } catch (ParserConfigurationException ex) {
        throw new OfficeDocumentException(ex);
    }
    final DocumentType manifestType = impl.createDocumentType(TAG_MANIFEST_ROOT,
        "-//OpenOffice.org//DTD Manifest 1.0//EN",
        "Manifest.dtd");
    manifestDoc = impl.createDocument("manifest", TAG_MANIFEST_ROOT, manifestType);

    // Add the <manifest:manifest> entry
    final Element root = manifestDoc.getDocumentElement();
    root.setAttribute("xmlns:manifest", "http://openoffice.org/2001/manifest");

    // The entry describing the document itself
    final Element selfEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
    selfEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "/");
    selfEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, getDocumentMimeType());
    root.appendChild(selfEntry);
}
// TODO: We need these because we implement OutputFile (but in fact we shouldn't)
/**
 * Returns the MIME type of this document as an output file. No MIME
 * type is reported here.
 *
 * @return The empty string.
 */
public String getMIMEType() {
    final String none = "";
    return none;
}
/**
 * Tells whether this is a master document. It never is.
 *
 * @return Always <code>false</code>.
 */
public boolean isMasterDocument() {
    final boolean master = false;
    return master;
}
}