diff --git a/source/distro/changelog.txt b/source/distro/changelog.txt index 09b8930..a984643 100644 --- a/source/distro/changelog.txt +++ b/source/distro/changelog.txt @@ -2,6 +2,11 @@ Changelog for Writer2LaTeX version 1.2 -> 1.4 ---------- version 1.3.1 alpha ---------- +[all] Optimized the parsing of the source document saving some time and space (several intermediate steps and large byte arrays + are now avoided) + +[all] API change: The converters can now convert directly from a DOM tree + [all] Removed unused code in writer2latex.xmerge [w2x] Moved localized strings to .properties files \ No newline at end of file diff --git a/source/java/org/openoffice/da/comp/w2lcommon/filter/ExportFilterBase.java b/source/java/org/openoffice/da/comp/w2lcommon/filter/ExportFilterBase.java index 489cbac..d389e60 100644 --- a/source/java/org/openoffice/da/comp/w2lcommon/filter/ExportFilterBase.java +++ b/source/java/org/openoffice/da/comp/w2lcommon/filter/ExportFilterBase.java @@ -16,22 +16,18 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * - * Copyright: 2002-2010 by Henrik Just + * Copyright: 2002-2012 by Henrik Just * * All Rights Reserved. 
* - * Version 1.2 (2010-05-17) + * Version 1.4 (2012-03-22) * */ - -// This file was originally based on OOo's XMergeBridge, which is (c) by Sun Microsystems package org.openoffice.da.comp.w2lcommon.filter; -import com.sun.star.lib.uno.adapter.XInputStreamToInputStreamAdapter; import com.sun.star.lib.uno.adapter.XOutputStreamToOutputStreamAdapter; -//import com.sun.star.beans.PropertyValue; import com.sun.star.io.XInputStream; import com.sun.star.io.XOutputStream; import com.sun.star.lang.XMultiServiceFactory; @@ -43,461 +39,345 @@ import com.sun.star.ucb.XSimpleFileAccess2; import com.sun.star.uno.Type; import com.sun.star.uno.UnoRuntime; import com.sun.star.uno.XComponentContext; -//import com.sun.star.xml.sax.InputSource; -//import com.sun.star.xml.sax.XParser; import com.sun.star.xml.sax.XDocumentHandler; import com.sun.star.xml.XExportFilter; import org.openoffice.da.comp.w2lcommon.helper.MessageBox; -//import org.openoffice.da.comp.w2lcommon.helper.PropertyHelper; import writer2latex.api.Converter; import writer2latex.api.ConverterFactory; import writer2latex.api.ConverterResult; import writer2latex.api.OutputFile; import writer2latex.util.Misc; +import writer2latex.util.SimpleDOMBuilder; +import java.io.IOException; +import java.io.OutputStream; import java.util.Iterator; -//import java.util.Enumeration; -//import java.util.Vector; -import java.io.*; -//import javax.xml.parsers.*; -//import org.xml.sax.SAXException; -//import java.net.URI; -/** This class provides an abstract uno component which implements an XExportFilter. - * The filter is actually generic and only then constructor and 3 strings needs - * to changed by the subclass. +/** This class provides an abstract UNO component which implements an XExportFilter. + * The filter is actually generic and only the constructor and 3 strings needs + * to be changed by the subclass. 
*/ public abstract class ExportFilterBase implements - XExportFilter, - XServiceName, - XServiceInfo, - XDocumentHandler, - XTypeProvider { - - /** Service name for the component */ - public static final String __serviceName = ""; - - /** Implementation name for the component */ - public static final String __implementationName = ""; - - /** Filter name to include in error messages */ +XExportFilter, +XServiceName, +XServiceInfo, +XDocumentHandler, +XTypeProvider { + + /** Service name for the component */ + public static final String __serviceName = ""; + + /** Implementation name for the component */ + public static final String __implementationName = ""; + + /** Filter name to include in error messages */ public String __displayName = ""; - private static XComponentContext xComponentContext = null; - protected static XMultiServiceFactory xMSF; - private static XInputStream xInStream =null; - private static XOutputStream xOutStream=null; - private static XOutputStream xos = null; - private static String sdMime=null; - private static String sURL=""; - - private Object filterData; - private XSimpleFileAccess2 sfa2; + private static XComponentContext xComponentContext = null; + protected static XMultiServiceFactory xMSF; + private SimpleDOMBuilder domBuilder = new SimpleDOMBuilder(); + private static XOutputStream xos = null; + private static String sdMime=null; + private static String sURL=""; - /** We need to get the Service Manager from the Component context to - * instantiate certain services, hence this constructor. 
- * The subclass must override this to set xMSF properly from the reigstration class - */ - public ExportFilterBase(XComponentContext xComponentContext1) { - xComponentContext = xComponentContext1; - xMSF = null; - } - - - // Some utility methods: - - String getFileName(String origName) { - String name=null; - if (origName !=null) { - if(origName.equalsIgnoreCase("")) - name = "OutFile"; - else { - if (origName.lastIndexOf("/")>=0) { - origName=origName.substring(origName.lastIndexOf("/")+1,origName.length()); - } - if (origName.lastIndexOf(".")>=0) { - name = origName.substring(0,(origName.lastIndexOf("."))); - } - else { - name=origName; - } - } - } - else{ - name = "OutFile"; - } + private Object filterData; + private XSimpleFileAccess2 sfa2; - return name; - } - - public String needsMask(String origString) { - StringBuffer buf = new StringBuffer(); - int nLen = origString.length(); - for (int i=0; i'){ - buf.append(">"); - } - //else if (c=='\u0009' || c=='\n' || c=='\r' || (c>='\u0020' && c<='\uD7FF') || (c>='\uE000' && c<'\uFFFD')) { - else if (c=='\u0009' || c=='\n' || c=='\r' || (c>='\u0020' && c<'\uFFFD')) { - // Valid characters found at xml.com - // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] - // (the latter are represented as surrogate pairs (#xD800-#xDFFF) - buf.append(c); - } - else { - // Found illegal character - //System.out.println("Illegal character : "+Integer.toHexString(c)); - } - } - return buf.toString(); - - } - - - // Implementation of XExportFilter: - - public boolean exporter(com.sun.star.beans.PropertyValue[] aSourceData, - java.lang.String[] msUserData) throws com.sun.star.uno.RuntimeException{ - - sURL=null; - filterData = null; - - // Get user data from configuration (type detection) - //String udConvertClass=msUserData[0]; - //String udImport =msUserData[2]; - //String udExport =msUserData[3]; - sdMime = msUserData[5]; - - // Get source data (only the OutputStream and the URL are actually used) - 
com.sun.star.beans.PropertyValue[] pValue = aSourceData; - for (int i = 0 ; i < pValue.length; i++) { - try{ - if (pValue[i].Name.compareTo("OutputStream")==0){ - xos=(com.sun.star.io.XOutputStream)AnyConverter.toObject(new Type(com.sun.star.io.XOutputStream.class), pValue[i].Value); - } - //if (pValue[i].Name.compareTo("FileName")==0){ - // sFileName=(String)AnyConverter.toObject(new Type(java.lang.String.class), pValue[i].Value); - //} - if (pValue[i].Name.compareTo("URL")==0){ - sURL=(String)AnyConverter.toObject(new Type(java.lang.String.class), pValue[i].Value); - } - //if (pValue[i].Name.compareTo("Title")==0){ - // title=(String)AnyConverter.toObject(new Type(java.lang.String.class), pValue[i].Value); - //} - if (pValue[i].Name.compareTo("FilterData")==0) { - filterData = pValue[i].Value; - } - } - catch(com.sun.star.lang.IllegalArgumentException AnyExec){ - System.err.println("\nIllegalArgumentException "+AnyExec); - } - } - - - if (sURL==null){ - sURL=""; - } - - // Create a pipe to be used by the XDocumentHandler implementation: - try { - Object xPipeObj=xMSF.createInstance("com.sun.star.io.Pipe"); - xInStream = (XInputStream) UnoRuntime.queryInterface( - XInputStream.class , xPipeObj ); - xOutStream = (XOutputStream) UnoRuntime.queryInterface( - XOutputStream.class , xPipeObj ); - } - catch (Exception e){ - System.err.println("Exception "+e); - return false; - } - - return true; - } - - - - // Implementation of XDocumentHandler: - // Flat xml is created by the sax events and passed through the pipe - // created by exporter() - - public void startDocument () { - //Do nothing - } - - public void endDocument()throws com.sun.star.uno.RuntimeException { - try{ - xOutStream.closeOutput(); - convert(xInStream,xos); - } - catch (IOException e){ - MessageBox msgBox = new MessageBox(xComponentContext); - msgBox.showMessage(__displayName+": IO error in conversion", - e.toString()+" at "+e.getStackTrace()[0].toString()); - throw new 
com.sun.star.uno.RuntimeException(e.getMessage()); - } - catch (Exception e){ - MessageBox msgBox = new MessageBox(xComponentContext); - msgBox.showMessage(__displayName+": Internal error in conversion", - e.toString()+" at "+e.getStackTrace()[0].toString()); - throw new com.sun.star.uno.RuntimeException(__displayName+" Exception"); - } - } - - - - public void startElement (String str, com.sun.star.xml.sax.XAttributeList xattribs) - { - - str="<".concat(str); - if (xattribs !=null) - { - str= str.concat(" "); - int len=xattribs.getLength(); - for (short i=0;i"); - try{ - xOutStream.writeBytes(str.getBytes("UTF-8")); - } - catch (Exception e){ - System.err.println("\n"+e); - } - + /** We need to get the Service Manager from the Component context to + * instantiate certain services, hence this constructor. + * The subclass must override this to set xMSF properly from the registration class + */ + public ExportFilterBase(XComponentContext xComponentContext1) { + xComponentContext = xComponentContext1; + xMSF = null; } - public void endElement(String str){ - - str=""); - try{ - xOutStream.writeBytes(str.getBytes("UTF-8")); - - } - catch (Exception e){ - System.err.println("\n"+e); - } - - - } - public void characters(String str){ - str=needsMask(str); - try{ - xOutStream.writeBytes(str.getBytes("UTF-8")); - } - catch (Exception e){ - System.err.println("\n"+e); - } - - - } - - public void ignorableWhitespace(String str){ - - - } - public void processingInstruction(String aTarget, String aData){ - - } - - public void setDocumentLocator(com.sun.star.xml.sax.XLocator xLocator){ - - } + // Utility method: - - - // This is the actual conversion method, using Writer2LaTeX to convert - // the flat xml recieved from the XInputStream, and writing the result - // to the XOutputStream. The XMLExporter does not support export to - // compound documents with multiple output files; so the main file - // is written to the XOutStream and other files are written using ucb. 
- - public void convert (com.sun.star.io.XInputStream xml,com.sun.star.io.XOutputStream exportStream) - throws com.sun.star.uno.RuntimeException, IOException { - - // Initialise the file access - sfa2 = null; - try { - Object sfaObject = xComponentContext.getServiceManager().createInstanceWithContext( - "com.sun.star.ucb.SimpleFileAccess", xComponentContext); - sfa2 = (XSimpleFileAccess2) UnoRuntime.queryInterface(XSimpleFileAccess2.class, sfaObject); - } - catch (com.sun.star.uno.Exception e) { - // failed to get SimpleFileAccess service (should not happen) - } - - // Get base name from the url provided by OOo - String sName= getFileName(sURL); - - // Adapter for input stream (OpenDocument flat xml) - XInputStreamToInputStreamAdapter xis =new XInputStreamToInputStreamAdapter(xml); - - // Adapter for output stream (Main output file) - XOutputStreamToOutputStreamAdapter newxos =new XOutputStreamToOutputStreamAdapter(exportStream); - - // Create converter - Converter converter = ConverterFactory.createConverter(sdMime); - if (converter==null) { - throw new com.sun.star.uno.RuntimeException("Failed to create converter to "+sdMime); - } - - // Apply the FilterData to the converter - if (filterData!=null) { - FilterDataParser fdp = new FilterDataParser(xComponentContext); - fdp.applyFilterData(filterData,converter); - } - - // Do conversion - converter.setGraphicConverter(new GraphicConverterImpl(xComponentContext)); - - ConverterResult dataOut = null; - //try { - dataOut = converter.convert(xis,Misc.makeFileName(sName)); - //} - //catch (IOException e) { - // Fail silently - //} - - // Write out files - Iterator docEnum = dataOut.iterator(); - - // Remove the file name part of the url - String sNewURL = null; - if (sURL.lastIndexOf("/")>-1) { - // Take the url up to and including the last slash - sNewURL = sURL.substring(0,sURL.lastIndexOf("/")+1); - } - else { - // The url does not include a path; this should not really happen, - // but in this case we will write to 
the current default directory - sNewURL = ""; - } - - while (docEnum.hasNext() && sURL.startsWith("file:")) { - OutputFile docOut = docEnum.next(); - - if (dataOut.getMasterDocument()==docOut) { - // The master document is written to the XOutStream supplied - // by the XMLFilterAdaptor - docOut.write(newxos); - newxos.flush(); - newxos.close(); - } - else { - // Additional documents are written directly using ucb - - // Get the file name and the (optional) directory name - String sFullFileName = Misc.makeHref(docOut.getFileName()); - String sDirName = ""; - String sFileName = sFullFileName; - int nSlash = sFileName.indexOf("/"); - if (nSlash>-1) { - sDirName = sFileName.substring(0,nSlash); - sFileName = sFileName.substring(nSlash+1); - } - - try{ - // Create subdirectory if required - if (sDirName.length()>0 && !sfa2.exists(sNewURL+sDirName)) { - sfa2.createFolder(sNewURL+sDirName); - } - - // writeFile demands an InputStream, so we need a pipe - Object xPipeObj=xMSF.createInstance("com.sun.star.io.Pipe"); - XInputStream xInStream - = (XInputStream) UnoRuntime.queryInterface(XInputStream.class, xPipeObj ); - XOutputStream xOutStream - = (XOutputStream) UnoRuntime.queryInterface(XOutputStream.class, xPipeObj ); - OutputStream outStream = new XOutputStreamToOutputStreamAdapter(xOutStream); - // Feed the pipe with content... 
- docOut.write(outStream); - outStream.flush(); - outStream.close(); - xOutStream.closeOutput(); - // ...and then write the content to the url - sfa2.writeFile(sNewURL+sFullFileName,xInStream); - } - catch (Throwable e){ - MessageBox msgBox = new MessageBox(xComponentContext); - msgBox.showMessage(__displayName+": Error writing files", - e.toString()+" at "+e.getStackTrace()[0].toString()); - } - } - + String getFileName(String origName) { + String name=null; + if (origName !=null) { + if(origName.equalsIgnoreCase("")) + name = "OutFile"; + else { + if (origName.lastIndexOf("/")>=0) { + origName=origName.substring(origName.lastIndexOf("/")+1,origName.length()); + } + if (origName.lastIndexOf(".")>=0) { + name = origName.substring(0,(origName.lastIndexOf("."))); + } + else { + name=origName; + } } - - } + } + else{ + name = "OutFile"; + } + + return name; + } + + // Implementation of XExportFilter: + + public boolean exporter(com.sun.star.beans.PropertyValue[] aSourceData, + java.lang.String[] msUserData) throws com.sun.star.uno.RuntimeException{ + + sURL=null; + filterData = null; + + // Get user data from configuration (type detection) + //String udConvertClass=msUserData[0]; + //String udImport =msUserData[2]; + //String udExport =msUserData[3]; + sdMime = msUserData[5]; + + // Get source data (only the OutputStream and the URL are actually used) + com.sun.star.beans.PropertyValue[] pValue = aSourceData; + for (int i = 0 ; i < pValue.length; i++) { + try{ + if (pValue[i].Name.compareTo("OutputStream")==0){ + xos=(com.sun.star.io.XOutputStream)AnyConverter.toObject(new Type(com.sun.star.io.XOutputStream.class), pValue[i].Value); + } + //if (pValue[i].Name.compareTo("FileName")==0){ + // sFileName=(String)AnyConverter.toObject(new Type(java.lang.String.class), pValue[i].Value); + //} + if (pValue[i].Name.compareTo("URL")==0){ + sURL=(String)AnyConverter.toObject(new Type(java.lang.String.class), pValue[i].Value); + } + //if (pValue[i].Name.compareTo("Title")==0){ + 
// title=(String)AnyConverter.toObject(new Type(java.lang.String.class), pValue[i].Value); + //} + if (pValue[i].Name.compareTo("FilterData")==0) { + filterData = pValue[i].Value; + } + } + catch(com.sun.star.lang.IllegalArgumentException AnyExec){ + System.err.println("\nIllegalArgumentException "+AnyExec); + } + } + + if (sURL==null){ + sURL=""; + } + + return true; + } + + // Implementation of XDocumentHandler: + // A flat XML DOM tree is created by the SAX events and finally converted + + public void startDocument () { + //Do nothing + } + + public void endDocument()throws com.sun.star.uno.RuntimeException { + try{ + convert(domBuilder.getDOM(),xos); + } + catch (IOException e){ + MessageBox msgBox = new MessageBox(xComponentContext); + msgBox.showMessage(__displayName+": IO error in conversion", + e.toString()+" at "+e.getStackTrace()[0].toString()); + throw new com.sun.star.uno.RuntimeException(e.getMessage()); + } + catch (Exception e){ + MessageBox msgBox = new MessageBox(xComponentContext); + msgBox.showMessage(__displayName+": Internal error in conversion", + e.toString()+" at "+e.getStackTrace()[0].toString()); + throw new com.sun.star.uno.RuntimeException(__displayName+" Exception"); + } + } - // Implement methods from interface XTypeProvider - // Implementation of XTypeProvider - - public com.sun.star.uno.Type[] getTypes() { - Type[] typeReturn = {}; - try { - typeReturn = new Type[] { - new Type( XTypeProvider.class ), - new Type( XExportFilter.class ), - new Type( XServiceName.class ), - new Type( XServiceInfo.class ) }; - } - catch( Exception exception ) { - - } + public void startElement (String sTagName, com.sun.star.xml.sax.XAttributeList xAttribs) { + domBuilder.startElement(sTagName); + int nLen = xAttribs.getLength(); + for (short i=0;i docEnum = dataOut.iterator(); + + // Remove the file name part of the URL + String sNewURL = null; + if (sURL.lastIndexOf("/")>-1) { + // Take the URL up to and including the last slash + sNewURL = 
sURL.substring(0,sURL.lastIndexOf("/")+1); + } + else { + // The URL does not include a path; this should not really happen, + // but in this case we will write to the current default directory + sNewURL = ""; + } + + while (docEnum.hasNext() && sURL.startsWith("file:")) { + OutputFile docOut = docEnum.next(); + + if (dataOut.getMasterDocument()==docOut) { + // The master document is written to the XOutStream supplied + // by the XMLFilterAdaptor + docOut.write(newxos); + newxos.flush(); + newxos.close(); + } + else { + // Additional files are written directly using UCB + + // Get the file name and the (optional) directory name + String sFullFileName = Misc.makeHref(docOut.getFileName()); + String sDirName = ""; + String sFileName = sFullFileName; + int nSlash = sFileName.indexOf("/"); + if (nSlash>-1) { + sDirName = sFileName.substring(0,nSlash); + sFileName = sFileName.substring(nSlash+1); + } + + try{ + // Create subdirectory if required + if (sDirName.length()>0 && !sfa2.exists(sNewURL+sDirName)) { + sfa2.createFolder(sNewURL+sDirName); + } + + // writeFile demands an InputStream, so we need a pipe + Object xPipeObj=xMSF.createInstance("com.sun.star.io.Pipe"); + XInputStream xInStream + = (XInputStream) UnoRuntime.queryInterface(XInputStream.class, xPipeObj ); + XOutputStream xOutStream + = (XOutputStream) UnoRuntime.queryInterface(XOutputStream.class, xPipeObj ); + OutputStream outStream = new XOutputStreamToOutputStreamAdapter(xOutStream); + // Feed the pipe with content... 
+ docOut.write(outStream); + outStream.flush(); + outStream.close(); + xOutStream.closeOutput(); + // ...and then write the content to the URL + sfa2.writeFile(sNewURL+sFullFileName,xInStream); + } + catch (Throwable e){ + MessageBox msgBox = new MessageBox(xComponentContext); + msgBox.showMessage(__displayName+": Error writing files", + e.toString()+" at "+e.getStackTrace()[0].toString()); + } + } + } + } + + + // Implement methods from interface XTypeProvider + // Implementation of XTypeProvider + + public com.sun.star.uno.Type[] getTypes() { + Type[] typeReturn = {}; + + try { + typeReturn = new Type[] { + new Type( XTypeProvider.class ), + new Type( XExportFilter.class ), + new Type( XServiceName.class ), + new Type( XServiceInfo.class ) }; + } + catch( Exception exception ) { + + } + + return( typeReturn ); + } + + + public byte[] getImplementationId() { + byte[] byteReturn = {}; + + byteReturn = new String( "" + this.hashCode() ).getBytes(); + + return( byteReturn ); + } + + // Implement method from interface XServiceName + public String getServiceName() { + return( __serviceName ); + } + + // Implement methods from interface XServiceInfo + public boolean supportsService(String stringServiceName) { + return( stringServiceName.equals( __serviceName ) ); + } + + public String getImplementationName() { + return __implementationName; + } + + public String[] getSupportedServiceNames() { + String[] stringSupportedServiceNames = { __serviceName }; + return( stringSupportedServiceNames ); + } - // Implement method from interface XServiceName - public String getServiceName() { - return( __serviceName ); - } - - // Implement methods from interface XServiceInfo - public boolean supportsService(String stringServiceName) { - return( stringServiceName.equals( __serviceName ) ); - } - - public String getImplementationName() { - return __implementationName; - //return( W2LExportFilter.class.getName() ); - } - - public String[] getSupportedServiceNames() { - String[] 
stringSupportedServiceNames = { __serviceName }; - return( stringSupportedServiceNames ); - } - } - - - diff --git a/source/java/writer2latex/Application.java b/source/java/writer2latex/Application.java index eff440d..a43a1f3 100644 --- a/source/java/writer2latex/Application.java +++ b/source/java/writer2latex/Application.java @@ -87,9 +87,11 @@ public final class Application { */ public static final void main (String[] args){ try { + long time = System.currentTimeMillis(); Application app = new Application(); app.parseCommandLine(args); app.doConversion(); + System.out.println("Total conversion time was "+(System.currentTimeMillis()-time)+" miliseconds"); } catch (IllegalArgumentException ex) { String msg = ex.getMessage(); showUsage(msg); diff --git a/source/java/writer2latex/api/Converter.java b/source/java/writer2latex/api/Converter.java index 0a2c1be..ec6b453 100644 --- a/source/java/writer2latex/api/Converter.java +++ b/source/java/writer2latex/api/Converter.java @@ -16,11 +16,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * - * Copyright: 2002-2011 by Henrik Just + * Copyright: 2002-2012 by Henrik Just * * All Rights Reserved. 
* - * Version 1.2 (2011-06-05) + * Version 1.4 (2012-03-21) * */ @@ -137,5 +137,17 @@ public interface Converter { */ public ConverterResult convert(File source, String sTargetFileName) throws FileNotFoundException, IOException; + + /** Convert a document + * + * @param dom a DOM tree representing the document as flat XML + * @param sTargetFileName the file name to use for the converted document + * (if the converted document is a compound document consisting + * of several files, this name will be used for the master document) + * @return a ConverterResult containing the converted document + * @throws IOException if some exception occurs while reading the document + */ + public ConverterResult convert(org.w3c.dom.Document dom, String sTargetFileName) + throws IOException; } diff --git a/source/java/writer2latex/api/ConverterFactory.java b/source/java/writer2latex/api/ConverterFactory.java index e68183f..47a3fe9 100644 --- a/source/java/writer2latex/api/ConverterFactory.java +++ b/source/java/writer2latex/api/ConverterFactory.java @@ -20,7 +20,7 @@ * * All Rights Reserved. * - * Version 1.4 (2012-03-19) + * Version 1.4 (2012-03-27) * */ @@ -33,7 +33,7 @@ public class ConverterFactory { // Version information private static final String VERSION = "1.3.1"; - private static final String DATE = "2012-03-19"; + private static final String DATE = "2012-03-27"; /** Return the Writer2LaTeX version in the form * (major version).(minor version).(patch level)
diff --git a/source/java/writer2latex/base/ConverterBase.java b/source/java/writer2latex/base/ConverterBase.java index 3617dc5..d6a7864 100644 --- a/source/java/writer2latex/base/ConverterBase.java +++ b/source/java/writer2latex/base/ConverterBase.java @@ -16,11 +16,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * - * Copyright: 2002-2011 by Henrik Just + * Copyright: 2002-2012 by Henrik Just * * All Rights Reserved. * - * Version 1.2 (2011-02-19) + * Version 1.4 (2012-03-23) * */ @@ -35,11 +35,11 @@ import writer2latex.api.GraphicConverter; import writer2latex.api.Converter; import writer2latex.api.ConverterResult; import writer2latex.api.OutputFile; +import writer2latex.office.EmbeddedObject; import writer2latex.office.ImageLoader; import writer2latex.office.MetaData; +import writer2latex.office.OfficeDocument; import writer2latex.office.OfficeReader; -import writer2latex.xmerge.EmbeddedObject; -import writer2latex.xmerge.OfficeDocument; /**

Abstract base implementation of writer2latex.api.Converter

*/ @@ -93,8 +93,19 @@ public abstract class ConverterBase implements Converter { public ConverterResult convert(InputStream is, String sTargetFileName) throws IOException { // Read document - odDoc = new OfficeDocument("InFile"); + odDoc = new OfficeDocument(); odDoc.read(is); + return convert(sTargetFileName); + } + + public ConverterResult convert(org.w3c.dom.Document dom, String sTargetFileName) throws IOException { + // Read document + odDoc = new OfficeDocument(); + odDoc.read(dom); + return convert(sTargetFileName); + } + + private ConverterResult convert(String sTargetFileName) throws IOException { ofr = new OfficeReader(odDoc,false); metaData = new MetaData(odDoc); imageLoader = new ImageLoader(odDoc,true); diff --git a/source/java/writer2latex/latex/DrawConverter.java b/source/java/writer2latex/latex/DrawConverter.java index 97adddd..9ba500f 100644 --- a/source/java/writer2latex/latex/DrawConverter.java +++ b/source/java/writer2latex/latex/DrawConverter.java @@ -33,12 +33,12 @@ import org.w3c.dom.Document; import org.w3c.dom.Element; //import org.w3c.dom.Node; -import writer2latex.xmerge.EmbeddedObject; -import writer2latex.xmerge.EmbeddedXMLObject; import writer2latex.latex.util.BeforeAfter; import writer2latex.latex.util.Context; //import writer2latex.office.ImageLoader; +import writer2latex.office.EmbeddedObject; +import writer2latex.office.EmbeddedXMLObject; import writer2latex.office.MIMETypes; import writer2latex.office.OfficeReader; import writer2latex.office.StyleWithProperties; diff --git a/source/java/writer2latex/latex/MathmlConverter.java b/source/java/writer2latex/latex/MathmlConverter.java index 674da80..f374243 100644 --- a/source/java/writer2latex/latex/MathmlConverter.java +++ b/source/java/writer2latex/latex/MathmlConverter.java @@ -34,13 +34,13 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; //import writer2latex.latex.i18n.I18n; +import writer2latex.office.EmbeddedObject; +import writer2latex.office.EmbeddedXMLObject; import 
writer2latex.office.MIMETypes; import writer2latex.office.OfficeReader; import writer2latex.office.TableReader; import writer2latex.office.XMLString; import writer2latex.util.Misc; -import writer2latex.xmerge.EmbeddedObject; -import writer2latex.xmerge.EmbeddedXMLObject; /** * This class converts mathml nodes to LaTeX. diff --git a/source/java/writer2latex/office/EmbeddedBinaryObject.java b/source/java/writer2latex/office/EmbeddedBinaryObject.java new file mode 100644 index 0000000..087271c --- /dev/null +++ b/source/java/writer2latex/office/EmbeddedBinaryObject.java @@ -0,0 +1,60 @@ +/************************************************************************ + * + * EmbeddedBinaryObject.java + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * Copyright: 2002-2012 by Henrik Just + * + * All Rights Reserved. + * + * Version 1.4 (2012-03-26) + * + */ + +package writer2latex.office; + +import writer2latex.util.SimpleZipReader; + +/** + * This class represents an embedded object with a binary representation in an ODF package document + */ +public class EmbeddedBinaryObject extends EmbeddedObject { + + /** The object's binary representation. */ + private byte[] objData = null; + + /** + * Protected constructor for use when reading an object from a + * package ODF file + * + * @param sName The name of the object. 
+ * @param sType The MIME-type of the object. + * @param source A SimpleZipReader containing the object + */ + protected EmbeddedBinaryObject(String sName, String sType, SimpleZipReader source) { + super(sName,sType); + objData = source.getEntry(sName); + } + + /** Get the binary data for this object + * + * @return A byte array containing the object's data. + */ + public byte[] getBinaryData() { + return objData; + } + +} diff --git a/source/java/writer2latex/office/EmbeddedObject.java b/source/java/writer2latex/office/EmbeddedObject.java new file mode 100644 index 0000000..1ced2d2 --- /dev/null +++ b/source/java/writer2latex/office/EmbeddedObject.java @@ -0,0 +1,61 @@ +/************************************************************************ + * + * EmbeddedObject.java + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * Copyright: 2002-2012 by Henrik Just + * + * All Rights Reserved. + * + * Version 1.4 (2012-03-27) + * + */ + +package writer2latex.office; + +/** This class represents an embedded object within an ODF package document + */ +public abstract class EmbeddedObject { + private String sName; + private String sType; + + /** Construct a new embedded object + * + * @param name The name of the object. + * @param type The MIME-type of the object. 
+ */ + protected EmbeddedObject(String name, String type) { + sName = name; + sType = type; + } + + /** Get the name of the embedded object represented by this instance. + * The name refers to the manifest.xml file + * + * @return The name of the object. + */ + public final String getName() { + return sName; + } + + /** Get the MIME type of the embedded object represented by this instance. + * The MIME type refers to the manifest.xml file + */ + public final String getType() { + return sType; + } + +} \ No newline at end of file diff --git a/source/java/writer2latex/office/EmbeddedXMLObject.java b/source/java/writer2latex/office/EmbeddedXMLObject.java new file mode 100644 index 0000000..4a14b8f --- /dev/null +++ b/source/java/writer2latex/office/EmbeddedXMLObject.java @@ -0,0 +1,122 @@ +/************************************************************************ + * + * EmbeddedXMLObject.java + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * Copyright: 2002-2012 by Henrik Just + * + * All Rights Reserved. 
+ * + * Version 1.4 (2012-03-27) + * + */ + +package writer2latex.office; + +import java.io.IOException; + +import org.w3c.dom.Document; + +import org.xml.sax.SAXException; + +import writer2latex.util.SimpleZipReader; + +/** This class represents those embedded objects in an ODF document that have an XML representation: + * Formulas, charts, spreadsheets, text, drawings and presentations. + * These object types are stored using a combination of content, settings and styles XML files; the getter for a file that is absent from the package returns null. + */ +public class EmbeddedXMLObject extends EmbeddedObject { + + // Byte entries for the XML streams of this object + private byte[] contentBytes = null; + private byte[] settingsBytes = null; + private byte[] stylesBytes = null; + + // DOM trees representing the XML parts of this object + protected Document contentDOM = null; + protected Document settingsDOM = null; + protected Document stylesDOM = null; + + /** Read an object from an ODF package document + * + * @param sName The name of the object. + * @param sType The MIME-type of the object. + * @param source A ZIP reader providing the contents of the package + */ + protected EmbeddedXMLObject(String sName, String sType, SimpleZipReader source) { + super(sName, sType); + // Fetch the raw bytes now, but defer parsing until a DOM is first requested (at that point, the bytes are released) + contentBytes = source.getEntry(sName+"/"+OfficeDocument.CONTENTXML); + settingsBytes = source.getEntry(sName+"/"+OfficeDocument.SETTINGSXML); + stylesBytes = source.getEntry(sName+"/"+OfficeDocument.STYLESXML); + } + + /** + * Returns the content data for this embedded object. 
+ * + * @return DOM representation of "content.xml" + * + * @throws SAXException If any parser error occurs + * @throws IOException If any IO error occurs + */ + public Document getContentDOM() throws SAXException, IOException { + if (contentDOM==null) { + contentDOM=getDOM(contentBytes); + contentBytes=null; + } + return contentDOM; + } + + /** + * Returns the settings data for this embedded object. + * + * @return DOM representation of "settings.xml" + * + * @throws SAXException If any parser error occurs + * @throws IOException If any IO error occurs + */ + public Document getSettingsDOM() throws SAXException, IOException { + if (settingsDOM==null) { + settingsDOM=getDOM(settingsBytes); + settingsBytes=null; + } + return settingsDOM; + } + + /** + * Returns the style data for this embedded object. + * + * @return DOM representation of "styles.xml" + * + * @throws SAXException If any parser error occurs + * @throws IOException If any IO error occurs + */ + public Document getStylesDOM() throws SAXException, IOException { + if (stylesDOM==null) { + stylesDOM = getDOM(stylesBytes); + stylesBytes=null; + } + return stylesDOM; + } + + private Document getDOM(byte[] data) throws SAXException, IOException { + if (data!=null) { + return OfficeDocument.parse(data); + } + return null; /* no bytes available for this part, e.g. the entry is missing from the package */ + } + +} \ No newline at end of file diff --git a/source/java/writer2latex/office/ImageLoader.java b/source/java/writer2latex/office/ImageLoader.java index e7a359c..0ae9449 100644 --- a/source/java/writer2latex/office/ImageLoader.java +++ b/source/java/writer2latex/office/ImageLoader.java @@ -40,9 +40,6 @@ import writer2latex.api.GraphicConverter; import writer2latex.util.Base64; import writer2latex.util.Misc; import writer2latex.xmerge.BinaryGraphicsDocument; -import writer2latex.xmerge.EmbeddedObject; -import writer2latex.xmerge.EmbeddedBinaryObject; -import writer2latex.xmerge.OfficeDocument; //import writer2latex.util.*; diff --git a/source/java/writer2latex/office/MetaData.java 
b/source/java/writer2latex/office/MetaData.java index d78c7b5..79c3642 100644 --- a/source/java/writer2latex/office/MetaData.java +++ b/source/java/writer2latex/office/MetaData.java @@ -36,7 +36,6 @@ import org.w3c.dom.NodeList; import writer2latex.util.*; //import writer2latex.office.*; -import writer2latex.xmerge.OfficeDocument; /** *

This class represents the metadata of an OOo Writer document.

diff --git a/source/java/writer2latex/office/OfficeDocument.java b/source/java/writer2latex/office/OfficeDocument.java new file mode 100644 index 0000000..77e4433 --- /dev/null +++ b/source/java/writer2latex/office/OfficeDocument.java @@ -0,0 +1,349 @@ +/************************************************************************ + * + * OfficeDocument.java + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * Copyright: 2002-2012 by Henrik Just + * + * All Rights Reserved. 
+ * + * Version 1.4 (2012-03-27) + * + */ + +package writer2latex.office; + +import java.io.BufferedInputStream; +import java.io.InputStream; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.Iterator; +import java.util.Map; +import java.util.HashMap; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import org.w3c.dom.Element; +import org.w3c.dom.Document; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + +import writer2latex.util.SimpleSAXHandler; +import writer2latex.util.SimpleZipReader; + +/** + * This class implements reading of ODF files (package or flat XML format) from a stream or a DOM tree + */ +public class OfficeDocument { + // File names for the XML streams in a package document + protected final static String CONTENTXML = "content.xml"; + protected final static String STYLESXML = "styles.xml"; + protected final static String SETTINGSXML = "settings.xml"; + private final static String METAXML = "meta.xml"; + private final static String MANIFESTXML = "META-INF/manifest.xml"; + + // Some tag and attribute names in manifest.xml + private final static String MANIFEST_FILE_ENTRY = "manifest:file-entry"; + private final static String MANIFEST_MEDIA_TYPE = "manifest:media-type"; + private final static String MANIFEST_FULL_PATH = "manifest:full-path"; + + /** DOM Document of content.xml. */ + private Document contentDoc = null; + + /** DOM Document of meta.xml. */ + private Document metaDoc = null; + + /** DOM Document of settings.xml. */ + private Document settingsDoc = null; + + /** DOM Document of styles.xml. */ + private Document styleDoc = null; + + /** DOM Document of META-INF/manifest.xml. 
*/ + private Document manifestDoc = null; + + /** SimpleZipReader to store the contents from the InputStream + * if the document is in package format (otherwise this will remain null) + */ + private SimpleZipReader zip = null; + + /** Map from object name (manifest path) to embedded object; built lazily by getEmbeddedObjects. */ + private Map embeddedObjects = null; + + /** Package or flat format? + * @return true if the document is in package format, false if it's flat XML + */ + public boolean isPackageFormat() { + return zip!=null; + } + + /** + * Return a DOM Document object of the content.xml + * file. Note that a content DOM is not created when the constructor + * is called, but only after the read method has been invoked + * + * @return DOM Document object. + */ + public Document getContentDOM() { + return contentDoc; + } + + /** + * Return a DOM Document object of the meta.xml + * file. Note that a meta DOM is not created when the constructor + * is called, but only after the read method has been invoked + * + * @return DOM Document object, or null if no meta.xml has been read. + */ + public Document getMetaDOM() { + return metaDoc; + } + + /** + * Return a DOM Document object of the settings.xml + * file. Note that a settings DOM is not created when the constructor + * is called, but only after the read method has been invoked + * + * @return DOM Document object, or null if no settings.xml has been read. + */ + public Document getSettingsDOM() { + return settingsDoc; + } + + /** + * Return a DOM Document object of the styles.xml file. + * Note that a style DOM is not created when the constructor + * is called, but only after the read method has been invoked + * + * @return DOM Document object, or null if no styles.xml has been read. + */ + public Document getStyleDOM() { + return styleDoc; + } + + /** + * Returns all the embedded objects (graphics, formulae, etc.) present in + * this document. If the document is read from flat XML there will be no embedded objects. + * + * @return An Iterator of EmbeddedObject objects. 
+ */ + public Iterator getEmbeddedObjects() { + if (embeddedObjects == null) { + embeddedObjects = new HashMap(); + if (manifestDoc != null) { + // Need to read the manifest file and construct a map of objects keyed by their path + NodeList nl = manifestDoc.getElementsByTagName(MANIFEST_FILE_ENTRY); + int nLen = nl.getLength(); + for (int i = 0; i < nLen; i++) { + Element elm = (Element) nl.item(i); + String sType = elm.getAttribute(MANIFEST_MEDIA_TYPE); + String sPath = elm.getAttribute(MANIFEST_FULL_PATH); + + /* According to the ODF spec there are only two types of embedded object: + * Objects with an XML representation. + * Objects without an XML representation. + * The former are represented by one or more XML files. + * The latter are in binary form. + */ + if (sType.startsWith("application/vnd.oasis.opendocument") || sType.startsWith("application/vnd.sun.xml")) { + // Allow either ODF or old OOo 1.x embedded objects + if (!sPath.equals("/")) { // Exclude the main document entries + if (sPath.endsWith("/")) { // Remove trailing slash + sPath=sPath.substring(0, sPath.length()-1); + } + embeddedObjects.put(sPath, new EmbeddedXMLObject(sPath, sType, zip)); + } + } + else if (!sType.equals("text/xml")) { + // XML entries are either embedded ODF doc entries or main document entries, all other + // entries are included as binary objects + embeddedObjects.put(sPath, new EmbeddedBinaryObject(sPath, sType, zip)); + } + } + } + } + return embeddedObjects.values().iterator(); + } + + /** + * Returns the embedded object corresponding to the name provided. + * The name should be stripped of any preceding path characters, such as + * '/', '.' or '#'. + * + * @param sName The name of the embedded object to retrieve. + * + * @return An EmbeddedObject instance representing the named + * object, or null if sName is null or no such object exists. 
+ */ + public EmbeddedObject getEmbeddedObject(String sName) { + if (sName == null) { + return null; + } + getEmbeddedObjects(); + if (embeddedObjects.containsKey(sName)) { + return embeddedObjects.get(sName); + } + return null; + } + + /** + * Read the document from a DOM tree (flat XML format); the tree becomes the content DOM and all other parts are reset to null + * + * @param dom the DOM tree + */ + public void read(org.w3c.dom.Document dom) { + contentDoc = dom; + styleDoc = null; + settingsDoc = null; + metaDoc = null; + manifestDoc = null; + zip=null; + embeddedObjects = null; + } + + + /** + * Read the Office Document from the given + * InputStream. + * Performs simple type detection (on the first 4 bytes) to determine package or flat format + * + * @param is Office document InputStream. + * + * @throws IOException If any I/O error occurs, if a package has no content.xml, or if an XML stream of a package cannot be parsed. + */ + public void read(InputStream is) throws IOException { + // We need to read 4 bytes ahead to detect flat or zip format (NOTE(review): the count returned by read() is not checked) + BufferedInputStream inbuf = new BufferedInputStream(is); + byte[] bytes = new byte[4]; + inbuf.mark(4); + inbuf.read(bytes); + inbuf.reset(); + boolean bZip = MIMETypes.ZIP.equals(MIMETypes.getMagicMIMEType(bytes)); + if (bZip) { + readZip(inbuf); + } + else { + readFlat(inbuf); + } + } + + private void readZip(InputStream is) throws IOException { + zip = new SimpleZipReader(); + zip.read(is); + + byte contentBytes[] = zip.getEntry(CONTENTXML); + if (contentBytes == null) { + throw new IOException("Entry content.xml not found in file"); + } + try { + contentDoc = parse(contentBytes); + } catch (SAXException ex) { + throw new IOException(ex); + } + + byte styleBytes[] = zip.getEntry(STYLESXML); + if (styleBytes != null) { + try { + styleDoc = parse(styleBytes); + } catch (SAXException ex) { + throw new IOException(ex); + } + } + + byte metaBytes[] = zip.getEntry(METAXML); + if (metaBytes != null) { + try { + metaDoc = parse(metaBytes); + } catch (SAXException ex) { + throw new IOException(ex); + } + } + + byte settingsBytes[] = zip.getEntry(SETTINGSXML); + if (settingsBytes != null) { + try 
{ + settingsDoc = parse(settingsBytes); + } catch (SAXException ex) { + throw new IOException(ex); + } + } + + byte manifestBytes[] = zip.getEntry(MANIFESTXML); + if (manifestBytes != null) { + try { + manifestDoc = parse(manifestBytes); + } catch (SAXException ex) { + throw new IOException(ex); + } + } + } + + + private void readFlat(InputStream is) throws IOException { + SAXParserFactory factory=SAXParserFactory.newInstance(); + SimpleSAXHandler handler = new SimpleSAXHandler(); + try { + SAXParser saxParser = factory.newSAXParser(); + saxParser.parse(is,handler); + } + catch (SAXException e){ /* NOTE(review): unlike readZip, a parse error is only logged here; contentDoc is then whatever the handler built so far */ + System.err.println("Oops - Error parsing document"); + e.printStackTrace(); + } + catch (ParserConfigurationException e) { + System.err.println("Oops - failed to get XML parser!?"); + e.printStackTrace(); + } + contentDoc = handler.getDOM(); + styleDoc = null; + settingsDoc = null; + metaDoc = null; + manifestDoc = null; + zip=null; + embeddedObjects = null; + } + + /** + * Parse given byte array into a DOM + * Document object using a SAX parser + * feeding a SimpleSAXHandler. + * + * @param bytes byte array holding the XML document + * to parse. + * + * @return Resulting DOM Document object, or null if no SAX parser could be configured. + * @throws IOException If any I/O error occurs. + * @throws SAXException If any parsing error occurs. 
+ */ + static Document parse(byte bytes[]) throws SAXException, IOException { + SAXParserFactory factory=SAXParserFactory.newInstance(); + SimpleSAXHandler handler = new SimpleSAXHandler(); + try { + SAXParser saxParser = factory.newSAXParser(); + saxParser.parse(new ByteArrayInputStream(bytes),handler); + return handler.getDOM(); + } + catch (ParserConfigurationException e) { + System.err.println("Oops - failed to get XML parser!?"); + e.printStackTrace(); + } + return null; /* reached only when no SAX parser could be configured */ + } + +} + diff --git a/source/java/writer2latex/office/OfficeReader.java b/source/java/writer2latex/office/OfficeReader.java index 6ca2aa6..248f37b 100644 --- a/source/java/writer2latex/office/OfficeReader.java +++ b/source/java/writer2latex/office/OfficeReader.java @@ -37,7 +37,6 @@ import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Document; -import writer2latex.xmerge.OfficeDocument; import writer2latex.util.Misc; /**

This class reads and collects global information about an OOo document. diff --git a/source/java/writer2latex/util/SimpleDOMBuilder.java b/source/java/writer2latex/util/SimpleDOMBuilder.java new file mode 100644 index 0000000..3eb0c8b --- /dev/null +++ b/source/java/writer2latex/util/SimpleDOMBuilder.java @@ -0,0 +1,123 @@ +/************************************************************************ + * + * SimpleDOMBuilder.java + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * Copyright: 2002-2012 by Henrik Just + * + * All Rights Reserved. + * + * Version 1.4 (2012-03-22) + * + */ + +package writer2latex.util; + +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.w3c.dom.DOMImplementation; +import org.w3c.dom.Document; +import org.w3c.dom.DocumentType; +import org.w3c.dom.Element; + +/** This class provides a simple way to create and populate a DOM tree in logical order + */ +public class SimpleDOMBuilder { + private Document dom=null; + private Element currentElement=null; + + /** + * Append an element to the current element and set this new element to be the current element. 
+ * If there is no current element, a new DOM tree will be created (discarding the current DOM tree if any) + * with the new element as the document element. + * + * @param sTagName the tag name of the new element + * @return true on success, false if no DocumentBuilder could be created for a new DOM tree + */ + public boolean startElement(String sTagName) { + if (currentElement!=null) { + currentElement = (Element) currentElement.appendChild(dom.createElement(sTagName)); + } + else { + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + try { + DocumentBuilder builder = factory.newDocumentBuilder(); + DOMImplementation domImpl = builder.getDOMImplementation(); + DocumentType doctype = domImpl.createDocumentType(sTagName, "", ""); + dom = domImpl.createDocument("",sTagName,doctype); + currentElement = dom.getDocumentElement(); + } catch (ParserConfigurationException e) { + return false; + } + } + return true; + } + + /** + * Set the current element to the parent of the current element + * @return true on success, false if there is no current element to end + */ + public boolean endElement() { + if (currentElement!=null) { + if (currentElement!=dom.getDocumentElement()) { + currentElement=(Element) currentElement.getParentNode(); + } + else { // Back at document element: Finished populating the DOM tree + currentElement=null; + } + return true; + } + return false; + } + + /** + * Set an attribute of the current element + * @param sName the name of the attribute + * @param sValue the value of the attribute + * @return true on success, false if there is no current element + */ + public boolean setAttribute(String sName,String sValue) { + if (currentElement!=null) { + currentElement.setAttribute(sName, sValue); + return true; + } + return false; + } + + /** + * Append a text node with the given characters to the current element + * @param sText the text to append + * @return true on success, false if there is no current element + */ + public boolean characters(String sText) { + if (currentElement!=null) { + currentElement.appendChild(dom.createTextNode(sText)); + return true; + } + return false; + } + + /** + * Get the DOM tree + * + * @return 
the DOM tree, or null if none has been created + */ + public Document getDOM() { + return dom; + } +} diff --git a/source/java/writer2latex/util/SimpleSAXHandler.java b/source/java/writer2latex/util/SimpleSAXHandler.java new file mode 100644 index 0000000..bb0e0ee --- /dev/null +++ b/source/java/writer2latex/util/SimpleSAXHandler.java @@ -0,0 +1,62 @@ +/************************************************************************ + * + * SimpleSAXHandler.java + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software Foundation. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, + * MA 02111-1307 USA + * + * Copyright: 2002-2012 by Henrik Just + * + * All Rights Reserved. 
+ * + * Version 1.4 (2012-03-23) + * + */ + +package writer2latex.util; + +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + + +/** A simple SAX handler which transforms the SAX events into a DOM tree + * (supporting element and text nodes only) + */ +public class SimpleSAXHandler extends DefaultHandler { + + private SimpleDOMBuilder builder = new SimpleDOMBuilder(); + + public org.w3c.dom.Document getDOM() { + return builder.getDOM(); + } + + @Override public void startElement(String nameSpace, String localName, String qName, Attributes attributes){ + builder.startElement(qName); + int nLen = attributes.getLength(); + for (int i=0;i entries = new HashMap(); + + + /** Read a zipped stream, keeping the bytes of every entry in memory under the entry's name + * + * @param is InputStream to read + * + * @throws IOException if an I/O error occurs + */ + public void read(InputStream is) throws IOException { + ZipInputStream zis = new ZipInputStream(is); + ZipEntry entry = null; + while ((entry=zis.getNextEntry())!=null) { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + int nLen = 0; + byte buffer[] = new byte[BUFFERSIZE]; + while ((nLen = zis.read(buffer)) > 0) { + baos.write(buffer, 0, nLen); + } + byte bytes[] = baos.toByteArray(); + entries.put(entry.getName(), bytes); + } + zis.close(); + } + + /** Get an entry from the ZIP file. Getting should be taken quite literally here: + * You can only get an entry once: The SimpleZipReader removes the entry from the + * collection when this method is called (memory optimization). 
+ * + * @param sName the name (path) of the ZIP entry + * + * @return a byte array with the contents of the entry, or null if the entry does not exist or has already been fetched + */ + public byte[] getEntry(String sName) { + if (entries.containsKey(sName)) { + byte[] bytes = entries.get(sName); + entries.remove(sName); + return bytes; + } + return null; + } + + +} diff --git a/source/java/writer2latex/xhtml/DrawConverter.java b/source/java/writer2latex/xhtml/DrawConverter.java index f3e7bcb..8b0cdf6 100644 --- a/source/java/writer2latex/xhtml/DrawConverter.java +++ b/source/java/writer2latex/xhtml/DrawConverter.java @@ -52,13 +52,13 @@ import org.w3c.dom.Node; import org.w3c.dom.NodeList; import org.w3c.dom.Element; -import writer2latex.xmerge.EmbeddedObject; //import writer2latex.xmerge.EmbeddedBinaryObject; -import writer2latex.xmerge.EmbeddedXMLObject; import writer2latex.util.Misc; import writer2latex.util.CSVList; import writer2latex.xmerge.BinaryGraphicsDocument; +import writer2latex.office.EmbeddedObject; +import writer2latex.office.EmbeddedXMLObject; import writer2latex.office.XMLString; import writer2latex.office.MIMETypes; import writer2latex.office.StyleWithProperties; diff --git a/source/java/writer2latex/xmerge/EmbeddedBinaryObject.java b/source/java/writer2latex/xmerge/EmbeddedBinaryObject.java deleted file mode 100644 index 7a2dc51..0000000 --- a/source/java/writer2latex/xmerge/EmbeddedBinaryObject.java +++ /dev/null @@ -1,109 +0,0 @@ -/************************************************************************ - * - * The Contents of this file are made available subject to the terms of - * either of the following licenses - * - * - GNU Lesser General Public License Version 2.1 - * - * Sun Microsystems Inc., October, 2000 - * - * GNU Lesser General Public License Version 2.1 - * ============================================= - * Copyright 2000 by Sun Microsystems, Inc. 
- * 901 San Antonio Road, Palo Alto, CA 94303, USA - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - * - * The Initial Developer of the Original Code is: Sun Microsystems, Inc. - * - * Copyright: 2000 by Sun Microsystems, Inc. - * - * All Rights Reserved. - * - * Contributor(s): _______________________________________ - * - * - ************************************************************************/ - -// This version is adapted for Writer2LaTeX -// Version 1.4 (2012-03-19) - -package writer2latex.xmerge; - - -/** - * This class represents embedded object's in an OpenOffice.org document that - * have a binary representation. - */ -public class EmbeddedBinaryObject extends EmbeddedObject { - - /** The object's binary representation. */ - protected byte[] objData = null; - - /** - * Constructor for an embedded object stored using an XML representation. - * - * @param name The name of the object. - * @param type The mime-type of the object. See the class summary. - */ - public EmbeddedBinaryObject(String name, String type) { - super(name, type); - } - - - /** - * Package private constructor for use when reading an object from a - * compressed SX? file. - * - * @param name The name of the object. - * @param type The mime-type of the object. See the class summary. - * @param source The OfficeZip representation of the SX? 
file that stores - * the object. - */ - EmbeddedBinaryObject(String name, String type, OfficeZip source) { - super(name, type, source); - } - - - /** - * This method returns the data for this object. - * - * @return A byte array containing the object's data. - */ - public byte[] getBinaryData() { - - if (objData == null) { - // See if we came from a Zip file - if (zipFile != null) { - objData = zipFile.getNamedBytes(objName); - } - } - - return objData; - } - - - /** - * Sets the data for this object. - * - * @param data A byte array containing data for the object. - */ - /*public void setBinaryData(byte[] data) { - objData = data; - hasChanged = true; - }*/ - -} - diff --git a/source/java/writer2latex/xmerge/EmbeddedObject.java b/source/java/writer2latex/xmerge/EmbeddedObject.java deleted file mode 100644 index afd815e..0000000 --- a/source/java/writer2latex/xmerge/EmbeddedObject.java +++ /dev/null @@ -1,108 +0,0 @@ -/************************************************************************ - * - * The Contents of this file are made available subject to the terms of - * - * - GNU Lesser General Public License Version 2.1 - * - * Sun Microsystems Inc., October, 2000 - * - * GNU Lesser General Public License Version 2.1 - * ============================================= - * Copyright 2000 by Sun Microsystems, Inc. - * 901 San Antonio Road, Palo Alto, CA 94303, USA - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - * - * The Initial Developer of the Original Code is: Sun Microsystems, Inc. - * - * Copyright: 2000 by Sun Microsystems, Inc. - * - * All Rights Reserved. - * - * Contributor(s): _______________________________________ - * - * - ************************************************************************/ - -// This version is adapted for Writer2LaTeX -// Version 1.4 (2012-03-19) - -package writer2latex.xmerge; - -public abstract class EmbeddedObject { - protected String objName; - protected String objType; - - /** Representation of the file from which this object was read. */ - protected OfficeZip zipFile = null; - - /** Flag indicating if this document has changed since reading or is new. */ - protected boolean hasChanged = false; - - /** - * Constructor for an embedded object stored using an XML representation. - * - * @param name The name of the object. - * @param type The mime-type of the object. See the class summary. - */ - public EmbeddedObject(String name, String type) { - objName = name; - objType = type; - - hasChanged = true; - } - - - /** - * Package private constructor for use when reading an object from a - * compressed SX? file. - * - * @param name The name of the object. - * @param type The mime-type of the object. See the class summary. - * @param source The OfficeZip representation of the SX? file that stores - * the object. - */ - EmbeddedObject(String name, String type, OfficeZip source) { - this(name, type); - zipFile = source; - } - - - /** - * Retrieves the name of the embedded object represented by an instance of - * this class. - * - * N.B.The name referes to the name as found in the - * META-INF/manifest.xml file. - * - * @return The name of the object. 
- */ - public final String getName() { - return objName; - } - - - /** - * Retrieves the type of the embedded object represented by an instance of - * this class. - * - * The META-INF/manifest.xml file currently represents the - * type of an object using MIME types. - */ - public final String getType() { - return objType; - } - -} \ No newline at end of file diff --git a/source/java/writer2latex/xmerge/EmbeddedXMLObject.java b/source/java/writer2latex/xmerge/EmbeddedXMLObject.java deleted file mode 100644 index f44c229..0000000 --- a/source/java/writer2latex/xmerge/EmbeddedXMLObject.java +++ /dev/null @@ -1,239 +0,0 @@ -/************************************************************************ - * - * The Contents of this file are made available subject to the terms of - * - * - GNU Lesser General Public License Version 2.1 - * - * Sun Microsystems Inc., October, 2000 - * - * GNU Lesser General Public License Version 2.1 - * ============================================= - * Copyright 2000 by Sun Microsystems, Inc. - * 901 San Antonio Road, Palo Alto, CA 94303, USA - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - * - * The Initial Developer of the Original Code is: Sun Microsystems, Inc. - * - * Copyright: 2000 by Sun Microsystems, Inc. - * - * All Rights Reserved. 
- * - * Contributor(s): _______________________________________ - * - * - ************************************************************************/ - -// This version is adapted for Writer2LaTeX -// Version 1.4 (2012-03-19) - -package writer2latex.xmerge; - -//import java.io.ByteArrayInputStream; -import java.io.IOException; - -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - -import org.w3c.dom.Document; - -//import org.xml.sax.EntityResolver; -//import org.xml.sax.InputSource; -import org.xml.sax.SAXException; - -/** - * This class represents those embedded objects in an OpenOffice.org document - * that have an XML representation. Currently, according to the OpenOffice.org - * File Format 1.0 document, there are 6 such objects: - * - * Formulae created with Math (application/vnd.sun.xml.math) - * Charts created with Chart (application/vnd.sun.xml.chart) - * Spreadsheets created with Calc (application/vnd.sun.xml.calc) - * Text created with Writer (application/vnd.sun.xml.writer) - * Drawings created with Draw (application/vnd.sun.xml.draw) - * Presentations created with Impress (application/vnd.sun.xml.impress) - * - * These object types are stored using a combination of content, settings and styles - * XML files. - */ -public class EmbeddedXMLObject extends EmbeddedObject { - - // Entries for the subdocuments that constitute this object; - protected Document contentDOM = null; - protected Document settingsDOM = null; - protected Document stylesDOM = null; - - private DocumentBuilder builder = null; - - /** - * Constructor for an embedded object stored using an XML representation. - * - * @param name The name of the object. - * @param type The mime-type of the object. See the class summary. 
- */ - public EmbeddedXMLObject(String name, String type) { - super(name, type); - } - - /** - * Package private constructor for use when reading an object from a - * compressed SX? file. - * - * @param name The name of the object. - * @param type The mime-type of the object. See the class summary. - * @param source The OfficeZip representation of the SX? file that stores - * the object. - */ - EmbeddedXMLObject(String name, String type, OfficeZip source) { - super(name, type, source); - } - - - /** - * Returns the content data for this embedded object. - * - * @return DOM represenation of "content.xml" - * - * @throws SAXException If any parser error occurs - * @throws IOException If any IO error occurs - */ - public Document getContentDOM() throws SAXException, IOException { - - if (contentDOM == null) { - contentDOM = getNamedDOM("content.xml"); - } - - return contentDOM; - } - - - /** - * Sets the content data for the embedded object. - * - * @param content DOM representation of the object's content. - */ - /*public void setContentDOM(Document content) { - contentDOM = content; - hasChanged = true; - }*/ - - - /** - * Returns the settings data for this embedded object. - * - * @return DOM represenation of "settings.xml" - * - * @throws SAXException If any parser error occurs - * @throws IOException If any IO error occurs - */ - public Document getSettingsDOM() throws SAXException, IOException { - - if (settingsDOM == null) { - settingsDOM = getNamedDOM("settings.xml"); - } - - return settingsDOM; - } - - - /** - * Sets the settings data for the embedded object. - * - * @param settings DOM representation of the object's settings. - */ - /*public void setSettingsDOM(Document settings) { - settingsDOM = settings; - hasChanged = true; - }*/ - - - /** - * Returns the style data for this embedded object. 
- * - * @return DOM represenation of "styles.xml" - * - * @throws SAXException If any parser error occurs - * @throws IOException If any IO error occurs - */ - public Document getStylesDOM() throws SAXException, IOException { - - if (stylesDOM == null) { - stylesDOM = getNamedDOM("styles.xml"); - } - - return stylesDOM; - } - - - /** - * Sets the styles data for the embedded object. - * - * @param styles DOM representation of the object's styles. - */ - /*public void setStylesDOM(Document styles) { - stylesDOM = styles; - hasChanged = true; - }*/ - - - /** - * This method extracts the data for the given XML file from the SX? file - * and creates a DOM representation of it. - * - * @param name The name of the XML file to retrieve. It is paired with - * the object name to access the SX? file. - * - * @return DOM representation of the named XML file. - * - * @throws SAXException If any parser error occurs - * @throws IOException If any IO error occurs - */ - private Document getNamedDOM(String name) throws SAXException, IOException { - if (zipFile == null) { - return null; - } - - try { - if (builder == null) { - DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); - - factory.setValidating(false); - builder = factory.newDocumentBuilder(); - } - - byte[] data = zipFile.getNamedBytes(new String(objName + "/" + name)); - if (data != null) { - return OfficeDocument.parse(builder, data); - } - else { - return null; - } - - } - catch (SAXException se) { - throw se; - } - catch (IOException ioe) { - throw ioe; - } - catch (ParserConfigurationException pce) { - throw new SAXException(pce); - } - } - - - -} \ No newline at end of file diff --git a/source/java/writer2latex/xmerge/OfficeConstants.java b/source/java/writer2latex/xmerge/OfficeConstants.java deleted file mode 100644 index 2fb0af1..0000000 --- a/source/java/writer2latex/xmerge/OfficeConstants.java +++ /dev/null @@ -1,455 +0,0 @@ 
-/************************************************************************ - * - * The Contents of this file are made available subject to the terms of - * - * - GNU Lesser General Public License Version 2.1 - * - * Sun Microsystems Inc., October, 2000 - * - * GNU Lesser General Public License Version 2.1 - * ============================================= - * Copyright 2000 by Sun Microsystems, Inc. - * 901 San Antonio Road, Palo Alto, CA 94303, USA - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - * - * The Initial Developer of the Original Code is: Sun Microsystems, Inc. - * - * Copyright: 2000 by Sun Microsystems, Inc. - * - * All Rights Reserved. - * - * Contributor(s): _______________________________________ - * - * - ************************************************************************/ - -// This version has been adapted for Writer2LaTeX - -package writer2latex.xmerge; - -/** - * This interface contains constants for StarOffice XML tags, - * attributes (StarCalc cell types, etc.). - * - * @author Herbie Ong, Paul Rank, Martin Maher - */ -public interface OfficeConstants { - - /** Element tag for office:document, this is the root tag. */ - public final static String TAG_OFFICE_DOCUMENT = "office:document"; - - /** - * Element tag for office:document-content, this is the root - * tag in content.xml. 
- */ - public final static String TAG_OFFICE_DOCUMENT_CONTENT = "office:document-content"; - - /** - * Element tag for office:document-settings, this is the root - * tag in content.xml. - */ - public final static String TAG_OFFICE_DOCUMENT_SETTINGS= "office:document-settings"; - - /** - * Element tag for office:document-meta, this is the root - * tag in content.xml. - */ - public final static String TAG_OFFICE_DOCUMENT_META= "office:document-meta"; - - /** - * Element tag for office:document-styles, this is the root tag - * in styles.xml. - */ - public final static String TAG_OFFICE_DOCUMENT_STYLES = "office:document-styles"; - - /** - * Attribute tag for office:class of element - * office:document. - */ - public final static String ATTRIBUTE_OFFICE_CLASS = "office:class"; - - /** Element tag for office:styles. */ - public final static String TAG_OFFICE_STYLES = "office:styles"; - - /** Element tag for office:meta. */ - public final static String TAG_OFFICE_META = "office:meta"; - - /** Element tag for office:automatic-styles. */ - public final static String TAG_OFFICE_AUTOMATIC_STYLES = "office:automatic-styles"; - - /** Element tag for office:master-styles. */ - public final static String TAG_OFFICE_MASTER_STYLES = "office:master-styles"; - - /** Element tag for office:body. */ - public final static String TAG_OFFICE_BODY = "office:body"; - - /** Element tag for office:font-face-decls. */ - public final static String TAG_OFFICE_FONT_FACE_DECLS = "office:font-face-decls"; - - /** Element tag for office:settings. */ - public final static String TAG_OFFICE_SETTINGS = "office:settings"; - - //Adding - - /** Element tag for text:variable-set. */ - public final static String TAG_TEXT_VARIABLE_SET = "text:variable-set"; - - /** Element tag for text:variable-get. */ - public final static String TAG_TEXT_VARIABLE_GET = "text:variable-get"; -/** Element tag for text:expression. 
*/ - public final static String TAG_TEXT_EXPRESSION = "text:expression"; - -/** Element tag for text:user-field-get. */ - public final static String TAG_TEXT_USER_FIELD_GET = "text:user-field-get"; - -/** Element tag for text:page-variable-get. */ - public final static String TAG_TEXT_PAGE_VARIABLE_GET = "text:page-variable-get"; -/** Element tag for text:sequence. */ - public final static String TAG_TEXT_SEQUENCE = "text:sequence"; - - /** Element tag for text:text-input. */ - public final static String TAG_TEXT_VARIABLE_INPUT = "text:variable-input"; - /** Element tag for text:time. */ - public final static String TAG_TEXT_TIME = "text:time"; - - /** Element tag for text:page-count. */ - public final static String TAG_TEXT_PAGE_COUNT = "text:page-count"; - /** Element tag for text:page-number. */ - public final static String TAG_TEXT_PAGE_NUMBER = "text:page-number"; - /** Element tag for text:author-initials. */ - public final static String TAG_TEXT_AUTHOR_INITIALS = "text:author-initials"; - /** Element tag for text:subject. */ - public final static String TAG_TEXT_SUBJECT = "text:subject"; - /** Element tag for text:title. */ - public final static String TAG_TEXT_TITLE = "text:title"; - /** Element tag for text:creation-time. */ - public final static String TAG_TEXT_CREATION_TIME = "text:creation-time"; - - /** Element tag for text:date. */ - public final static String TAG_TEXT_DATE = "text:date"; - /** Element tag for text:text-input. */ - public final static String TAG_TEXT_TEXT_INPUT = "text:text-input"; - - -//end adding - - /** Element tag for office:font-decls. */ - public final static String TAG_OFFICE_FONT_DECLS = "office:font-decls"; - - /** Element tag for style:font-decl. */ - public final static String TAG_STYLE_FONT_DECL = "style:font-decl"; - - /** Attribute tag for style:name of element style:name. */ - public final static String ATTRIBUTE_STYLE_NAME = "style:name"; - - /** - * Attribute tag for style:font-pitch of element - * style:font-pitch. 
- */ - public final static String ATTRIBUTE_STYLE_FONT_PITCH = "style:font-pitch"; - - /** - * Attribute tag for fo:font-family of element - * fo:font-family. - */ - public final static String ATTRIBUTE_FO_FONT_FAMILY = "fo:font-family"; - - /** - * Attribute tag for fo:font-family of element - * fo:font-family. - */ - public final static String ATTRIBUTE_FO_FONT_FAMILY_GENERIC = "fo:font-family-generic"; - - /** Element tag for text:p. */ - public final static String TAG_PARAGRAPH = "text:p"; - - /** Element tag for text:. */ - public final static String TAG_TEXT = "text:"; - - /** Element tag for text:h. */ - public final static String TAG_HEADING = "text:h"; - - /** Element tag for text:s. */ - public final static String TAG_SPACE = "text:s"; - - /** Element tag for text:tab-stop. */ - public final static String TAG_TAB_STOP = "text:tab-stop"; - - /** Element tag for text:line-break. */ - public final static String TAG_LINE_BREAK = "text:line-break"; - - /** Element tag for text:span. */ - public final static String TAG_SPAN = "text:span"; - - /** Element tag for text:a. */ - public final static String TAG_HYPERLINK = "text:a"; - - /** Element tag for text:bookmark. */ - public final static String TAG_BOOKMARK = "text:bookmark"; - - /** Element tag for text:bookmark-start. */ - public final static String TAG_BOOKMARK_START = "text:bookmark-start"; - - /** Element tag for text:unordered-list. */ - public final static String TAG_UNORDERED_LIST = "text:unordered-list"; - - /** Element tag for text:ordered-list. */ - public final static String TAG_ORDERED_LIST = "text:ordered-list"; - - /** Element tag for text:list-header. */ - public final static String TAG_LIST_HEADER = "text:list-header"; - - /** Element tag for text:list-item. */ - public final static String TAG_LIST_ITEM = "text:list-item"; - - /** Attribute tag for text:c of element text:s. 
*/ - public final static String ATTRIBUTE_SPACE_COUNT = "text:c"; - - /** - * Attribute tag for text:style-name of element - * text:style-name. - */ - public final static String ATTRIBUTE_TEXT_STYLE_NAME = "text:style-name"; - - /** Element tag for table:table. */ - public final static String TAG_TABLE = "table:table"; - - /** Element tag for table:named-expression. */ - public final static String TAG_NAMED_EXPRESSIONS = "table:named-expressions"; - - /** Element tag for table:named-range. */ - public final static String TAG_TABLE_NAMED_RANGE= "table:named-range"; - - /** Element tag for table:named-expression. */ - public final static String TAG_TABLE_NAMED_EXPRESSION= "table:named-expression"; - - /** - * Attribute tag for table:name of element - * table:table. - */ - public final static String ATTRIBUTE_TABLE_NAME = "table:name"; - - /** - * Attribute tag for table:expression of element - * table:named-range. - */ - public final static String ATTRIBUTE_TABLE_EXPRESSION = "table:expression"; - - /** - * Attribute tag for table:base-cell-address of element - * table:named-range. - */ - public final static String ATTRIBUTE_TABLE_BASE_CELL_ADDRESS = "table:base-cell-address"; - - /** - * Attribute tag for table:cell-range-address of element - * table:named-range. - */ - public final static String ATTRIBUTE_TABLE_CELL_RANGE_ADDRESS = "table:cell-range-address"; - - /** Element tag for table:table-row. */ - public final static String TAG_TABLE_ROW = "table:table-row"; - - /** Element tag for table:table-column. */ - public final static String TAG_TABLE_COLUMN = "table:table-column"; - - /** - * Attribute tag for table:default-cell-style-name - * of element table:table-column. - */ - public final static String ATTRIBUTE_DEFAULT_CELL_STYLE = "table:default-cell-style-name"; - - /** Element tag for table:scenario. */ - public final static String TAG_TABLE_SCENARIO = "table:scenario"; - - /** Element tag for table:table-cell. 
*/ - public final static String TAG_TABLE_CELL = "table:table-cell"; - - /** - * Attribute tag for table:value-type of element - * table:table-cell. - */ - public final static String ATTRIBUTE_TABLE_VALUE_TYPE = "table:value-type"; - - /** - * Attribute tag for table:number-columns-repeated - * of element table:table-cell. - */ - public final static String ATTRIBUTE_TABLE_NUM_COLUMNS_REPEATED = - "table:number-columns-repeated"; - - /** - * Attribute tag for table:number-rows-repeated - * of element table:table-row. - */ - public final static String ATTRIBUTE_TABLE_NUM_ROWS_REPEATED = - "table:number-rows-repeated"; - - /** - * Attribute tag for table:formula of element - * table:table-cell. - */ - public final static String ATTRIBUTE_TABLE_FORMULA = "table:formula"; - - /** - * Attribute tag for table:value of element - * table:table-cell. - */ - public final static String ATTRIBUTE_TABLE_VALUE = "table:value"; - - /** - * Attribute tag for table:date-value of element - * table:table-cell. - */ - public final static String ATTRIBUTE_TABLE_DATE_VALUE = "table:date-value"; - - /** - * Attribute tag for table:time-value of element - * table:table-cell. - */ - public final static String ATTRIBUTE_TABLE_TIME_VALUE = "table:time-value"; - - /** - * Attribute tag for table:string-value of element - * table:table-cell. - */ - public final static String ATTRIBUTE_TABLE_STRING_VALUE = - "table:string-value"; - - /** - * Attribute tag for table:time-boolean-value of element - * table:table-cell. - */ - public final static String ATTRIBUTE_TABLE_BOOLEAN_VALUE = - "table:boolean-value"; - - /** Attribute tag for table:style-name of table elements. */ - public final static String ATTRIBUTE_TABLE_STYLE_NAME = "table:style-name"; - - /** - * Attribute tag for table:currency of element - * table:table-cell. - */ - public final static String ATTRIBUTE_TABLE_CURRENCY = "table:currency"; - - /** The cell contains data of type string. 
*/ - public final static String CELLTYPE_STRING = "string"; - - /** The cell contains data of type float. */ - public final static String CELLTYPE_FLOAT = "float"; - - /** The cell contains data of type time. */ - public final static String CELLTYPE_TIME = "time"; - - /** The cell contains data of type date. */ - public final static String CELLTYPE_DATE = "date"; - - /** The cell contains data of type currency. */ - public final static String CELLTYPE_CURRENCY = "currency"; - - /** The cell contains data of type boolean. */ - public final static String CELLTYPE_BOOLEAN = "boolean"; - - /** The cell contains data of type percent. */ - public final static String CELLTYPE_PERCENT = "percentage"; - - /** StarWriter XML file extension. */ - public final static String SXW_FILE_EXTENSION = ".sxw"; - - /** StarWriter XML office:class value. */ - public final static String SXW_TYPE = "text"; - - /** StarCalc XML file extension. */ - public final static String SXC_FILE_EXTENSION = ".sxc"; - - /** StarCalc XML office:class value. */ - public final static String SXC_TYPE = "spreadsheet"; - - /** Element tag for manifest:manifestentry in Manifest XML */ - public final static String TAG_MANIFEST_ROOT = "manifest:manifest"; - - /** Element tag for manifest:file-entry entry in Manifest XML. */ - public final static String TAG_MANIFEST_FILE = "manifest:file-entry"; - - /** - * Attribute tag for manifest:media-type of element - * manifest:file-entry. - */ - public final static String ATTRIBUTE_MANIFEST_FILE_TYPE = "manifest:media-type"; - - /** - * Attribute tag for manifest:full-path of element - * manifest:file-entry. - */ - public final static String ATTRIBUTE_MANIFEST_FILE_PATH = "manifest:full-path"; - - // Tags and Elements for the settings.xml - - /** Element tag for config:config-item. */ - public final static String TAG_CONFIG_ITEM = "config:config-item"; - - /** Element tag for config:config-item-set. 
*/ - public final static String TAG_CONFIG_ITEM_SET = "config:config-item-set"; - - /** Element tag for config:config-item-map-indexed. */ - public final static String TAG_CONFIG_ITEM_MAP_INDEXED = "config:config-item-map-indexed"; - - /** Element tag for config:config-item-map-named. */ - public final static String TAG_CONFIG_ITEM_MAP_NAMED = "config:config-item-map-named"; - - /** Element tag for config:config-item-map-entry. */ - public final static String TAG_CONFIG_ITEM_MAP_ENTRY= "config:config-item-map-entry"; - - /** - * Attribute tag for config:name of element - * config:config-item. - */ - public final static String ATTRIBUTE_CONFIG_NAME = "config:name"; - - /** - * Attribute tag for config:type of element - * config:config-item. - */ - public final static String ATTRIBUTE_CONFIG_TYPE = "config:type"; - - - /** StarWriter XML MIME type. */ - public final static String SXW_MIME_TYPE = "application/vnd.sun.xml.writer"; - - /** StarWriter XML Template MIME type. */ - public final static String STW_MIME_TYPE = "application/vnd.sun.xml.writer.template"; - - /** StarCalc XML MIME type. */ - public final static String SXC_MIME_TYPE = "application/vnd.sun.xml.calc"; - - /** StarCalc XML Template MIME type. */ - public final static String STC_MIME_TYPE = "application/vnd.sun.xml.calc.template"; - - /** StarImpress XML MIME type. */ - public final static String SXI_MIME_TYPE = "application/vnd.sun.xml.impress"; - - /** StarImpress XML Template MIME type. */ - public final static String STI_MIME_TYPE = "application/vnd.sun.xml.impress.template"; - - /** StarDraw XML MIME type. */ - public final static String SXD_MIME_TYPE = "application/vnd.sun.xml.draw"; - - /** StarMath XML MIME type. 
*/ - public final static String SXM_MIME_TYPE = "application/vnd.sun.xml.math"; - - /** StarWriter Global XML MIME Type */ - public final static String SXG_MIME_TYPE = "application/vnd.sun.xml.writer.global"; -} diff --git a/source/java/writer2latex/xmerge/OfficeDocument.java b/source/java/writer2latex/xmerge/OfficeDocument.java deleted file mode 100644 index f18aa30..0000000 --- a/source/java/writer2latex/xmerge/OfficeDocument.java +++ /dev/null @@ -1,824 +0,0 @@ -/************************************************************************ - * - * The Contents of this file are made available subject to the terms of - * - * - GNU Lesser General Public License Version 2.1 - * - * Sun Microsystems Inc., October, 2000 - * - * GNU Lesser General Public License Version 2.1 - * ============================================= - * Copyright 2000 by Sun Microsystems, Inc. - * 901 San Antonio Road, Palo Alto, CA 94303, USA - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - * - * The Initial Developer of the Original Code is: Sun Microsystems, Inc. - * - * Copyright: 2000 by Sun Microsystems, Inc. - * - * All Rights Reserved. 
- * - * Contributor(s): _______________________________________ - * - * - ************************************************************************/ - -// This version is adapted for Writer2LaTeX -// Version 1.4 (2012-03-19) - -package writer2latex.xmerge; - -import java.io.InputStream; -import java.io.Reader; -import java.io.BufferedReader; -import java.io.StringReader; -import java.io.InputStreamReader; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; -import java.util.HashMap; - -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.ParserConfigurationException; - -import org.w3c.dom.Node; -import org.w3c.dom.Element; -import org.w3c.dom.Document; -import org.w3c.dom.NodeList; -import org.xml.sax.InputSource; -import org.w3c.dom.NamedNodeMap; -import org.xml.sax.SAXException; - -import writer2latex.office.MIMETypes; -import writer2latex.util.Misc; - -/** - * This class implements reading of ODF files - */ -public class OfficeDocument - implements OfficeConstants { - - /** Factory for DocumentBuilder objects. */ - private static DocumentBuilderFactory factory = - DocumentBuilderFactory.newInstance(); - - /** DOM Document of content.xml. */ - private Document contentDoc = null; - - /** DOM Document of meta.xml. */ - private Document metaDoc = null; - - /** DOM Document of settings.xml. */ - private Document settingsDoc = null; - - /** DOM Document of content.xml. */ - private Document styleDoc = null; - - /** DOM Document of META-INF/manifest.xml. */ - private Document manifestDoc = null; - - private String documentName = null; - private String fileName = null; - - /** - * OfficeZip object to store zip contents from - * read InputStream. Note that this member - * will still be null if it was initialized using a template - * file instead of reading from a StarOffice zipped - * XML file. 
- */ - private OfficeZip zip = null; - - /** Collection to keep track of the embedded objects in the document. */ - private Map embeddedObjects = null; - - /** - * Default constructor. - * - * @param name Document name. - */ - public OfficeDocument(String name) - { - this(name, true, false); - } - - - /** - * Constructor with arguments to set namespaceAware - * and validating flags. - * - * @param name Document name (may or may not - * contain extension). - * @param namespaceAware Value for namespaceAware flag. - * @param validating Value for validating flag. - */ - public OfficeDocument(String name, boolean namespaceAware, boolean validating) { - - //res = Resources.getInstance(); - factory.setValidating(validating); - factory.setNamespaceAware(namespaceAware); - this.documentName = trimDocumentName(name); - this.fileName = documentName + getFileExtension(); - } - - - /** - * Removes the file extension from the Document - * name. - * - * @param name Full Document name with extension. - * - * @return Name of Document without the extension. - */ - private String trimDocumentName(String name) { - String temp = name.toLowerCase(); - String ext = getFileExtension(); - - if (temp.endsWith(ext)) { - // strip the extension - int nlen = name.length(); - int endIndex = nlen - ext.length(); - name = name.substring(0,endIndex); - } - - return name; - } - - // FIX2 (HJ): Determine wether this is package or flat format - /** Package or flat format? - * @return true if the document is in package format, false if it's flat xml - */ - public boolean isPackageFormat() { return zip!=null; } - - /** - * Return a DOM Document object of the content.xml - * file. Note that a content DOM is not created when the constructor - * is called. So, either the read method or the - * initContentDOM method will need to be called ahead - * on this object before calling this method. - * - * @return DOM Document object. 
- */ - public Document getContentDOM() { - - return contentDoc; - } - - /** - * Return a DOM Document object of the meta.xml - * file. Note that a content DOM is not created when the constructor - * is called. So, either the read method or the - * initContentDOM method will need to be called ahead - * on this object before calling this method. - * - * @return DOM Document object. - */ - public Document getMetaDOM() { - - return metaDoc; - } - - - /** - * Return a DOM Document object of the settings.xml - * file. Note that a content DOM is not created when the constructor - * is called. So, either the read method or the - * initContentDOM method will need to be called ahead - * on this object before calling this method. - * - * @return DOM Document object. - */ - public Document getSettingsDOM() { - - return settingsDoc; - } - - - /** - * Return a DOM Document object of the style.xml file. - * Note that this may return null if there is no style DOM. - * Note that a style DOM is not created when the constructor - * is called. Depending on the InputStream, a - * read method may or may not build a style DOM. When - * creating a new style DOM, call the initStyleDOM method - * first. - * - * @return DOM Document object. - */ - public Document getStyleDOM() { - - return styleDoc; - } - - - /** - * Return the name of the Document. - * - * @return The name of Document. - */ - public String getName() { - - return documentName; - } - - - /** - * Return the file name of the Document, possibly - * with the standard extension. - * - * @return The file name of Document. - */ - public String getFileName() { - - return fileName; - } - - - /** - * Returns the file extension for this type of - * Document. - * - * @return The file extension of Document. - */ - // TODO: is this used? - protected String getFileExtension() { return ""; } - - - /** - * Returns all the embedded objects (graphics, formulae, etc.) present in - * this document. 
- * - * @return An Iterator of EmbeddedObject objects. - */ - public Iterator getEmbeddedObjects() { - - if (embeddedObjects == null && manifestDoc != null) { - embeddedObjects = new HashMap(); - - // Need to read the manifest file and construct a list of objects - NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE); - - // Dont create the HashMap if there are no embedded objects - int len = nl.getLength(); - for (int i = 0; i < len; i++) { - Node n = nl.item(i); - - NamedNodeMap attrs = n.getAttributes(); - - String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue(); - String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue(); - - - /* - * According to OpenOffice.org XML File Format document (ver. 1) - * there are only two types of embedded object: - * - * Objects with an XML representation. - * Objects without an XML representation. - * - * The former are represented by one or more XML files. - * The latter are in binary form. - */ - // FIX2 (HJ): Allow either OOo 1.x or OpenDocument embedded objects - if (type.startsWith("application/vnd.sun.xml") || type.startsWith("application/vnd.oasis.opendocument")) - { - if (path.equals("/")) { - // Exclude the main document entries - continue; - } - // Take off the trailing '/' - String name = path.substring(0, path.length() - 1); - embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip)); - } - else if (type.equals("text/xml")) { - // XML entries are either embedded StarOffice doc entries or main - // document entries - continue; - } - else { // FIX (HJ): allows empty MIME type - embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip)); - } - } - } - - return embeddedObjects.values().iterator(); - } - - /** - * Returns the embedded object corresponding to the name provided. - * The name should be stripped of any preceding path characters, such as - * '/', '.' or '#'. - * - * @param name The name of the embedded object to retrieve. 
- * - * @return An EmbeddedObject instance representing the named - * object. - */ - public EmbeddedObject getEmbeddedObject(String name) { - if (name == null) { - return null; - } - - if (embeddedObjects == null) { - // FIX2 (HJ): Return null if there's no manifest - if (manifestDoc != null) { - getEmbeddedObjects(); - } - else { - return null; - } - } - - if (embeddedObjects.containsKey(name)) { - return embeddedObjects.get(name); - } - else { - return null; - } - } - - - /** - * Adds a new embedded object to the document. - * - * @param embObj An instance of EmbeddedObject. - */ - /*public void addEmbeddedObject(EmbeddedObject embObj) { - if (embObj == null) { - return; - } - - if (embeddedObjects == null) { - embeddedObjects = new HashMap(); - } - - embeddedObjects.put(embObj.getName(), embObj); - }*/ - - - /** - * Read the Office Document from the given - * InputStream. - * FIX3 (HJ): Perform simple type detection to determine package or flat format - * - * @param is Office document InputStream. - * - * @throws IOException If any I/O error occurs. - */ - public void read(InputStream is) throws IOException { - byte[] doc = Misc.inputStreamToByteArray(is); - boolean bZip = MIMETypes.ZIP.equals(MIMETypes.getMagicMIMEType(doc)); - // if it's zip, assume package - otherwise assume flat - read(new ByteArrayInputStream(doc),bZip); - } - - private void readZip(InputStream is) throws IOException { - - // Debug.log(Debug.INFO, "reading Office file"); - - DocumentBuilder builder = null; - - try { - builder = factory.newDocumentBuilder(); - } catch (ParserConfigurationException ex) { - throw new OfficeDocumentException(ex); - } - - // read in Office zip file format - - zip = new OfficeZip(); - zip.read(is); - - // grab the content.xml and - // parse it into contentDoc. 
- - byte contentBytes[] = zip.getContentXMLBytes(); - - if (contentBytes == null) { - - throw new OfficeDocumentException("Entry content.xml not found in file"); - } - - try { - - contentDoc = parse(builder, contentBytes); - - } catch (SAXException ex) { - - throw new OfficeDocumentException(ex); - } - - // if style.xml exists, grab the style.xml - // parse it into styleDoc. - - byte styleBytes[] = zip.getStyleXMLBytes(); - - if (styleBytes != null) { - - try { - - styleDoc = parse(builder, styleBytes); - - } catch (SAXException ex) { - - throw new OfficeDocumentException(ex); - } - } - - byte metaBytes[] = zip.getMetaXMLBytes(); - - if (metaBytes != null) { - - try { - - metaDoc = parse(builder, metaBytes); - - } catch (SAXException ex) { - - throw new OfficeDocumentException(ex); - } - } - - byte settingsBytes[] = zip.getSettingsXMLBytes(); - - if (settingsBytes != null) { - - try { - - settingsDoc = parse(builder, settingsBytes); - - } catch (SAXException ex) { - - throw new OfficeDocumentException(ex); - } - } - - - // Read in the META-INF/manifest.xml file - byte manifestBytes[] = zip.getManifestXMLBytes(); - - if (manifestBytes != null) { - - try { - manifestDoc = parse(builder, manifestBytes); - } catch (SAXException ex) { - throw new OfficeDocumentException(ex); - } - } - - } - - - /** - * Read the Office Document from the given - * InputStream. - * - * @param is Office document InputStream. - * @param isZip boolean Identifies whether - * a file is zipped or not - * - * @throws IOException If any I/O error occurs. 
- */ - public void read(InputStream is, boolean isZip) throws IOException { - - // Debug.log(Debug.INFO, "reading Office file"); - - DocumentBuilder builder = null; - - try { - builder = factory.newDocumentBuilder(); - } catch (ParserConfigurationException ex) { - throw new OfficeDocumentException(ex); - } - - if (isZip) - { - readZip(is); - } - else{ - try{ - //contentDoc= builder.parse((InputStream)is); - Reader r = secondHack(is); - InputSource ins = new InputSource(r); - org.w3c.dom.Document newDoc = builder.parse(ins); - //org.w3c.dom.Document newDoc = builder.parse((InputStream)is); - Element rootElement=newDoc.getDocumentElement(); - - NodeList nodeList; - Node tmpNode; - Node rootNode = (Node)rootElement; - if (newDoc !=null){ - /*content*/ - contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT); - rootElement=contentDoc.getDocumentElement(); - rootNode = (Node)rootElement; - - // FIX (HJ): Include office:font-decls in content DOM - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS); - if (nodeList.getLength()>0){ - tmpNode = contentDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - - // FIX2 (HJ): Include office:font-face-decls (OpenDocument) in content DOM - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_FACE_DECLS); - if (nodeList.getLength()>0){ - tmpNode = contentDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); - if (nodeList.getLength()>0){ - tmpNode = contentDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_BODY); - if (nodeList.getLength()>0){ - tmpNode = contentDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - - /*Styles*/ - styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES); - rootElement=styleDoc.getDocumentElement(); - rootNode = (Node)rootElement; - - // FIX (HJ): Include office:font-decls in styles DOM 
- nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_DECLS); - if (nodeList.getLength()>0){ - tmpNode = styleDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - - // FIX2 (HJ): Include office:font-face-decls in styles DOM - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_FONT_FACE_DECLS); - if (nodeList.getLength()>0){ - tmpNode = styleDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_STYLES); - if (nodeList.getLength()>0){ - tmpNode = styleDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - - // FIX (HJ): Include office:automatic-styles in styles DOM - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES); - if (nodeList.getLength()>0){ - tmpNode = styleDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - - // FIX (HJ): Include office:master-styles in styles DOM - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_MASTER_STYLES); - if (nodeList.getLength()>0){ - tmpNode = styleDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - - /*Settings*/ - settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS); - rootElement=settingsDoc.getDocumentElement(); - rootNode = (Node)rootElement; - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_SETTINGS); - if (nodeList.getLength()>0){ - tmpNode = settingsDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - /*Meta*/ - metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META); - rootElement=metaDoc.getDocumentElement(); - rootNode = (Node)rootElement; - nodeList= newDoc.getElementsByTagName(TAG_OFFICE_META); - if (nodeList.getLength()>0){ - tmpNode = metaDoc.importNode(nodeList.item(0),true); - rootNode.appendChild(tmpNode); - } - } - } - catch (SAXException ex) { - throw new OfficeDocumentException(ex); - } - } - - } - - - - /** - * Parse given byte array into a DOM - * Document object using the - * DocumentBuilder object. 
- * - * @param builder DocumentBuilder object for parsing. - * @param bytes byte array for parsing. - * - * @return Resulting DOM Document object. - * - * @throws SAXException If any parsing error occurs. - */ - static Document parse(DocumentBuilder builder, byte bytes[]) - throws SAXException, IOException { - - Document doc = null; - - ByteArrayInputStream is = new ByteArrayInputStream(bytes); - - // TODO: replace hack with a more appropriate fix. - - Reader r = hack(is); - InputSource ins = new InputSource(r); - doc = builder.parse(ins); - - return doc; - } - - - /** - *

Creates a new DOM Document containing minimum - * OpenOffice XML tags.

- * - *

This method uses the subclass - * getOfficeClassAttribute method to get the - * attribute for office:class.

- * - * @param rootName root name of Document. - * - * @throws IOException If any I/O error occurs. - */ - private final Document createDOM(String rootName) throws IOException { - - Document doc = null; - - try { - - DocumentBuilder builder = factory.newDocumentBuilder(); - doc = builder.newDocument(); - - } catch (ParserConfigurationException ex) { - - throw new OfficeDocumentException(ex); - - } - - Element root = (Element) doc.createElement(rootName); - doc.appendChild(root); - - root.setAttribute("xmlns:office", "http://openoffice.org/2000/office"); - root.setAttribute("xmlns:style", "http://openoffice.org/2000/style"); - root.setAttribute("xmlns:text", "http://openoffice.org/2000/text"); - root.setAttribute("xmlns:table", "http://openoffice.org/2000/table"); - root.setAttribute("xmlns:draw", "http://openoffice.org/2000/drawing"); - root.setAttribute("xmlns:fo", "http://www.w3.org/1999/XSL/Format"); - root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink"); - root.setAttribute("xmlns:number", "http://openoffice.org/2000/datastyle"); - root.setAttribute("xmlns:svg", "http://www.w3.org/2000/svg"); - root.setAttribute("xmlns:chart", "http://openoffice.org/2000/chart"); - root.setAttribute("xmlns:dr3d", "http://openoffice.org/2000/dr3d"); - root.setAttribute("xmlns:math", "http://www.w3.org/1998/Math/MathML"); - root.setAttribute("xmlns:form", "http://openoffice.org/2000/form"); - root.setAttribute("xmlns:script", "http://openoffice.org/2000/script"); - root.setAttribute("office:class", getOfficeClassAttribute()); - root.setAttribute("office:version", "1.0"); - - return doc; - } - - - /** - * Return the office:class attribute value. - * - * @return The attribute value. - */ - // not really used... - protected String getOfficeClassAttribute() { return ""; } - - - /** - *

Hacked code to filter tag before - * sending stream to parser.

- * - *

This hacked code needs to be changed later on.

- * - *

Issue: using current jaxp1.0 parser, there is no way - * to turn off processing of dtds. Current set of dtds - * have bugs, processing them will throw exceptions.

- * - *

This is a simple hack that assumes the whole - * tag are all in the same line. This is sufficient for - * current StarOffice 6.0 generated XML files. Since this - * hack really needs to go away, I don't want to spend - * too much time in making it a perfect hack.

- * FIX (HJ): Removed requirement for DOCTYPE to be in one line - * FIX (HJ): No longer removes newlines - * - * @param is InputStream to be filtered. - * - * @return Reader value without the tag. - * - * @throws IOException If any I/O error occurs. - */ - private static Reader hack(InputStream is) throws IOException { - - BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); - StringBuffer buffer = new StringBuffer(); - - String str = null; - - while ((str = br.readLine()) != null) { - - int sIndex = str.indexOf(" -1) { - - buffer.append(str.substring(0, sIndex)); - - int eIndex = str.indexOf('>', sIndex + 8 ); - - if (eIndex > -1) { - - buffer.append(str.substring(eIndex + 1, str.length())); - // FIX (HJ): Preserve the newline - buffer.append("\n"); - - } else { - - // FIX (HJ): More than one line. Search for '>' in following lines - boolean bOK = false; - while ((str = br.readLine())!=null) { - eIndex = str.indexOf('>'); - if (eIndex>-1) { - buffer.append(str.substring(eIndex+1)); - // FIX (HJ): Preserve the newline - buffer.append("\n"); - bOK = true; - break; - } - } - - if (!bOK) { throw new IOException("Invalid XML"); } - } - - } else { - - buffer.append(str); - // FIX (HJ): Preserve the newline - buffer.append("\n"); - } - } - - StringReader r = new StringReader(buffer.toString()); - return r; - } - - /** - *

Transform the InputStream to a Reader Stream.

- * - *

This hacked code needs to be changed later on.

- * - *

Issue: the new oasis input file stream means - * that the old input stream fails. see #i33702#

- * - * @param is InputStream to be filtered. - * - * @return Reader value of the InputStream(). - * - * @throws IOException If any I/O error occurs. - */ - private static Reader secondHack(InputStream is) throws IOException { - - BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8")); - char[] charArray = new char[4096]; - StringBuffer sBuf = new StringBuffer(); - int n = 0; - while ((n=br.read(charArray, 0, charArray.length)) > 0) - sBuf.append(charArray, 0, n); - - // ensure there is no trailing garbage after the end of the stream. - int sIndex = sBuf.lastIndexOf(""); - sBuf.delete(sIndex, sBuf.length()); - sBuf.append(""); - StringReader r = new StringReader(sBuf.toString()); - return r; - } - - -} - diff --git a/source/java/writer2latex/xmerge/OfficeDocumentException.java b/source/java/writer2latex/xmerge/OfficeDocumentException.java deleted file mode 100644 index c03cfb4..0000000 --- a/source/java/writer2latex/xmerge/OfficeDocumentException.java +++ /dev/null @@ -1,145 +0,0 @@ -/************************************************************************ - * - * The Contents of this file are made available subject to the terms of - * - * - GNU Lesser General Public License Version 2.1 - * - * Sun Microsystems Inc., October, 2000 - * - * GNU Lesser General Public License Version 2.1 - * ============================================= - * Copyright 2000 by Sun Microsystems, Inc. - * 901 San Antonio Road, Palo Alto, CA 94303, USA - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - * - * The Initial Developer of the Original Code is: Sun Microsystems, Inc. - * - * Copyright: 2000 by Sun Microsystems, Inc. - * - * All Rights Reserved. - * - * Contributor(s): _______________________________________ - * - * - ************************************************************************/ - -// This version is adapted for Writer2LaTeX -// Version 1.0 (2008-11-22) - -package writer2latex.xmerge; - -import java.io.IOException; - -//import javax.xml.parsers.ParserConfigurationException; -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; - -//import org.openoffice.xmerge.util.Resources; - -/** - * Used by OfficeDocument to encapsulate exceptions. It will add - * more details to the message string if it is of type - * SAXParseException. - * - * @author Herbie Ong - */ - -public final class OfficeDocumentException extends IOException { - - StringBuffer message = null; - - - /** - * Constructor, capturing additional information from the - * SAXException. - * - * @param e The SAXException. 
- */ - public OfficeDocumentException(SAXException e) { - super(e.toString()); - message = new StringBuffer(); - if (e instanceof SAXParseException) { - String msgParseError = - "PARSE_ERROR"; - String msgLine = - "LINE"; - String msgColumn = - "COLUMN"; - String msgPublicId = - "PUBLIC_ID"; - String msgSystemId = - "SYSTEM_ID"; - SAXParseException spe = (SAXParseException) e; - message.append(msgParseError); - message.append(": "); - message.append(msgLine); - message.append(": "); - message.append(spe.getLineNumber()); - message.append(", "); - message.append(msgColumn); - message.append(": "); - message.append(spe.getColumnNumber()); - message.append(", "); - message.append(msgSystemId); - message.append(": "); - message.append(spe.getSystemId()); - message.append(", "); - message.append(msgPublicId); - message.append(": "); - message.append(spe.getPublicId()); - message.append("\n"); - } - - // if there exists an embedded exception - Exception ex = e.getException(); - if (ex != null) { - message.append(ex.getMessage()); - } - } - - - /** - * Constructor, creates exception with provided message. - * - * @param s Message value for the exception. - */ - public OfficeDocumentException(String s) { - super(s); - } - - - /** - * Constructor, creates exception with the message - * corresponding to the message value of the provided - * exception. - * - * @param e The Exception. - */ - public OfficeDocumentException(Exception e) { - super(e.getMessage()); - } - - - /** - * Returns the message value for the Exception. - * - * @return The message value for the Exception. 
- */ - public String getMessage() { - return message.toString() + super.getMessage(); - } -} - diff --git a/source/java/writer2latex/xmerge/OfficeZip.java b/source/java/writer2latex/xmerge/OfficeZip.java deleted file mode 100644 index e10b0e2..0000000 --- a/source/java/writer2latex/xmerge/OfficeZip.java +++ /dev/null @@ -1,274 +0,0 @@ -/************************************************************************ - * - * The Contents of this file are made available subject to the terms of - * - * - GNU Lesser General Public License Version 2.1 - * - * Sun Microsystems Inc., October, 2000 - * - * GNU Lesser General Public License Version 2.1 - * ============================================= - * Copyright 2000 by Sun Microsystems, Inc. - * 901 San Antonio Road, Palo Alto, CA 94303, USA - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License version 2.1, as published by the Free Software Foundation. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, - * MA 02111-1307 USA - * - * The Initial Developer of the Original Code is: Sun Microsystems, Inc. - * - * Copyright: 2000 by Sun Microsystems, Inc. - * - * All Rights Reserved. 
- * - * Contributor(s): _______________________________________ - * - * - ************************************************************************/ - -// This version is adapted for Writer2LaTeX -// Version 1.4 (2012-03-19) - -package writer2latex.xmerge; - -import java.util.List; -import java.util.LinkedList; -import java.util.zip.ZipInputStream; -import java.util.zip.ZipEntry; -import java.io.InputStream; -import java.io.IOException; -import java.io.ByteArrayOutputStream; - -/** - * Class used by {@link - * org.openoffice.xmerge.converter.OfficeDocument - * OfficeDocument} to handle reading - * from a ZIP file, as well as storing ZIP entries. - * - * @author Herbie Ong - */ -class OfficeZip { - - /** File name of the XML file in a zipped document. */ - private final static String CONTENTXML = "content.xml"; - - private final static String STYLEXML = "styles.xml"; - private final static String METAXML = "meta.xml"; - private final static String SETTINGSXML = "settings.xml"; - private final static String MANIFESTXML = "META-INF/manifest.xml"; - - private final static int BUFFERSIZE = 1024; - - private List entryList = null; - - private int contentIndex = -1; - private int styleIndex = -1; - private int metaIndex = -1; - private int settingsIndex = -1; - private int manifestIndex = -1; - - /** Default constructor. */ - OfficeZip() { - - entryList = new LinkedList(); - } - - - /** - *

Read each zip entry in the InputStream object - * and store in entryList both the ZipEntry object - * as well as the bits of each entry. Call this method before - * calling the getContentXMLBytes method or the - * getStyleXMLBytes method.

- * - *

Keep track of the CONTENTXML and STYLEXML using - * contentIndex and styleIndex, respectively.

- * - * @param is InputStream object to read. - * - * @throws IOException If any I/O error occurs. - */ - void read(InputStream is) throws IOException { - - ZipInputStream zis = new ZipInputStream(is); - ZipEntry ze = null; - int i = -1; - - while ((ze = zis.getNextEntry()) != null) { - - String name = ze.getName(); - - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - - int len = 0; - byte buffer[] = new byte[BUFFERSIZE]; - - while ((len = zis.read(buffer)) > 0) { - baos.write(buffer, 0, len); - } - - byte bytes[] = baos.toByteArray(); - Entry entry = new Entry(ze,bytes); - - entryList.add(entry); - - i++; - - if (name.equalsIgnoreCase(CONTENTXML)) { - contentIndex = i; - } - else if (name.equalsIgnoreCase(STYLEXML)) { - styleIndex = i; - } - else if (name.equalsIgnoreCase(METAXML)) { - metaIndex = i; - } - else if (name.equalsIgnoreCase(SETTINGSXML)) { - settingsIndex = i; - } - else if (name.equalsIgnoreCase(MANIFESTXML)) { - manifestIndex = i; - } - - } - - zis.close(); - } - - - /** - * This method returns the CONTENTXML file in a - * byte array. It returns null if there is no - * CONTENTXML in this zip file. - * - * @return CONTENTXML in a byte array. - */ - byte[] getContentXMLBytes() { - - return getEntryBytes(contentIndex); - } - - - /** - * This method returns the STYLEXML file in a - * byte array. It returns null if there is - * no STYLEXML in this zip file. - * - * @return STYLEXML in a byte array. - */ - byte[] getStyleXMLBytes() { - - return getEntryBytes(styleIndex); - } - - /** - * This method returns the METAXML file in a - * byte array. It returns null if there is - * no METAXML in this zip file. - * - * @return METAXML in a byte array. - */ - byte[] getMetaXMLBytes() { - return getEntryBytes(metaIndex); - } - - /** - * This method returns the SETTINGSXML file in a - * byte array. It returns null if there is - * no SETTINGSXML in this zip file. - * - * @return SETTINGSXML in a byte array. 
- */ - byte[] getSettingsXMLBytes() { - return getEntryBytes(settingsIndex); - } - - /** - * This method returns the MANIFESTXML file in a byte array. - * It returns null if there is no MANIFESTXML in this zip file. - * - * @return MANIFESTXML in a byte array. - */ - byte[] getManifestXMLBytes() { - return getEntryBytes(manifestIndex); - } - - /** - * This method returns the bytes corresponding to the entry named in the - * parameter. - * - * @param name The name of the entry in the Zip file to retrieve. - * - * @return The data for the named entry in a byte array or - * null if no entry is found. - */ - byte[] getNamedBytes(String name) { - - // The list is not sorted, and sorting it for a binary search would - // invalidate the indices stored for the main files. - - // Could improve performance by caching the name and index when - // iterating through the ZipFile in read(). - for (int i = 0; i < entryList.size(); i++) { - Entry e = entryList.get(i); - - if (e.zipEntry.getName().equals(name)) { - return getEntryBytes(i); - } - } - - return null; - } - - - /** - * Used by the getContentXMLBytes method and the - * getStyleXMLBytes method to return the - * byte array from the corresponding - * entry in the entryList. - * - * @param index Index of Entry object in - * entryList. - * - * @return byte array associated in that - * Entry object or null, if there is - * not such Entry. - */ - private byte[] getEntryBytes(int index) { - - byte[] bytes = null; - - if (index > -1) { - Entry entry = entryList.get(index); - bytes = entry.bytes; - } - return bytes; - } - - /** - * This inner class is used as a data structure for holding - * a ZipEntry info and its corresponding bytes. - * These are stored in entryList. - */ - private class Entry { - - ZipEntry zipEntry = null; - byte bytes[] = null; - - Entry(ZipEntry zipEntry, byte bytes[]) { - this.zipEntry = zipEntry; - this.bytes = bytes; - } - } -} -