From 8a6e654344f41c4443d49fb46b04930739536714 Mon Sep 17 00:00:00 2001 From: henrikjust Date: Mon, 25 Aug 2014 06:25:05 +0000 Subject: [PATCH] Optimized reading of package format git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@164 f0f2a975-2e09-46c8-9428-3b39399b9f3c --- source/distro/changelog.txt | 4 + .../writer2latex/latex/DrawConverter.java | 7 +- .../writer2latex/latex/MathConverter.java | 6 +- .../office/EmbeddedXMLObject.java | 24 +--- .../writer2latex/office/OfficeDocument.java | 120 ++++++------------ 5 files changed, 55 insertions(+), 106 deletions(-) diff --git a/source/distro/changelog.txt b/source/distro/changelog.txt index 4ecc050..2c7191a 100644 --- a/source/distro/changelog.txt +++ b/source/distro/changelog.txt @@ -1,5 +1,9 @@ Changelog for Writer2LaTeX version 1.2 -> 1.4 +---------- version 1.3.2 alpha ---------- + +[all] Optimized reading of package format: The settings.xml files are not parsed and the unused parts of the ZIP file are disposed + ---------- version 1.3.1 alpha ---------- [w2x] Starting with version 4.2, LibreOffice exports display="math" on display equations. This attribute is now diff --git a/source/java/writer2latex/latex/DrawConverter.java b/source/java/writer2latex/latex/DrawConverter.java index 939d508..c447633 100644 --- a/source/java/writer2latex/latex/DrawConverter.java +++ b/source/java/writer2latex/latex/DrawConverter.java @@ -20,7 +20,7 @@ * * All Rights Reserved. * - * Version 1.4 (2014-08-11) + * Version 1.4 (2014-08-25) * */ @@ -145,13 +145,12 @@ public class DrawConverter extends ConverterHelper { if (object!=null) { if (MIMETypes.MATH.equals(object.getType()) || MIMETypes.ODF.equals(object.getType())) { // Formula! try { - Element settings = ((EmbeddedXMLObject) object).getSettingsDOM().getDocumentElement(); Document formuladoc = ((EmbeddedXMLObject) object).getContentDOM(); Element formula = Misc.getChildByTagName(formuladoc,XMLString.MATH); // Since OOo3.2 if (formula==null) { formula = Misc.getChildByTagName(formuladoc,XMLString.MATH_MATH); } - String sLaTeX = palette.getMathCv().convert(settings,formula); + String sLaTeX = palette.getMathCv().convert(formula); if (!" ".equals(sLaTeX)) { // ignore empty formulas ldp.append(" $") .append(sLaTeX) @@ -190,7 +189,7 @@ public class DrawConverter extends ConverterHelper { } if (formula!=null) { ldp.append(" $") - .append(palette.getMathCv().convert(null,formula)) + .append(palette.getMathCv().convert(formula)) .append("$"); if (Character.isLetterOrDigit(OfficeReader.getNextChar(node))) { ldp.append(" "); } } diff --git a/source/java/writer2latex/latex/MathConverter.java b/source/java/writer2latex/latex/MathConverter.java index d048cac..9e06355 100644 --- a/source/java/writer2latex/latex/MathConverter.java +++ b/source/java/writer2latex/latex/MathConverter.java @@ -20,7 +20,7 @@ * * All Rights Reserved. * - * Version 1.4 (2014-08-18) + * Version 1.4 (2014-08-25) * */ @@ -102,7 +102,7 @@ public final class MathConverter extends ConverterHelper { // TODO: Replace with a method "handleEquation" - public String convert(Element settings, Element formula) { + public String convert(Element formula) { // TODO: Use settings to determine display mode/text mode // formula must be a math:math node // First try to find a StarMath annotation @@ -242,7 +242,7 @@ public final class MathConverter extends ConverterHelper { } else { // MathML equation - sLaTeX = convert(null,equation); + sLaTeX = convert(equation); } if (sLaTeX!=null && !" ".equals(sLaTeX)) { // ignore empty formulas if (!bTexMaths || style!=TexMathsStyle.latex) { diff --git a/source/java/writer2latex/office/EmbeddedXMLObject.java b/source/java/writer2latex/office/EmbeddedXMLObject.java index 4a14b8f..5967676 100644 --- a/source/java/writer2latex/office/EmbeddedXMLObject.java +++ b/source/java/writer2latex/office/EmbeddedXMLObject.java @@ -16,11 +16,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * - * Copyright: 2002-2012 by Henrik Just + * Copyright: 2002-2014 by Henrik Just * * All Rights Reserved. * - * Version 1.4 (2012-03-27) + * Version 1.4 (2014-08-25) * */ @@ -37,17 +37,16 @@ import writer2latex.util.SimpleZipReader; /** This class represents those embedded objects in an ODF document that have an XML representation: * Formulas, charts, spreadsheets, text, drawings and presentations. * These object types are stored using a combination of content, settings and styles XML files. + * The settings are application specific and ignored. */ public class EmbeddedXMLObject extends EmbeddedObject { // Byte entries for the XML streams of this object private byte[] contentBytes = null; - private byte[] settingsBytes = null; private byte[] stylesBytes = null; // DOM trees representing the XML parts of this object protected Document contentDOM = null; - protected Document settingsDOM = null; protected Document stylesDOM = null; /** Read an object from an ODF package document @@ -60,7 +59,6 @@ public class EmbeddedXMLObject extends EmbeddedObject { super(sName, sType); // Read the bytes, but defer parsing until required (at that point, the bytes are nullified) contentBytes = source.getEntry(sName+"/"+OfficeDocument.CONTENTXML); - settingsBytes = source.getEntry(sName+"/"+OfficeDocument.SETTINGSXML); stylesBytes = source.getEntry(sName+"/"+OfficeDocument.STYLESXML); } @@ -80,22 +78,6 @@ public class EmbeddedXMLObject extends EmbeddedObject { return contentDOM; } - /** - * Returns the settings data for this embedded object. - * - * @return DOM representation of "settings.xml" - * - * @throws SAXException If any parser error occurs - * @throws IOException If any IO error occurs - */ - public Document getSettingsDOM() throws SAXException, IOException { - if (settingsDOM==null) { - settingsDOM=getDOM(settingsBytes); - settingsBytes=null; - } - return settingsDOM; - } - /** * Returns the style data for this embedded object. * diff --git a/source/java/writer2latex/office/OfficeDocument.java b/source/java/writer2latex/office/OfficeDocument.java index eb45e1c..70ce0b6 100644 --- a/source/java/writer2latex/office/OfficeDocument.java +++ b/source/java/writer2latex/office/OfficeDocument.java @@ -16,11 +16,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * - * Copyright: 2002-2012 by Henrik Just + * Copyright: 2002-2014 by Henrik Just * * All Rights Reserved. * - * Version 1.4 (2012-04-01) + * Version 1.4 (2014-08-25) * */ @@ -30,7 +30,6 @@ import java.io.BufferedInputStream; import java.io.InputStream; import java.io.ByteArrayInputStream; import java.io.IOException; -import java.util.Iterator; import java.util.Map; import java.util.HashMap; @@ -50,10 +49,9 @@ import writer2latex.util.SimpleZipReader; * This class implements reading of ODF files from various sources */ public class OfficeDocument { - // File names for the XML streams in a package document + // File names for the XML streams in a package document (settings.xml is ignored) protected final static String CONTENTXML = "content.xml"; protected final static String STYLESXML = "styles.xml"; - protected final static String SETTINGSXML = "settings.xml"; private final static String METAXML = "meta.xml"; private final static String MANIFESTXML = "META-INF/manifest.xml"; @@ -61,6 +59,9 @@ public class OfficeDocument { private final static String MANIFEST_FILE_ENTRY = "manifest:file-entry"; private final static String MANIFEST_MEDIA_TYPE = "manifest:media-type"; private final static String MANIFEST_FULL_PATH = "manifest:full-path"; + + // Identify package format + private boolean bIsPackageFormat = false; /** DOM Document of content.xml. */ private Document contentDoc = null; @@ -68,20 +69,12 @@ public class OfficeDocument { /** DOM Document of meta.xml. */ private Document metaDoc = null; - /** DOM Document of settings.xml. */ - private Document settingsDoc = null; - /** DOM Document of content.xml. */ private Document styleDoc = null; /** DOM Document of META-INF/manifest.xml. */ private Document manifestDoc = null; - /** SimpleZipReader to store the contents from the InputStream - * if the document is in package format (otherwise this will remain null) - */ - private SimpleZipReader zip = null; - /** Collection to keep track of the embedded objects in the document. */ private Map embeddedObjects = null; @@ -89,7 +82,7 @@ public class OfficeDocument { * @return true if the document is in package format, false if it's flat XML */ public boolean isPackageFormat() { - return zip!=null; + return bIsPackageFormat; } /** @@ -114,17 +107,6 @@ public class OfficeDocument { return metaDoc; } - /** - * Return a DOM Document object of the settings.xml - * file. Note that a settings DOM is not created when the constructor - * is called, but only after the read method has been invoked - * - * @return DOM Document object. - */ - public Document getSettingsDOM() { - return settingsDoc; - } - /** * Return a DOM Document object of the style.xml file. * Note that a style DOM is not created when the constructor @@ -137,47 +119,42 @@ public class OfficeDocument { } /** - * Returns all the embedded objects (graphics, formulae, etc.) present in + * Collect all the embedded objects (graphics, formulae, etc.) present in * this document. If the document is read from flat XML there will be no embedded objects. - * - * @return An Iterator of EmbeddedObject objects. */ - public Iterator getEmbeddedObjects() { - if (embeddedObjects == null) { - embeddedObjects = new HashMap(); - if (manifestDoc != null) { - // Need to read the manifest file and construct a list of objects - NodeList nl = manifestDoc.getElementsByTagName(MANIFEST_FILE_ENTRY); - int nLen = nl.getLength(); - for (int i = 0; i < nLen; i++) { - Element elm = (Element) nl.item(i); - String sType = elm.getAttribute(MANIFEST_MEDIA_TYPE); - String sPath = elm.getAttribute(MANIFEST_FULL_PATH); + private void getEmbeddedObjects(SimpleZipReader zip) { + embeddedObjects = new HashMap(); + if (manifestDoc != null) { + // Need to read the manifest file and construct a list of objects + NodeList nl = manifestDoc.getElementsByTagName(MANIFEST_FILE_ENTRY); + int nLen = nl.getLength(); + for (int i = 0; i < nLen; i++) { + Element elm = (Element) nl.item(i); + String sType = elm.getAttribute(MANIFEST_MEDIA_TYPE); + String sPath = elm.getAttribute(MANIFEST_FULL_PATH); - /* According to the ODF spec there are only two types of embedded object: - * Objects with an XML representation. - * Objects without an XML representation. - * The former are represented by one or more XML files. - * The latter are in binary form. - */ - if (sType.startsWith("application/vnd.oasis.opendocument") || sType.startsWith("application/vnd.sun.xml")) { - // Allow either ODF or old OOo 1.x embedded objects - if (!sPath.equals("/")) { // Exclude the main document entries - if (sPath.endsWith("/")) { // Remove trailing slash - sPath=sPath.substring(0, sPath.length()-1); - } - embeddedObjects.put(sPath, new EmbeddedXMLObject(sPath, sType, zip)); + /* According to the ODF spec there are only two types of embedded object: + * Objects with an XML representation. + * Objects without an XML representation. + * The former are represented by one or more XML files. + * The latter are in binary form. + */ + if (sType.startsWith("application/vnd.oasis.opendocument") || sType.startsWith("application/vnd.sun.xml")) { + // Allow either ODF or old OOo 1.x embedded objects + if (!sPath.equals("/")) { // Exclude the main document entries + if (sPath.endsWith("/")) { // Remove trailing slash + sPath=sPath.substring(0, sPath.length()-1); } + embeddedObjects.put(sPath, new EmbeddedXMLObject(sPath, sType, zip)); } - else if (!sType.equals("text/xml")) { - // XML entries are either embedded ODF doc entries or main document entries, all other - // entries are included as binary objects - embeddedObjects.put(sPath, new EmbeddedBinaryObject(sPath, sType, zip)); - } + } + else if (!sType.equals("text/xml")) { + // XML entries are either embedded ODF doc entries or main document entries, all other + // entries are included as binary objects + embeddedObjects.put(sPath, new EmbeddedBinaryObject(sPath, sType, zip)); } } } - return embeddedObjects.values().iterator(); } /** @@ -191,11 +168,7 @@ public class OfficeDocument { * object. */ public EmbeddedObject getEmbeddedObject(String sName) { - if (sName == null) { - return null; - } - getEmbeddedObjects(); - if (embeddedObjects.containsKey(sName)) { + if (sName!=null && embeddedObjects!=null && embeddedObjects.containsKey(sName)) { return embeddedObjects.get(sName); } return null; @@ -209,10 +182,9 @@ public class OfficeDocument { public void read(org.w3c.dom.Document dom) { contentDoc = dom; styleDoc = null; - settingsDoc = null; metaDoc = null; manifestDoc = null; - zip=null; + bIsPackageFormat = false; embeddedObjects = null; } @@ -244,7 +216,7 @@ public class OfficeDocument { } private void readZip(InputStream is) throws IOException { - zip = new SimpleZipReader(); + SimpleZipReader zip = new SimpleZipReader(); zip.read(is); byte contentBytes[] = zip.getEntry(CONTENTXML); @@ -275,15 +247,6 @@ public class OfficeDocument { } } - byte settingsBytes[] = zip.getEntry(SETTINGSXML); - if (settingsBytes != null) { - try { - settingsDoc = parse(settingsBytes); - } catch (SAXException ex) { - throw new IOException(ex); - } - } - byte manifestBytes[] = zip.getEntry(MANIFESTXML); if (manifestBytes != null) { try { @@ -292,6 +255,9 @@ public class OfficeDocument { throw new IOException(ex); } } + + bIsPackageFormat = true; + getEmbeddedObjects(zip); } @@ -302,10 +268,9 @@ public class OfficeDocument { throw new IOException(e); } styleDoc = null; - settingsDoc = null; metaDoc = null; manifestDoc = null; - zip=null; + bIsPackageFormat = false; embeddedObjects = null; } @@ -337,4 +302,3 @@ public class OfficeDocument { } } -