Optimized reading of package format

git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@164 f0f2a975-2e09-46c8-9428-3b39399b9f3c
This commit is contained in:
henrikjust 2014-08-25 06:25:05 +00:00
parent f79960ee86
commit 8a6e654344
5 changed files with 55 additions and 106 deletions

View file

@ -1,5 +1,9 @@
Changelog for Writer2LaTeX version 1.2 -> 1.4 Changelog for Writer2LaTeX version 1.2 -> 1.4
---------- version 1.3.2 alpha ----------
[all] Optimized reading of package format: The settings.xml files are no longer parsed, and the unused parts of the ZIP file are discarded
---------- version 1.3.1 alpha ---------- ---------- version 1.3.1 alpha ----------
[w2x] Starting with version 4.2, LibreOffice exports display="math" on display equations. This attribute is now [w2x] Starting with version 4.2, LibreOffice exports display="math" on display equations. This attribute is now

View file

@ -20,7 +20,7 @@
* *
* All Rights Reserved. * All Rights Reserved.
* *
* Version 1.4 (2014-08-11) * Version 1.4 (2014-08-25)
* *
*/ */
@ -145,13 +145,12 @@ public class DrawConverter extends ConverterHelper {
if (object!=null) { if (object!=null) {
if (MIMETypes.MATH.equals(object.getType()) || MIMETypes.ODF.equals(object.getType())) { // Formula! if (MIMETypes.MATH.equals(object.getType()) || MIMETypes.ODF.equals(object.getType())) { // Formula!
try { try {
Element settings = ((EmbeddedXMLObject) object).getSettingsDOM().getDocumentElement();
Document formuladoc = ((EmbeddedXMLObject) object).getContentDOM(); Document formuladoc = ((EmbeddedXMLObject) object).getContentDOM();
Element formula = Misc.getChildByTagName(formuladoc,XMLString.MATH); // Since OOo3.2 Element formula = Misc.getChildByTagName(formuladoc,XMLString.MATH); // Since OOo3.2
if (formula==null) { if (formula==null) {
formula = Misc.getChildByTagName(formuladoc,XMLString.MATH_MATH); formula = Misc.getChildByTagName(formuladoc,XMLString.MATH_MATH);
} }
String sLaTeX = palette.getMathCv().convert(settings,formula); String sLaTeX = palette.getMathCv().convert(formula);
if (!" ".equals(sLaTeX)) { // ignore empty formulas if (!" ".equals(sLaTeX)) { // ignore empty formulas
ldp.append(" $") ldp.append(" $")
.append(sLaTeX) .append(sLaTeX)
@ -190,7 +189,7 @@ public class DrawConverter extends ConverterHelper {
} }
if (formula!=null) { if (formula!=null) {
ldp.append(" $") ldp.append(" $")
.append(palette.getMathCv().convert(null,formula)) .append(palette.getMathCv().convert(formula))
.append("$"); .append("$");
if (Character.isLetterOrDigit(OfficeReader.getNextChar(node))) { ldp.append(" "); } if (Character.isLetterOrDigit(OfficeReader.getNextChar(node))) { ldp.append(" "); }
} }

View file

@ -20,7 +20,7 @@
* *
* All Rights Reserved. * All Rights Reserved.
* *
* Version 1.4 (2014-08-18) * Version 1.4 (2014-08-25)
* *
*/ */
@ -102,7 +102,7 @@ public final class MathConverter extends ConverterHelper {
// TODO: Replace with a method "handleEquation" // TODO: Replace with a method "handleEquation"
public String convert(Element settings, Element formula) { public String convert(Element formula) {
// TODO: Use settings to determine display mode/text mode // TODO: Use settings to determine display mode/text mode
// formula must be a math:math node // formula must be a math:math node
// First try to find a StarMath annotation // First try to find a StarMath annotation
@ -242,7 +242,7 @@ public final class MathConverter extends ConverterHelper {
} }
else { else {
// MathML equation // MathML equation
sLaTeX = convert(null,equation); sLaTeX = convert(equation);
} }
if (sLaTeX!=null && !" ".equals(sLaTeX)) { // ignore empty formulas if (sLaTeX!=null && !" ".equals(sLaTeX)) { // ignore empty formulas
if (!bTexMaths || style!=TexMathsStyle.latex) { if (!bTexMaths || style!=TexMathsStyle.latex) {

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA * MA 02111-1307 USA
* *
* Copyright: 2002-2012 by Henrik Just * Copyright: 2002-2014 by Henrik Just
* *
* All Rights Reserved. * All Rights Reserved.
* *
* Version 1.4 (2012-03-27) * Version 1.4 (2014-08-25)
* *
*/ */
@ -37,17 +37,16 @@ import writer2latex.util.SimpleZipReader;
/** This class represents those embedded objects in an ODF document that have an XML representation: /** This class represents those embedded objects in an ODF document that have an XML representation:
* Formulas, charts, spreadsheets, text, drawings and presentations. * Formulas, charts, spreadsheets, text, drawings and presentations.
* These object types are stored using a combination of content, settings and styles XML files. * These object types are stored using a combination of content, settings and styles XML files.
* The settings are application specific and ignored.
*/ */
public class EmbeddedXMLObject extends EmbeddedObject { public class EmbeddedXMLObject extends EmbeddedObject {
// Byte entries for the XML streams of this object // Byte entries for the XML streams of this object
private byte[] contentBytes = null; private byte[] contentBytes = null;
private byte[] settingsBytes = null;
private byte[] stylesBytes = null; private byte[] stylesBytes = null;
// DOM trees representing the XML parts of this object // DOM trees representing the XML parts of this object
protected Document contentDOM = null; protected Document contentDOM = null;
protected Document settingsDOM = null;
protected Document stylesDOM = null; protected Document stylesDOM = null;
/** Read an object from an ODF package document /** Read an object from an ODF package document
@ -60,7 +59,6 @@ public class EmbeddedXMLObject extends EmbeddedObject {
super(sName, sType); super(sName, sType);
// Read the bytes, but defer parsing until required (at that point, the bytes are nullified) // Read the bytes, but defer parsing until required (at that point, the bytes are nullified)
contentBytes = source.getEntry(sName+"/"+OfficeDocument.CONTENTXML); contentBytes = source.getEntry(sName+"/"+OfficeDocument.CONTENTXML);
settingsBytes = source.getEntry(sName+"/"+OfficeDocument.SETTINGSXML);
stylesBytes = source.getEntry(sName+"/"+OfficeDocument.STYLESXML); stylesBytes = source.getEntry(sName+"/"+OfficeDocument.STYLESXML);
} }
@ -80,22 +78,6 @@ public class EmbeddedXMLObject extends EmbeddedObject {
return contentDOM; return contentDOM;
} }
/**
* Returns the settings data for this embedded object.
*
* @return DOM representation of "settings.xml"
*
* @throws SAXException If any parser error occurs
* @throws IOException If any IO error occurs
*/
public Document getSettingsDOM() throws SAXException, IOException {
if (settingsDOM==null) {
settingsDOM=getDOM(settingsBytes);
settingsBytes=null;
}
return settingsDOM;
}
/** /**
* Returns the style data for this embedded object. * Returns the style data for this embedded object.
* *

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA * MA 02111-1307 USA
* *
* Copyright: 2002-2012 by Henrik Just * Copyright: 2002-2014 by Henrik Just
* *
* All Rights Reserved. * All Rights Reserved.
* *
* Version 1.4 (2012-04-01) * Version 1.4 (2014-08-25)
* *
*/ */
@ -30,7 +30,6 @@ import java.io.BufferedInputStream;
import java.io.InputStream; import java.io.InputStream;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.HashMap; import java.util.HashMap;
@ -50,10 +49,9 @@ import writer2latex.util.SimpleZipReader;
* This class implements reading of ODF files from various sources * This class implements reading of ODF files from various sources
*/ */
public class OfficeDocument { public class OfficeDocument {
// File names for the XML streams in a package document // File names for the XML streams in a package document (settings.xml is ignored)
protected final static String CONTENTXML = "content.xml"; protected final static String CONTENTXML = "content.xml";
protected final static String STYLESXML = "styles.xml"; protected final static String STYLESXML = "styles.xml";
protected final static String SETTINGSXML = "settings.xml";
private final static String METAXML = "meta.xml"; private final static String METAXML = "meta.xml";
private final static String MANIFESTXML = "META-INF/manifest.xml"; private final static String MANIFESTXML = "META-INF/manifest.xml";
@ -61,6 +59,9 @@ public class OfficeDocument {
private final static String MANIFEST_FILE_ENTRY = "manifest:file-entry"; private final static String MANIFEST_FILE_ENTRY = "manifest:file-entry";
private final static String MANIFEST_MEDIA_TYPE = "manifest:media-type"; private final static String MANIFEST_MEDIA_TYPE = "manifest:media-type";
private final static String MANIFEST_FULL_PATH = "manifest:full-path"; private final static String MANIFEST_FULL_PATH = "manifest:full-path";
// Identify package format
private boolean bIsPackageFormat = false;
/** DOM <code>Document</code> of content.xml. */ /** DOM <code>Document</code> of content.xml. */
private Document contentDoc = null; private Document contentDoc = null;
@ -68,20 +69,12 @@ public class OfficeDocument {
/** DOM <code>Document</code> of meta.xml. */ /** DOM <code>Document</code> of meta.xml. */
private Document metaDoc = null; private Document metaDoc = null;
/** DOM <code>Document</code> of settings.xml. */
private Document settingsDoc = null;
/** DOM <code>Document</code> of styles.xml. */ /** DOM <code>Document</code> of styles.xml. */
private Document styleDoc = null; private Document styleDoc = null;
/** DOM <code>Document</code> of META-INF/manifest.xml. */ /** DOM <code>Document</code> of META-INF/manifest.xml. */
private Document manifestDoc = null; private Document manifestDoc = null;
/** <code>SimpleZipReader</code> to store the contents from the <code>InputStream</code>
* if the document is in package format (otherwise this will remain null)
*/
private SimpleZipReader zip = null;
/** Collection to keep track of the embedded objects in the document. */ /** Collection to keep track of the embedded objects in the document. */
private Map<String, EmbeddedObject> embeddedObjects = null; private Map<String, EmbeddedObject> embeddedObjects = null;
@ -89,7 +82,7 @@ public class OfficeDocument {
* @return true if the document is in package format, false if it's flat XML * @return true if the document is in package format, false if it's flat XML
*/ */
public boolean isPackageFormat() { public boolean isPackageFormat() {
return zip!=null; return bIsPackageFormat;
} }
/** /**
@ -114,17 +107,6 @@ public class OfficeDocument {
return metaDoc; return metaDoc;
} }
/**
* Return a DOM <code>Document</code> object of the settings.xml
* file. Note that a settings DOM is not created when the constructor
* is called, but only after the <code>read</code> method has been invoked
*
* @return DOM <code>Document</code> object.
*/
public Document getSettingsDOM() {
return settingsDoc;
}
/** /**
* Return a DOM <code>Document</code> object of the styles.xml file. * Return a DOM <code>Document</code> object of the styles.xml file.
* Note that a style DOM is not created when the constructor * Note that a style DOM is not created when the constructor
@ -137,47 +119,42 @@ public class OfficeDocument {
} }
/** /**
* Returns all the embedded objects (graphics, formulae, etc.) present in * Collect all the embedded objects (graphics, formulae, etc.) present in
* this document. If the document is read from flat XML there will be no embedded objects. * this document. If the document is read from flat XML there will be no embedded objects.
*
* @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects.
*/ */
public Iterator<EmbeddedObject> getEmbeddedObjects() { private void getEmbeddedObjects(SimpleZipReader zip) {
if (embeddedObjects == null) { embeddedObjects = new HashMap<String, EmbeddedObject>();
embeddedObjects = new HashMap<String, EmbeddedObject>(); if (manifestDoc != null) {
if (manifestDoc != null) { // Need to read the manifest file and construct a list of objects
// Need to read the manifest file and construct a list of objects NodeList nl = manifestDoc.getElementsByTagName(MANIFEST_FILE_ENTRY);
NodeList nl = manifestDoc.getElementsByTagName(MANIFEST_FILE_ENTRY); int nLen = nl.getLength();
int nLen = nl.getLength(); for (int i = 0; i < nLen; i++) {
for (int i = 0; i < nLen; i++) { Element elm = (Element) nl.item(i);
Element elm = (Element) nl.item(i); String sType = elm.getAttribute(MANIFEST_MEDIA_TYPE);
String sType = elm.getAttribute(MANIFEST_MEDIA_TYPE); String sPath = elm.getAttribute(MANIFEST_FULL_PATH);
String sPath = elm.getAttribute(MANIFEST_FULL_PATH);
/* According to the ODF spec there are only two types of embedded object: /* According to the ODF spec there are only two types of embedded object:
* Objects with an XML representation. * Objects with an XML representation.
* Objects without an XML representation. * Objects without an XML representation.
* The former are represented by one or more XML files. * The former are represented by one or more XML files.
* The latter are in binary form. * The latter are in binary form.
*/ */
if (sType.startsWith("application/vnd.oasis.opendocument") || sType.startsWith("application/vnd.sun.xml")) { if (sType.startsWith("application/vnd.oasis.opendocument") || sType.startsWith("application/vnd.sun.xml")) {
// Allow either ODF or old OOo 1.x embedded objects // Allow either ODF or old OOo 1.x embedded objects
if (!sPath.equals("/")) { // Exclude the main document entries if (!sPath.equals("/")) { // Exclude the main document entries
if (sPath.endsWith("/")) { // Remove trailing slash if (sPath.endsWith("/")) { // Remove trailing slash
sPath=sPath.substring(0, sPath.length()-1); sPath=sPath.substring(0, sPath.length()-1);
}
embeddedObjects.put(sPath, new EmbeddedXMLObject(sPath, sType, zip));
} }
embeddedObjects.put(sPath, new EmbeddedXMLObject(sPath, sType, zip));
} }
else if (!sType.equals("text/xml")) { }
// XML entries are either embedded ODF doc entries or main document entries, all other else if (!sType.equals("text/xml")) {
// entries are included as binary objects // XML entries are either embedded ODF doc entries or main document entries, all other
embeddedObjects.put(sPath, new EmbeddedBinaryObject(sPath, sType, zip)); // entries are included as binary objects
} embeddedObjects.put(sPath, new EmbeddedBinaryObject(sPath, sType, zip));
} }
} }
} }
return embeddedObjects.values().iterator();
} }
/** /**
@ -191,11 +168,7 @@ public class OfficeDocument {
* object. * object.
*/ */
public EmbeddedObject getEmbeddedObject(String sName) { public EmbeddedObject getEmbeddedObject(String sName) {
if (sName == null) { if (sName!=null && embeddedObjects!=null && embeddedObjects.containsKey(sName)) {
return null;
}
getEmbeddedObjects();
if (embeddedObjects.containsKey(sName)) {
return embeddedObjects.get(sName); return embeddedObjects.get(sName);
} }
return null; return null;
@ -209,10 +182,9 @@ public class OfficeDocument {
public void read(org.w3c.dom.Document dom) { public void read(org.w3c.dom.Document dom) {
contentDoc = dom; contentDoc = dom;
styleDoc = null; styleDoc = null;
settingsDoc = null;
metaDoc = null; metaDoc = null;
manifestDoc = null; manifestDoc = null;
zip=null; bIsPackageFormat = false;
embeddedObjects = null; embeddedObjects = null;
} }
@ -244,7 +216,7 @@ public class OfficeDocument {
} }
private void readZip(InputStream is) throws IOException { private void readZip(InputStream is) throws IOException {
zip = new SimpleZipReader(); SimpleZipReader zip = new SimpleZipReader();
zip.read(is); zip.read(is);
byte contentBytes[] = zip.getEntry(CONTENTXML); byte contentBytes[] = zip.getEntry(CONTENTXML);
@ -275,15 +247,6 @@ public class OfficeDocument {
} }
} }
byte settingsBytes[] = zip.getEntry(SETTINGSXML);
if (settingsBytes != null) {
try {
settingsDoc = parse(settingsBytes);
} catch (SAXException ex) {
throw new IOException(ex);
}
}
byte manifestBytes[] = zip.getEntry(MANIFESTXML); byte manifestBytes[] = zip.getEntry(MANIFESTXML);
if (manifestBytes != null) { if (manifestBytes != null) {
try { try {
@ -292,6 +255,9 @@ public class OfficeDocument {
throw new IOException(ex); throw new IOException(ex);
} }
} }
bIsPackageFormat = true;
getEmbeddedObjects(zip);
} }
@ -302,10 +268,9 @@ public class OfficeDocument {
throw new IOException(e); throw new IOException(e);
} }
styleDoc = null; styleDoc = null;
settingsDoc = null;
metaDoc = null; metaDoc = null;
manifestDoc = null; manifestDoc = null;
zip=null; bIsPackageFormat = false;
embeddedObjects = null; embeddedObjects = null;
} }
@ -337,4 +302,3 @@ public class OfficeDocument {
} }
} }