Optimized reading of package format
git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@164 f0f2a975-2e09-46c8-9428-3b39399b9f3c
This commit is contained in:
parent
f79960ee86
commit
8a6e654344
5 changed files with 55 additions and 106 deletions
|
@ -1,5 +1,9 @@
|
|||
Changelog for Writer2LaTeX version 1.2 -> 1.4
|
||||
|
||||
---------- version 1.3.2 alpha ----------
|
||||
|
||||
[all] Optimized reading of package format: The settings.xml files are no longer parsed, and the unused parts of the ZIP file are discarded after reading
|
||||
|
||||
---------- version 1.3.1 alpha ----------
|
||||
|
||||
[w2x] Starting with version 4.2, LibreOffice exports display="math" on display equations. This attribute is now
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
*
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Version 1.4 (2014-08-11)
|
||||
* Version 1.4 (2014-08-25)
|
||||
*
|
||||
*/
|
||||
|
||||
|
@ -145,13 +145,12 @@ public class DrawConverter extends ConverterHelper {
|
|||
if (object!=null) {
|
||||
if (MIMETypes.MATH.equals(object.getType()) || MIMETypes.ODF.equals(object.getType())) { // Formula!
|
||||
try {
|
||||
Element settings = ((EmbeddedXMLObject) object).getSettingsDOM().getDocumentElement();
|
||||
Document formuladoc = ((EmbeddedXMLObject) object).getContentDOM();
|
||||
Element formula = Misc.getChildByTagName(formuladoc,XMLString.MATH); // Since OOo3.2
|
||||
if (formula==null) {
|
||||
formula = Misc.getChildByTagName(formuladoc,XMLString.MATH_MATH);
|
||||
}
|
||||
String sLaTeX = palette.getMathCv().convert(settings,formula);
|
||||
String sLaTeX = palette.getMathCv().convert(formula);
|
||||
if (!" ".equals(sLaTeX)) { // ignore empty formulas
|
||||
ldp.append(" $")
|
||||
.append(sLaTeX)
|
||||
|
@ -190,7 +189,7 @@ public class DrawConverter extends ConverterHelper {
|
|||
}
|
||||
if (formula!=null) {
|
||||
ldp.append(" $")
|
||||
.append(palette.getMathCv().convert(null,formula))
|
||||
.append(palette.getMathCv().convert(formula))
|
||||
.append("$");
|
||||
if (Character.isLetterOrDigit(OfficeReader.getNextChar(node))) { ldp.append(" "); }
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
*
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Version 1.4 (2014-08-18)
|
||||
* Version 1.4 (2014-08-25)
|
||||
*
|
||||
*/
|
||||
|
||||
|
@ -102,7 +102,7 @@ public final class MathConverter extends ConverterHelper {
|
|||
|
||||
|
||||
// TODO: Replace with a method "handleEquation"
|
||||
public String convert(Element settings, Element formula) {
|
||||
public String convert(Element formula) {
|
||||
// TODO: Use settings to determine display mode/text mode
|
||||
// formula must be a math:math node
|
||||
// First try to find a StarMath annotation
|
||||
|
@ -242,7 +242,7 @@ public final class MathConverter extends ConverterHelper {
|
|||
}
|
||||
else {
|
||||
// MathML equation
|
||||
sLaTeX = convert(null,equation);
|
||||
sLaTeX = convert(equation);
|
||||
}
|
||||
if (sLaTeX!=null && !" ".equals(sLaTeX)) { // ignore empty formulas
|
||||
if (!bTexMaths || style!=TexMathsStyle.latex) {
|
||||
|
|
|
@ -16,11 +16,11 @@
|
|||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
||||
* MA 02111-1307 USA
|
||||
*
|
||||
* Copyright: 2002-2012 by Henrik Just
|
||||
* Copyright: 2002-2014 by Henrik Just
|
||||
*
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Version 1.4 (2012-03-27)
|
||||
* Version 1.4 (2014-08-25)
|
||||
*
|
||||
*/
|
||||
|
||||
|
@ -37,17 +37,16 @@ import writer2latex.util.SimpleZipReader;
|
|||
/** This class represents those embedded objects in an ODF document that have an XML representation:
|
||||
* Formulas, charts, spreadsheets, text, drawings and presentations.
|
||||
* These object types are stored using a combination of content, settings and styles XML files.
|
||||
* The settings are application specific and ignored.
|
||||
*/
|
||||
public class EmbeddedXMLObject extends EmbeddedObject {
|
||||
|
||||
// Byte entries for the XML streams of this object
|
||||
private byte[] contentBytes = null;
|
||||
private byte[] settingsBytes = null;
|
||||
private byte[] stylesBytes = null;
|
||||
|
||||
// DOM trees representing the XML parts of this object
|
||||
protected Document contentDOM = null;
|
||||
protected Document settingsDOM = null;
|
||||
protected Document stylesDOM = null;
|
||||
|
||||
/** Read an object from an ODF package document
|
||||
|
@ -60,7 +59,6 @@ public class EmbeddedXMLObject extends EmbeddedObject {
|
|||
super(sName, sType);
|
||||
// Read the bytes, but defer parsing until required (at that point, the bytes are nullified)
|
||||
contentBytes = source.getEntry(sName+"/"+OfficeDocument.CONTENTXML);
|
||||
settingsBytes = source.getEntry(sName+"/"+OfficeDocument.SETTINGSXML);
|
||||
stylesBytes = source.getEntry(sName+"/"+OfficeDocument.STYLESXML);
|
||||
}
|
||||
|
||||
|
@ -80,22 +78,6 @@ public class EmbeddedXMLObject extends EmbeddedObject {
|
|||
return contentDOM;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the settings data for this embedded object.
|
||||
*
|
||||
* @return DOM representation of "settings.xml"
|
||||
*
|
||||
* @throws SAXException If any parser error occurs
|
||||
* @throws IOException If any IO error occurs
|
||||
*/
|
||||
public Document getSettingsDOM() throws SAXException, IOException {
|
||||
if (settingsDOM==null) {
|
||||
settingsDOM=getDOM(settingsBytes);
|
||||
settingsBytes=null;
|
||||
}
|
||||
return settingsDOM;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the style data for this embedded object.
|
||||
*
|
||||
|
|
|
@ -16,11 +16,11 @@
|
|||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
||||
* MA 02111-1307 USA
|
||||
*
|
||||
* Copyright: 2002-2012 by Henrik Just
|
||||
* Copyright: 2002-2014 by Henrik Just
|
||||
*
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Version 1.4 (2012-04-01)
|
||||
* Version 1.4 (2014-08-25)
|
||||
*
|
||||
*/
|
||||
|
||||
|
@ -30,7 +30,6 @@ import java.io.BufferedInputStream;
|
|||
import java.io.InputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.HashMap;
|
||||
|
||||
|
@ -50,10 +49,9 @@ import writer2latex.util.SimpleZipReader;
|
|||
* This class implements reading of ODF files from various sources
|
||||
*/
|
||||
public class OfficeDocument {
|
||||
// File names for the XML streams in a package document
|
||||
// File names for the XML streams in a package document (settings.xml is ignored)
|
||||
protected final static String CONTENTXML = "content.xml";
|
||||
protected final static String STYLESXML = "styles.xml";
|
||||
protected final static String SETTINGSXML = "settings.xml";
|
||||
private final static String METAXML = "meta.xml";
|
||||
private final static String MANIFESTXML = "META-INF/manifest.xml";
|
||||
|
||||
|
@ -61,6 +59,9 @@ public class OfficeDocument {
|
|||
private final static String MANIFEST_FILE_ENTRY = "manifest:file-entry";
|
||||
private final static String MANIFEST_MEDIA_TYPE = "manifest:media-type";
|
||||
private final static String MANIFEST_FULL_PATH = "manifest:full-path";
|
||||
|
||||
// Identify package format
|
||||
private boolean bIsPackageFormat = false;
|
||||
|
||||
/** DOM <code>Document</code> of content.xml. */
|
||||
private Document contentDoc = null;
|
||||
|
@ -68,20 +69,12 @@ public class OfficeDocument {
|
|||
/** DOM <code>Document</code> of meta.xml. */
|
||||
private Document metaDoc = null;
|
||||
|
||||
/** DOM <code>Document</code> of settings.xml. */
|
||||
private Document settingsDoc = null;
|
||||
|
||||
/** DOM <code>Document</code> of styles.xml. */
|
||||
private Document styleDoc = null;
|
||||
|
||||
/** DOM <code>Document</code> of META-INF/manifest.xml. */
|
||||
private Document manifestDoc = null;
|
||||
|
||||
/** <code>SimpleZipReader</code> to store the contents from the <code>InputStream</code>
|
||||
* if the document is in package format (otherwise this will remain null)
|
||||
*/
|
||||
private SimpleZipReader zip = null;
|
||||
|
||||
/** Collection to keep track of the embedded objects in the document. */
|
||||
private Map<String, EmbeddedObject> embeddedObjects = null;
|
||||
|
||||
|
@ -89,7 +82,7 @@ public class OfficeDocument {
|
|||
* @return true if the document is in package format, false if it's flat XML
|
||||
*/
|
||||
public boolean isPackageFormat() {
|
||||
return zip!=null;
|
||||
return bIsPackageFormat;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -114,17 +107,6 @@ public class OfficeDocument {
|
|||
return metaDoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a DOM <code>Document</code> object of the settings.xml
|
||||
* file. Note that a settings DOM is not created when the constructor
|
||||
* is called, but only after the <code>read</code> method has been invoked
|
||||
*
|
||||
* @return DOM <code>Document</code> object.
|
||||
*/
|
||||
public Document getSettingsDOM() {
|
||||
return settingsDoc;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a DOM <code>Document</code> object of the styles.xml file.
|
||||
* Note that a style DOM is not created when the constructor
|
||||
|
@ -137,47 +119,42 @@ public class OfficeDocument {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns all the embedded objects (graphics, formulae, etc.) present in
|
||||
* Collect all the embedded objects (graphics, formulae, etc.) present in
|
||||
* this document. If the document is read from flat XML there will be no embedded objects.
|
||||
*
|
||||
* @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects.
|
||||
*/
|
||||
public Iterator<EmbeddedObject> getEmbeddedObjects() {
|
||||
if (embeddedObjects == null) {
|
||||
embeddedObjects = new HashMap<String, EmbeddedObject>();
|
||||
if (manifestDoc != null) {
|
||||
// Need to read the manifest file and construct a list of objects
|
||||
NodeList nl = manifestDoc.getElementsByTagName(MANIFEST_FILE_ENTRY);
|
||||
int nLen = nl.getLength();
|
||||
for (int i = 0; i < nLen; i++) {
|
||||
Element elm = (Element) nl.item(i);
|
||||
String sType = elm.getAttribute(MANIFEST_MEDIA_TYPE);
|
||||
String sPath = elm.getAttribute(MANIFEST_FULL_PATH);
|
||||
private void getEmbeddedObjects(SimpleZipReader zip) {
|
||||
embeddedObjects = new HashMap<String, EmbeddedObject>();
|
||||
if (manifestDoc != null) {
|
||||
// Need to read the manifest file and construct a list of objects
|
||||
NodeList nl = manifestDoc.getElementsByTagName(MANIFEST_FILE_ENTRY);
|
||||
int nLen = nl.getLength();
|
||||
for (int i = 0; i < nLen; i++) {
|
||||
Element elm = (Element) nl.item(i);
|
||||
String sType = elm.getAttribute(MANIFEST_MEDIA_TYPE);
|
||||
String sPath = elm.getAttribute(MANIFEST_FULL_PATH);
|
||||
|
||||
/* According to the ODF spec there are only two types of embedded object:
|
||||
* Objects with an XML representation.
|
||||
* Objects without an XML representation.
|
||||
* The former are represented by one or more XML files.
|
||||
* The latter are in binary form.
|
||||
*/
|
||||
if (sType.startsWith("application/vnd.oasis.opendocument") || sType.startsWith("application/vnd.sun.xml")) {
|
||||
// Allow either ODF or old OOo 1.x embedded objects
|
||||
if (!sPath.equals("/")) { // Exclude the main document entries
|
||||
if (sPath.endsWith("/")) { // Remove trailing slash
|
||||
sPath=sPath.substring(0, sPath.length()-1);
|
||||
}
|
||||
embeddedObjects.put(sPath, new EmbeddedXMLObject(sPath, sType, zip));
|
||||
/* According to the ODF spec there are only two types of embedded object:
|
||||
* Objects with an XML representation.
|
||||
* Objects without an XML representation.
|
||||
* The former are represented by one or more XML files.
|
||||
* The latter are in binary form.
|
||||
*/
|
||||
if (sType.startsWith("application/vnd.oasis.opendocument") || sType.startsWith("application/vnd.sun.xml")) {
|
||||
// Allow either ODF or old OOo 1.x embedded objects
|
||||
if (!sPath.equals("/")) { // Exclude the main document entries
|
||||
if (sPath.endsWith("/")) { // Remove trailing slash
|
||||
sPath=sPath.substring(0, sPath.length()-1);
|
||||
}
|
||||
embeddedObjects.put(sPath, new EmbeddedXMLObject(sPath, sType, zip));
|
||||
}
|
||||
else if (!sType.equals("text/xml")) {
|
||||
// XML entries are either embedded ODF doc entries or main document entries, all other
|
||||
// entries are included as binary objects
|
||||
embeddedObjects.put(sPath, new EmbeddedBinaryObject(sPath, sType, zip));
|
||||
}
|
||||
}
|
||||
else if (!sType.equals("text/xml")) {
|
||||
// XML entries are either embedded ODF doc entries or main document entries, all other
|
||||
// entries are included as binary objects
|
||||
embeddedObjects.put(sPath, new EmbeddedBinaryObject(sPath, sType, zip));
|
||||
}
|
||||
}
|
||||
}
|
||||
return embeddedObjects.values().iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -191,11 +168,7 @@ public class OfficeDocument {
|
|||
* object.
|
||||
*/
|
||||
public EmbeddedObject getEmbeddedObject(String sName) {
|
||||
if (sName == null) {
|
||||
return null;
|
||||
}
|
||||
getEmbeddedObjects();
|
||||
if (embeddedObjects.containsKey(sName)) {
|
||||
if (sName!=null && embeddedObjects!=null && embeddedObjects.containsKey(sName)) {
|
||||
return embeddedObjects.get(sName);
|
||||
}
|
||||
return null;
|
||||
|
@ -209,10 +182,9 @@ public class OfficeDocument {
|
|||
public void read(org.w3c.dom.Document dom) {
|
||||
contentDoc = dom;
|
||||
styleDoc = null;
|
||||
settingsDoc = null;
|
||||
metaDoc = null;
|
||||
manifestDoc = null;
|
||||
zip=null;
|
||||
bIsPackageFormat = false;
|
||||
embeddedObjects = null;
|
||||
}
|
||||
|
||||
|
@ -244,7 +216,7 @@ public class OfficeDocument {
|
|||
}
|
||||
|
||||
private void readZip(InputStream is) throws IOException {
|
||||
zip = new SimpleZipReader();
|
||||
SimpleZipReader zip = new SimpleZipReader();
|
||||
zip.read(is);
|
||||
|
||||
byte contentBytes[] = zip.getEntry(CONTENTXML);
|
||||
|
@ -275,15 +247,6 @@ public class OfficeDocument {
|
|||
}
|
||||
}
|
||||
|
||||
byte settingsBytes[] = zip.getEntry(SETTINGSXML);
|
||||
if (settingsBytes != null) {
|
||||
try {
|
||||
settingsDoc = parse(settingsBytes);
|
||||
} catch (SAXException ex) {
|
||||
throw new IOException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
byte manifestBytes[] = zip.getEntry(MANIFESTXML);
|
||||
if (manifestBytes != null) {
|
||||
try {
|
||||
|
@ -292,6 +255,9 @@ public class OfficeDocument {
|
|||
throw new IOException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
bIsPackageFormat = true;
|
||||
getEmbeddedObjects(zip);
|
||||
}
|
||||
|
||||
|
||||
|
@ -302,10 +268,9 @@ public class OfficeDocument {
|
|||
throw new IOException(e);
|
||||
}
|
||||
styleDoc = null;
|
||||
settingsDoc = null;
|
||||
metaDoc = null;
|
||||
manifestDoc = null;
|
||||
zip=null;
|
||||
bIsPackageFormat = false;
|
||||
embeddedObjects = null;
|
||||
}
|
||||
|
||||
|
@ -337,4 +302,3 @@ public class OfficeDocument {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue