Optimized the loading of ODF files (the old xmerge code is completely rewritten)

git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@144 f0f2a975-2e09-46c8-9428-3b39399b9f3c
This commit is contained in:
henrikjust 2012-03-27 08:31:31 +00:00
parent e3a808f820
commit ecacd13bce
26 changed files with 1215 additions and 2603 deletions

View file

@ -2,6 +2,11 @@ Changelog for Writer2LaTeX version 1.2 -> 1.4
---------- version 1.3.1 alpha ----------
[all] Optimized the parsing of the source document saving some time and space (several intermediate steps and large byte arrays
are now avoided)
[all] API change: The converters can now convert directly from a DOM tree
[all] Removed unused code in writer2latex.xmerge
[w2x] Moved localized strings to .properties files

View file

@ -16,22 +16,18 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2010 by Henrik Just
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2010-05-17)
* Version 1.4 (2012-03-22)
*
*/
// This file was originally based on OOo's XMergeBridge, which is (c) by Sun Microsystems
package org.openoffice.da.comp.w2lcommon.filter;
import com.sun.star.lib.uno.adapter.XInputStreamToInputStreamAdapter;
import com.sun.star.lib.uno.adapter.XOutputStreamToOutputStreamAdapter;
//import com.sun.star.beans.PropertyValue;
import com.sun.star.io.XInputStream;
import com.sun.star.io.XOutputStream;
import com.sun.star.lang.XMultiServiceFactory;
@ -43,461 +39,345 @@ import com.sun.star.ucb.XSimpleFileAccess2;
import com.sun.star.uno.Type;
import com.sun.star.uno.UnoRuntime;
import com.sun.star.uno.XComponentContext;
//import com.sun.star.xml.sax.InputSource;
//import com.sun.star.xml.sax.XParser;
import com.sun.star.xml.sax.XDocumentHandler;
import com.sun.star.xml.XExportFilter;
import org.openoffice.da.comp.w2lcommon.helper.MessageBox;
//import org.openoffice.da.comp.w2lcommon.helper.PropertyHelper;
import writer2latex.api.Converter;
import writer2latex.api.ConverterFactory;
import writer2latex.api.ConverterResult;
import writer2latex.api.OutputFile;
import writer2latex.util.Misc;
import writer2latex.util.SimpleDOMBuilder;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
//import java.util.Enumeration;
//import java.util.Vector;
import java.io.*;
//import javax.xml.parsers.*;
//import org.xml.sax.SAXException;
//import java.net.URI;
/** This class provides an abstract uno component which implements an XExportFilter.
* The filter is actually generic and only then constructor and 3 strings needs
* to changed by the subclass.
/** This class provides an abstract UNO component which implements an XExportFilter.
* The filter is actually generic and only the constructor and 3 strings need
* to be changed by the subclass.
*/
public abstract class ExportFilterBase implements
XExportFilter,
XServiceName,
XServiceInfo,
XDocumentHandler,
XTypeProvider {
/** Service name for the component */
public static final String __serviceName = "";
/** Implementation name for the component */
public static final String __implementationName = "";
/** Filter name to include in error messages */
XExportFilter,
XServiceName,
XServiceInfo,
XDocumentHandler,
XTypeProvider {
/** Service name for the component */
public static final String __serviceName = "";
/** Implementation name for the component */
public static final String __implementationName = "";
/** Filter name to include in error messages */
public String __displayName = "";
private static XComponentContext xComponentContext = null;
protected static XMultiServiceFactory xMSF;
private static XInputStream xInStream =null;
private static XOutputStream xOutStream=null;
private static XOutputStream xos = null;
private static String sdMime=null;
private static String sURL="";
private Object filterData;
private XSimpleFileAccess2 sfa2;
private static XComponentContext xComponentContext = null;
protected static XMultiServiceFactory xMSF;
private SimpleDOMBuilder domBuilder = new SimpleDOMBuilder();
private static XOutputStream xos = null;
private static String sdMime=null;
private static String sURL="";
/** We need to get the Service Manager from the Component context to
* instantiate certain services, hence this constructor.
* The subclass must override this to set xMSF properly from the registration class
*/
public ExportFilterBase(XComponentContext xComponentContext1) {
xComponentContext = xComponentContext1;
xMSF = null;
}
// Some utility methods:
String getFileName(String origName) {
String name=null;
if (origName !=null) {
if(origName.equalsIgnoreCase(""))
name = "OutFile";
else {
if (origName.lastIndexOf("/")>=0) {
origName=origName.substring(origName.lastIndexOf("/")+1,origName.length());
}
if (origName.lastIndexOf(".")>=0) {
name = origName.substring(0,(origName.lastIndexOf(".")));
}
else {
name=origName;
}
}
}
else{
name = "OutFile";
}
private Object filterData;
private XSimpleFileAccess2 sfa2;
return name;
}
/** Escape a string so that it can be written as XML character data or
 *  inside a double-quoted attribute value.
 *
 *  <p>The characters <code>&amp;</code>, <code>&quot;</code>, <code>&lt;</code>
 *  and <code>&gt;</code> are replaced by their predefined entities. Characters
 *  that are not allowed in XML 1.0 are silently dropped.</p>
 *
 *  @param origString the text to escape (must not be null)
 *  @return the escaped text
 */
public String needsMask(String origString) {
    int nLen = origString.length();
    StringBuilder buf = new StringBuilder(nLen);
    for (int i=0; i<nLen; i++) {
        char c = origString.charAt(i);
        if (c=='&') {
            buf.append("&amp;");
        }
        else if (c=='"') {
            buf.append("&quot;");
        }
        else if (c=='<') {
            buf.append("&lt;");
        }
        else if (c=='>') {
            buf.append("&gt;");
        }
        else if (c=='\t' || c=='\n' || c=='\r' || (c>=' ' && c<='\uFFFD')) {
            // Valid XML 1.0 characters:
            // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
            // The upper bound U+FFFD is inclusive. The surrogate range
            // (#xD800-#xDFFF) is deliberately let through, because characters
            // from #x10000 upwards arrive here as surrogate pairs.
            buf.append(c);
        }
        // Anything else (remaining control characters, U+FFFE, U+FFFF) is
        // illegal in XML and is left out of the result.
    }
    return buf.toString();
}
// Implementation of XExportFilter:
public boolean exporter(com.sun.star.beans.PropertyValue[] aSourceData,
java.lang.String[] msUserData) throws com.sun.star.uno.RuntimeException{
sURL=null;
filterData = null;
// Get user data from configuration (type detection)
//String udConvertClass=msUserData[0];
//String udImport =msUserData[2];
//String udExport =msUserData[3];
sdMime = msUserData[5];
// Get source data (only the OutputStream and the URL are actually used)
com.sun.star.beans.PropertyValue[] pValue = aSourceData;
for (int i = 0 ; i < pValue.length; i++) {
try{
if (pValue[i].Name.compareTo("OutputStream")==0){
xos=(com.sun.star.io.XOutputStream)AnyConverter.toObject(new Type(com.sun.star.io.XOutputStream.class), pValue[i].Value);
}
//if (pValue[i].Name.compareTo("FileName")==0){
// sFileName=(String)AnyConverter.toObject(new Type(java.lang.String.class), pValue[i].Value);
//}
if (pValue[i].Name.compareTo("URL")==0){
sURL=(String)AnyConverter.toObject(new Type(java.lang.String.class), pValue[i].Value);
}
//if (pValue[i].Name.compareTo("Title")==0){
// title=(String)AnyConverter.toObject(new Type(java.lang.String.class), pValue[i].Value);
//}
if (pValue[i].Name.compareTo("FilterData")==0) {
filterData = pValue[i].Value;
}
}
catch(com.sun.star.lang.IllegalArgumentException AnyExec){
System.err.println("\nIllegalArgumentException "+AnyExec);
}
}
if (sURL==null){
sURL="";
}
// Create a pipe to be used by the XDocumentHandler implementation:
try {
Object xPipeObj=xMSF.createInstance("com.sun.star.io.Pipe");
xInStream = (XInputStream) UnoRuntime.queryInterface(
XInputStream.class , xPipeObj );
xOutStream = (XOutputStream) UnoRuntime.queryInterface(
XOutputStream.class , xPipeObj );
}
catch (Exception e){
System.err.println("Exception "+e);
return false;
}
return true;
}
// Implementation of XDocumentHandler:
// Flat xml is created by the sax events and passed through the pipe
// created by exporter()
public void startDocument () {
//Do nothing
}
public void endDocument()throws com.sun.star.uno.RuntimeException {
try{
xOutStream.closeOutput();
convert(xInStream,xos);
}
catch (IOException e){
MessageBox msgBox = new MessageBox(xComponentContext);
msgBox.showMessage(__displayName+": IO error in conversion",
e.toString()+" at "+e.getStackTrace()[0].toString());
throw new com.sun.star.uno.RuntimeException(e.getMessage());
}
catch (Exception e){
MessageBox msgBox = new MessageBox(xComponentContext);
msgBox.showMessage(__displayName+": Internal error in conversion",
e.toString()+" at "+e.getStackTrace()[0].toString());
throw new com.sun.star.uno.RuntimeException(__displayName+" Exception");
}
}
public void startElement (String str, com.sun.star.xml.sax.XAttributeList xattribs)
{
str="<".concat(str);
if (xattribs !=null)
{
str= str.concat(" ");
int len=xattribs.getLength();
for (short i=0;i<len;i++)
{
str=str.concat(xattribs.getNameByIndex(i));
str=str.concat("=\"");
str=str.concat(needsMask(xattribs.getValueByIndex(i)));
str=str.concat("\" ");
}
}
str=str.concat(">");
try{
xOutStream.writeBytes(str.getBytes("UTF-8"));
}
catch (Exception e){
System.err.println("\n"+e);
}
/** We need to get the Service Manager from the Component context to
* instantiate certain services, hence this constructor.
* The subclass must override this to set xMSF properly from the registration class
*/
public ExportFilterBase(XComponentContext xComponentContext1) {
xComponentContext = xComponentContext1;
xMSF = null;
}
public void endElement(String str){
str="</".concat(str);
str=str.concat(">");
try{
xOutStream.writeBytes(str.getBytes("UTF-8"));
}
catch (Exception e){
System.err.println("\n"+e);
}
}
public void characters(String str){
str=needsMask(str);
try{
xOutStream.writeBytes(str.getBytes("UTF-8"));
}
catch (Exception e){
System.err.println("\n"+e);
}
}
public void ignorableWhitespace(String str){
}
public void processingInstruction(String aTarget, String aData){
}
public void setDocumentLocator(com.sun.star.xml.sax.XLocator xLocator){
}
// Utility method:
// This is the actual conversion method, using Writer2LaTeX to convert
// the flat xml received from the XInputStream, and writing the result
// to the XOutputStream. The XMLExporter does not support export to
// compound documents with multiple output files; so the main file
// is written to the XOutStream and other files are written using ucb.
public void convert (com.sun.star.io.XInputStream xml,com.sun.star.io.XOutputStream exportStream)
throws com.sun.star.uno.RuntimeException, IOException {
// Initialise the file access
sfa2 = null;
try {
Object sfaObject = xComponentContext.getServiceManager().createInstanceWithContext(
"com.sun.star.ucb.SimpleFileAccess", xComponentContext);
sfa2 = (XSimpleFileAccess2) UnoRuntime.queryInterface(XSimpleFileAccess2.class, sfaObject);
}
catch (com.sun.star.uno.Exception e) {
// failed to get SimpleFileAccess service (should not happen)
}
// Get base name from the url provided by OOo
String sName= getFileName(sURL);
// Adapter for input stream (OpenDocument flat xml)
XInputStreamToInputStreamAdapter xis =new XInputStreamToInputStreamAdapter(xml);
// Adapter for output stream (Main output file)
XOutputStreamToOutputStreamAdapter newxos =new XOutputStreamToOutputStreamAdapter(exportStream);
// Create converter
Converter converter = ConverterFactory.createConverter(sdMime);
if (converter==null) {
throw new com.sun.star.uno.RuntimeException("Failed to create converter to "+sdMime);
}
// Apply the FilterData to the converter
if (filterData!=null) {
FilterDataParser fdp = new FilterDataParser(xComponentContext);
fdp.applyFilterData(filterData,converter);
}
// Do conversion
converter.setGraphicConverter(new GraphicConverterImpl(xComponentContext));
ConverterResult dataOut = null;
//try {
dataOut = converter.convert(xis,Misc.makeFileName(sName));
//}
//catch (IOException e) {
// Fail silently
//}
// Write out files
Iterator<OutputFile> docEnum = dataOut.iterator();
// Remove the file name part of the url
String sNewURL = null;
if (sURL.lastIndexOf("/")>-1) {
// Take the url up to and including the last slash
sNewURL = sURL.substring(0,sURL.lastIndexOf("/")+1);
}
else {
// The url does not include a path; this should not really happen,
// but in this case we will write to the current default directory
sNewURL = "";
}
while (docEnum.hasNext() && sURL.startsWith("file:")) {
OutputFile docOut = docEnum.next();
if (dataOut.getMasterDocument()==docOut) {
// The master document is written to the XOutStream supplied
// by the XMLFilterAdaptor
docOut.write(newxos);
newxos.flush();
newxos.close();
}
else {
// Additional documents are written directly using ucb
// Get the file name and the (optional) directory name
String sFullFileName = Misc.makeHref(docOut.getFileName());
String sDirName = "";
String sFileName = sFullFileName;
int nSlash = sFileName.indexOf("/");
if (nSlash>-1) {
sDirName = sFileName.substring(0,nSlash);
sFileName = sFileName.substring(nSlash+1);
}
try{
// Create subdirectory if required
if (sDirName.length()>0 && !sfa2.exists(sNewURL+sDirName)) {
sfa2.createFolder(sNewURL+sDirName);
}
// writeFile demands an InputStream, so we need a pipe
Object xPipeObj=xMSF.createInstance("com.sun.star.io.Pipe");
XInputStream xInStream
= (XInputStream) UnoRuntime.queryInterface(XInputStream.class, xPipeObj );
XOutputStream xOutStream
= (XOutputStream) UnoRuntime.queryInterface(XOutputStream.class, xPipeObj );
OutputStream outStream = new XOutputStreamToOutputStreamAdapter(xOutStream);
// Feed the pipe with content...
docOut.write(outStream);
outStream.flush();
outStream.close();
xOutStream.closeOutput();
// ...and then write the content to the url
sfa2.writeFile(sNewURL+sFullFileName,xInStream);
}
catch (Throwable e){
MessageBox msgBox = new MessageBox(xComponentContext);
msgBox.showMessage(__displayName+": Error writing files",
e.toString()+" at "+e.getStackTrace()[0].toString());
}
}
String getFileName(String origName) {
String name=null;
if (origName !=null) {
if(origName.equalsIgnoreCase(""))
name = "OutFile";
else {
if (origName.lastIndexOf("/")>=0) {
origName=origName.substring(origName.lastIndexOf("/")+1,origName.length());
}
if (origName.lastIndexOf(".")>=0) {
name = origName.substring(0,(origName.lastIndexOf(".")));
}
else {
name=origName;
}
}
}
}
else{
name = "OutFile";
}
return name;
}
// Implementation of XExportFilter:
/** Receive the export parameters. Nothing is converted here: the values
 *  are only stored, and the conversion itself is started from
 *  endDocument().
 *
 *  @param aSourceData the media descriptor; the entries "OutputStream",
 *  "URL" and "FilterData" are picked up, all others are ignored
 *  @param msUserData user data from the filter configuration; entry 5 is
 *  stored as the MIME type that is later passed to the converter factory
 *  @return always true
 */
public boolean exporter(com.sun.star.beans.PropertyValue[] aSourceData,
        java.lang.String[] msUserData) throws com.sun.star.uno.RuntimeException {
    // Forget whatever a previous export has left behind
    sURL = null;
    filterData = null;

    // Only one entry of the user data is needed
    sdMime = msUserData[5];

    // Pick the interesting entries from the source data
    for (com.sun.star.beans.PropertyValue prop : aSourceData) {
        try {
            if (prop.Name.equals("OutputStream")) {
                xos = (com.sun.star.io.XOutputStream) AnyConverter.toObject(
                        new Type(com.sun.star.io.XOutputStream.class), prop.Value);
            }
            else if (prop.Name.equals("URL")) {
                sURL = (String) AnyConverter.toObject(
                        new Type(java.lang.String.class), prop.Value);
            }
            else if (prop.Name.equals("FilterData")) {
                filterData = prop.Value;
            }
        }
        catch (com.sun.star.lang.IllegalArgumentException badValue) {
            // A value of an unexpected type is reported and otherwise skipped
            System.err.println("\nIllegalArgumentException "+badValue);
        }
    }

    // The rest of the class relies on the URL being non-null
    if (sURL == null) {
        sURL = "";
    }
    return true;
}
// Implementation of XDocumentHandler:
// A flat XML DOM tree is created by the SAX events and finally converted
public void startDocument () {
//Do nothing
}
/** Called when the last SAX event has been received: the DOM tree collected
 *  by the builder is converted and written to the output stream stored by
 *  exporter(). Any failure is shown to the user in a message box and then
 *  reported to the caller as a UNO RuntimeException.
 */
public void endDocument() throws com.sun.star.uno.RuntimeException {
    try {
        convert(domBuilder.getDOM(), xos);
    }
    catch (IOException ioError) {
        // I/O problems keep their original message in the rethrown exception
        new MessageBox(xComponentContext).showMessage(
                __displayName+": IO error in conversion",
                ioError.toString()+" at "+ioError.getStackTrace()[0].toString());
        throw new com.sun.star.uno.RuntimeException(ioError.getMessage());
    }
    catch (Exception internalError) {
        // Everything else is reported as an internal error of the filter
        new MessageBox(xComponentContext).showMessage(
                __displayName+": Internal error in conversion",
                internalError.toString()+" at "+internalError.getStackTrace()[0].toString());
        throw new com.sun.star.uno.RuntimeException(__displayName+" Exception");
    }
}
// Implement methods from interface XTypeProvider
// Implementation of XTypeProvider
public com.sun.star.uno.Type[] getTypes() {
Type[] typeReturn = {};
try {
typeReturn = new Type[] {
new Type( XTypeProvider.class ),
new Type( XExportFilter.class ),
new Type( XServiceName.class ),
new Type( XServiceInfo.class ) };
}
catch( Exception exception ) {
}
/** SAX event: a new element starts. The element and its attributes are
 *  added to the DOM tree under construction.
 *
 *  @param sTagName the (qualified) name of the element
 *  @param xAttribs the attributes of the element; may be null, in which
 *  case the element is created without attributes
 */
public void startElement (String sTagName, com.sun.star.xml.sax.XAttributeList xAttribs) {
    domBuilder.startElement(sTagName);
    // The previous, stream based implementation accepted a missing attribute
    // list, so a null value must not break the DOM based one either
    if (xAttribs == null) {
        return;
    }
    int nLen = xAttribs.getLength();
    // The index is a short because the XAttributeList accessors take a short
    for (short i=0;i<nLen;i++) {
        domBuilder.setAttribute(xAttribs.getNameByIndex(i), xAttribs.getValueByIndex(i));
    }
}
return( typeReturn );
}
/** SAX event: an element ends. The tag name is not used; the call is
 *  passed on to the DOM builder without arguments (presumably closing the
 *  element opened most recently - confirm against SimpleDOMBuilder).
 */
public void endElement(String sTagName){
domBuilder.endElement();
}
/** SAX event: character data. The text is passed on to the DOM builder
 *  unchanged.
 */
public void characters(String sText){
domBuilder.characters(sText);
}
/** SAX event: ignorable whitespace. Not used by this filter. */
public void ignorableWhitespace(String str){
}
/** SAX event: processing instruction. Not used by this filter. */
public void processingInstruction(String aTarget, String aData){
}
/** SAX event: document locator. Not used by this filter. */
public void setDocumentLocator(com.sun.star.xml.sax.XLocator xLocator){
}
public byte[] getImplementationId() {
byte[] byteReturn = {};
// This is the actual conversion method, using Writer2LaTeX to convert
// the flat XML from the DOM, and writing the result
// to the XOutputStream. The XMLExporter does not support export to
// compound documents with multiple output files; hence the main file
// is written to the XOutStream and other files are written using UCB.
byteReturn = new String( "" + this.hashCode() ).getBytes();
public void convert (org.w3c.dom.Document dom,com.sun.star.io.XOutputStream exportStream)
throws com.sun.star.uno.RuntimeException, IOException {
// Initialize the file access
sfa2 = null;
try {
Object sfaObject = xComponentContext.getServiceManager().createInstanceWithContext(
"com.sun.star.ucb.SimpleFileAccess", xComponentContext);
sfa2 = (XSimpleFileAccess2) UnoRuntime.queryInterface(XSimpleFileAccess2.class, sfaObject);
}
catch (com.sun.star.uno.Exception e) {
// failed to get SimpleFileAccess service (should not happen)
}
return( byteReturn );
}
// Get base name from the URL provided by OOo
String sName= getFileName(sURL);
// Adapter for output stream (Main output file)
XOutputStreamToOutputStreamAdapter newxos =new XOutputStreamToOutputStreamAdapter(exportStream);
// Create converter
Converter converter = ConverterFactory.createConverter(sdMime);
if (converter==null) {
throw new com.sun.star.uno.RuntimeException("Failed to create converter to "+sdMime);
}
// Apply the FilterData to the converter
if (filterData!=null) {
FilterDataParser fdp = new FilterDataParser(xComponentContext);
fdp.applyFilterData(filterData,converter);
}
// Do conversion
converter.setGraphicConverter(new GraphicConverterImpl(xComponentContext));
ConverterResult dataOut = converter.convert(dom,Misc.makeFileName(sName));
// Write out files
Iterator<OutputFile> docEnum = dataOut.iterator();
// Remove the file name part of the URL
String sNewURL = null;
if (sURL.lastIndexOf("/")>-1) {
// Take the URL up to and including the last slash
sNewURL = sURL.substring(0,sURL.lastIndexOf("/")+1);
}
else {
// The URL does not include a path; this should not really happen,
// but in this case we will write to the current default directory
sNewURL = "";
}
while (docEnum.hasNext() && sURL.startsWith("file:")) {
OutputFile docOut = docEnum.next();
if (dataOut.getMasterDocument()==docOut) {
// The master document is written to the XOutStream supplied
// by the XMLFilterAdaptor
docOut.write(newxos);
newxos.flush();
newxos.close();
}
else {
// Additional files are written directly using UCB
// Get the file name and the (optional) directory name
String sFullFileName = Misc.makeHref(docOut.getFileName());
String sDirName = "";
String sFileName = sFullFileName;
int nSlash = sFileName.indexOf("/");
if (nSlash>-1) {
sDirName = sFileName.substring(0,nSlash);
sFileName = sFileName.substring(nSlash+1);
}
try{
// Create subdirectory if required
if (sDirName.length()>0 && !sfa2.exists(sNewURL+sDirName)) {
sfa2.createFolder(sNewURL+sDirName);
}
// writeFile demands an InputStream, so we need a pipe
Object xPipeObj=xMSF.createInstance("com.sun.star.io.Pipe");
XInputStream xInStream
= (XInputStream) UnoRuntime.queryInterface(XInputStream.class, xPipeObj );
XOutputStream xOutStream
= (XOutputStream) UnoRuntime.queryInterface(XOutputStream.class, xPipeObj );
OutputStream outStream = new XOutputStreamToOutputStreamAdapter(xOutStream);
// Feed the pipe with content...
docOut.write(outStream);
outStream.flush();
outStream.close();
xOutStream.closeOutput();
// ...and then write the content to the URL
sfa2.writeFile(sNewURL+sFullFileName,xInStream);
}
catch (Throwable e){
MessageBox msgBox = new MessageBox(xComponentContext);
msgBox.showMessage(__displayName+": Error writing files",
e.toString()+" at "+e.getStackTrace()[0].toString());
}
}
}
}
// Implement methods from interface XTypeProvider
// Implementation of XTypeProvider
/** Return the UNO types announced by this component.
 *
 *  @return the types for XTypeProvider, XExportFilter, XServiceName and
 *  XServiceInfo, or an empty array if they cannot be created
 */
public com.sun.star.uno.Type[] getTypes() {
    try {
        return new Type[] {
            new Type( XTypeProvider.class ),
            new Type( XExportFilter.class ),
            new Type( XServiceName.class ),
            new Type( XServiceInfo.class )
        };
    }
    catch( Exception ignored ) {
        // Deliberately best effort: a failure results in an empty type list
        return new Type[0];
    }
}
/** Return an id for this implementation.
 *
 *  @return the decimal representation of this object's hash code, encoded
 *  as bytes using the platform default charset
 */
public byte[] getImplementationId() {
    return String.valueOf( this.hashCode() ).getBytes();
}
// Implement method from interface XServiceName
/** Return the service name of the component.
 *  NOTE(review): this reads the static field __serviceName of this class,
 *  which is declared as the empty string here; a subclass presumably has to
 *  override this method to report its own name - confirm.
 */
public String getServiceName() {
return( __serviceName );
}
// Implement methods from interface XServiceInfo
/** Test whether a service is supported.
 *
 *  @param stringServiceName the service name to test (must not be null)
 *  @return true if the name equals __serviceName
 */
public boolean supportsService(String stringServiceName) {
return( stringServiceName.equals( __serviceName ) );
}
/** Return the implementation name of the component (the static field
 *  __implementationName).
 */
public String getImplementationName() {
return __implementationName;
}
/** Return the names of all supported services.
 *
 *  @return a new array with __serviceName as its only element
 */
public String[] getSupportedServiceNames() {
String[] stringSupportedServiceNames = { __serviceName };
return( stringSupportedServiceNames );
}
// Implement method from interface XServiceName
public String getServiceName() {
return( __serviceName );
}
// Implement methods from interface XServiceInfo
public boolean supportsService(String stringServiceName) {
return( stringServiceName.equals( __serviceName ) );
}
public String getImplementationName() {
return __implementationName;
//return( W2LExportFilter.class.getName() );
}
public String[] getSupportedServiceNames() {
String[] stringSupportedServiceNames = { __serviceName };
return( stringSupportedServiceNames );
}
}

View file

@ -87,9 +87,11 @@ public final class Application {
*/
public static final void main (String[] args){
try {
long time = System.currentTimeMillis();
Application app = new Application();
app.parseCommandLine(args);
app.doConversion();
System.out.println("Total conversion time was "+(System.currentTimeMillis()-time)+" miliseconds");
} catch (IllegalArgumentException ex) {
String msg = ex.getMessage();
showUsage(msg);

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2011 by Henrik Just
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2011-06-05)
* Version 1.4 (2012-03-21)
*
*/
@ -137,5 +137,17 @@ public interface Converter {
*/
public ConverterResult convert(File source, String sTargetFileName)
throws FileNotFoundException, IOException;
/** Convert a document
*
* @param dom a DOM tree representing the document as flat XML
* @param sTargetFileName the file name to use for the converted document
* (if the converted document is a compound document consisting
* of several files, this name will be used for the master document)
* @return a <code>ConverterResult</code> containing the converted document
* @throws IOException if some exception occurs while reading the document
*/
public ConverterResult convert(org.w3c.dom.Document dom, String sTargetFileName)
throws IOException;
}

View file

@ -20,7 +20,7 @@
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-19)
* Version 1.4 (2012-03-27)
*
*/
@ -33,7 +33,7 @@ public class ConverterFactory {
// Version information
private static final String VERSION = "1.3.1";
private static final String DATE = "2012-03-19";
private static final String DATE = "2012-03-27";
/** Return the Writer2LaTeX version in the form
* (major version).(minor version).(patch level)<br/>

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2011 by Henrik Just
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2011-02-19)
* Version 1.4 (2012-03-23)
*
*/
@ -35,11 +35,11 @@ import writer2latex.api.GraphicConverter;
import writer2latex.api.Converter;
import writer2latex.api.ConverterResult;
import writer2latex.api.OutputFile;
import writer2latex.office.EmbeddedObject;
import writer2latex.office.ImageLoader;
import writer2latex.office.MetaData;
import writer2latex.office.OfficeDocument;
import writer2latex.office.OfficeReader;
import writer2latex.xmerge.EmbeddedObject;
import writer2latex.xmerge.OfficeDocument;
/**<p>Abstract base implementation of <code>writer2latex.api.Converter</code></p>
*/
@ -93,8 +93,19 @@ public abstract class ConverterBase implements Converter {
public ConverterResult convert(InputStream is, String sTargetFileName) throws IOException {
// Read document
odDoc = new OfficeDocument("InFile");
odDoc = new OfficeDocument();
odDoc.read(is);
return convert(sTargetFileName);
}
/** Convert a document which is already available as a DOM tree.
 *  The tree is read into a new OfficeDocument, which is stored in the
 *  field odDoc; the remaining work is delegated to convert(String).
 *
 *  @param dom a DOM tree representing the document
 *  @param sTargetFileName the file name to use for the converted document
 *  @return the result of the conversion
 *  @throws IOException if it is thrown while reading or converting the document
 */
public ConverterResult convert(org.w3c.dom.Document dom, String sTargetFileName) throws IOException {
// Read document
odDoc = new OfficeDocument();
odDoc.read(dom);
return convert(sTargetFileName);
}
private ConverterResult convert(String sTargetFileName) throws IOException {
ofr = new OfficeReader(odDoc,false);
metaData = new MetaData(odDoc);
imageLoader = new ImageLoader(odDoc,true);

View file

@ -33,12 +33,12 @@ import org.w3c.dom.Document;
import org.w3c.dom.Element;
//import org.w3c.dom.Node;
import writer2latex.xmerge.EmbeddedObject;
import writer2latex.xmerge.EmbeddedXMLObject;
import writer2latex.latex.util.BeforeAfter;
import writer2latex.latex.util.Context;
//import writer2latex.office.ImageLoader;
import writer2latex.office.EmbeddedObject;
import writer2latex.office.EmbeddedXMLObject;
import writer2latex.office.MIMETypes;
import writer2latex.office.OfficeReader;
import writer2latex.office.StyleWithProperties;

View file

@ -34,13 +34,13 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
//import writer2latex.latex.i18n.I18n;
import writer2latex.office.EmbeddedObject;
import writer2latex.office.EmbeddedXMLObject;
import writer2latex.office.MIMETypes;
import writer2latex.office.OfficeReader;
import writer2latex.office.TableReader;
import writer2latex.office.XMLString;
import writer2latex.util.Misc;
import writer2latex.xmerge.EmbeddedObject;
import writer2latex.xmerge.EmbeddedXMLObject;
/**
* This class converts mathml nodes to LaTeX.

View file

@ -0,0 +1,60 @@
/************************************************************************
*
* EmbeddedBinaryObject.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-26)
*
*/
package writer2latex.office;
import writer2latex.util.SimpleZipReader;
/**
 * An embedded object of an ODF package document which is kept in its
 * binary form.
 */
public class EmbeddedBinaryObject extends EmbeddedObject {
    /** The binary content, exactly as returned by the ZIP reader. */
    private final byte[] objData;

    /**
     * Create the object from an entry of an ODF package. The constructor is
     * protected, hence it is available to subclasses and to the classes of
     * this package only.
     *
     * @param sName The name of the object; also used as the name of the ZIP entry.
     * @param sType The MIME-type of the object.
     * @param source A <code>SimpleZipReader</code> containing the object
     */
    protected EmbeddedBinaryObject(String sName, String sType, SimpleZipReader source) {
        super(sName, sType);
        this.objData = source.getEntry(sName);
    }

    /** Get the binary data for this object
     *
     * @return The <code>byte</code> array held by this object (the array
     * itself, not a copy).
     */
    public byte[] getBinaryData() {
        return this.objData;
    }
}

View file

@ -0,0 +1,61 @@
/************************************************************************
*
* EmbeddedObject.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-27)
*
*/
package writer2latex.office;
/** This class represents an embedded object within an ODF package document
 */
public abstract class EmbeddedObject {
    /** The name of the object */
    private final String sName;
    /** The MIME type of the object */
    private final String sType;

    /** Construct a new embedded object
     *
     * @param name The name of the object.
     * @param type The MIME-type of the object.
     */
    protected EmbeddedObject(String name, String type) {
        this.sName = name;
        this.sType = type;
    }

    /** Get the name of the embedded object represented by this instance.
     * The name refers to the manifest.xml file
     *
     * @return The name of the object.
     */
    public final String getName() {
        return this.sName;
    }

    /** Get the MIME type of the embedded object represented by this instance.
     * The MIME type refers to the manifest.xml file
     *
     * @return The MIME type of the object.
     */
    public final String getType() {
        return this.sType;
    }
}

View file

@ -0,0 +1,122 @@
/************************************************************************
*
* EmbeddedXMLObject.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-27)
*
*/
package writer2latex.office;
import java.io.IOException;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import writer2latex.util.SimpleZipReader;
/** An embedded object of an ODF document which has an XML representation
 *  (formulas, charts, spreadsheets, text, drawings and presentations).
 *  Such an object is stored as a combination of a content, a settings and a styles XML stream.
 *  The streams are fetched as raw bytes on construction and are only parsed on first request.
 */
public class EmbeddedXMLObject extends EmbeddedObject {
    // Raw (unparsed) XML streams; each one is released once it has been parsed
    private byte[] rawContent = null;
    private byte[] rawSettings = null;
    private byte[] rawStyles = null;

    // DOM trees representing the XML parts of this object (created lazily)
    protected Document contentDOM = null;
    protected Document settingsDOM = null;
    protected Document stylesDOM = null;

    /** Read an object from an ODF package document
     *
     *  @param sName   The name of the object.
     *  @param sType   The MIME-type of the object.
     *  @param source  A ZIP reader providing the contents of the package
     */
    protected EmbeddedXMLObject(String sName, String sType, SimpleZipReader source) {
        super(sName, sType);
        // Only fetch the bytes here; parsing is deferred until a DOM is actually requested
        String sPrefix = sName + "/";
        rawContent = source.getEntry(sPrefix + OfficeDocument.CONTENTXML);
        rawSettings = source.getEntry(sPrefix + OfficeDocument.SETTINGSXML);
        rawStyles = source.getEntry(sPrefix + OfficeDocument.STYLESXML);
    }

    /**
     * Returns the content data for this embedded object, parsing it on first use.
     *
     * @return DOM representation of "content.xml", or null if the object has no such stream
     *
     * @throws SAXException If any parser error occurs
     * @throws IOException  If any IO error occurs
     */
    public Document getContentDOM() throws SAXException, IOException {
        if (contentDOM != null) {
            return contentDOM;
        }
        contentDOM = parseIfPresent(rawContent);
        rawContent = null; // the bytes are no longer needed
        return contentDOM;
    }

    /**
     * Returns the settings data for this embedded object, parsing it on first use.
     *
     * @return DOM representation of "settings.xml", or null if the object has no such stream
     *
     * @throws SAXException If any parser error occurs
     * @throws IOException  If any IO error occurs
     */
    public Document getSettingsDOM() throws SAXException, IOException {
        if (settingsDOM != null) {
            return settingsDOM;
        }
        settingsDOM = parseIfPresent(rawSettings);
        rawSettings = null; // the bytes are no longer needed
        return settingsDOM;
    }

    /**
     * Returns the style data for this embedded object, parsing it on first use.
     *
     * @return DOM representation of "styles.xml", or null if the object has no such stream
     *
     * @throws SAXException If any parser error occurs
     * @throws IOException  If any IO error occurs
     */
    public Document getStylesDOM() throws SAXException, IOException {
        if (stylesDOM != null) {
            return stylesDOM;
        }
        stylesDOM = parseIfPresent(rawStyles);
        rawStyles = null; // the bytes are no longer needed
        return stylesDOM;
    }

    // Parse the bytes of an XML stream; a missing stream (null) yields a null DOM
    private Document parseIfPresent(byte[] data) throws SAXException, IOException {
        return data == null ? null : OfficeDocument.parse(data);
    }
}

View file

@ -40,9 +40,6 @@ import writer2latex.api.GraphicConverter;
import writer2latex.util.Base64;
import writer2latex.util.Misc;
import writer2latex.xmerge.BinaryGraphicsDocument;
import writer2latex.xmerge.EmbeddedObject;
import writer2latex.xmerge.EmbeddedBinaryObject;
import writer2latex.xmerge.OfficeDocument;
//import writer2latex.util.*;

View file

@ -36,7 +36,6 @@ import org.w3c.dom.NodeList;
import writer2latex.util.*;
//import writer2latex.office.*;
import writer2latex.xmerge.OfficeDocument;
/**
* <p>This class represents the metadata of an OOo Writer document.</p>

View file

@ -0,0 +1,349 @@
/************************************************************************
*
* OfficeDocument.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-27)
*
*/
package writer2latex.office;
import java.io.BufferedInputStream;
import java.io.InputStream;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import writer2latex.util.SimpleSAXHandler;
import writer2latex.util.SimpleZipReader;
/**
 * This class implements reading of ODF files from various sources.
 * A document is either in package format (a ZIP file containing several XML streams
 * and a manifest) or in flat XML format (a single XML stream or DOM tree).
 */
public class OfficeDocument {
    // File names for the XML streams in a package document
    protected final static String CONTENTXML = "content.xml";
    protected final static String STYLESXML = "styles.xml";
    protected final static String SETTINGSXML = "settings.xml";
    private final static String METAXML = "meta.xml";
    private final static String MANIFESTXML = "META-INF/manifest.xml";

    // Some tag and attribute names in manifest.xml
    private final static String MANIFEST_FILE_ENTRY = "manifest:file-entry";
    private final static String MANIFEST_MEDIA_TYPE = "manifest:media-type";
    private final static String MANIFEST_FULL_PATH = "manifest:full-path";

    // Number of leading bytes inspected to tell package format from flat XML
    private final static int MAGIC_LENGTH = 4;

    /** DOM <code>Document</code> of content.xml. */
    private Document contentDoc = null;

    /** DOM <code>Document</code> of meta.xml. */
    private Document metaDoc = null;

    /** DOM <code>Document</code> of settings.xml. */
    private Document settingsDoc = null;

    /** DOM <code>Document</code> of styles.xml. */
    private Document styleDoc = null;

    /** DOM <code>Document</code> of META-INF/manifest.xml. */
    private Document manifestDoc = null;

    /** <code>SimpleZipReader</code> to store the contents from the <code>InputStream</code>
     *  if the document is in package format (otherwise this will remain null)
     */
    private SimpleZipReader zip = null;

    /** Collection to keep track of the embedded objects in the document (built lazily). */
    private Map<String, EmbeddedObject> embeddedObjects = null;

    /** Package or flat format?
     *  @return true if the document is in package format, false if it's flat XML
     */
    public boolean isPackageFormat() {
        return zip != null;
    }

    /**
     * Return a DOM <code>Document</code> object of the content.xml file.
     * Note that a content DOM is not created when the constructor
     * is called, but only after the <code>read</code> method has been invoked
     *
     * @return DOM <code>Document</code> object.
     */
    public Document getContentDOM() {
        return contentDoc;
    }

    /**
     * Return a DOM <code>Document</code> object of the meta.xml
     * file. Note that a meta DOM is not created when the constructor
     * is called, but only after the <code>read</code> method has been invoked
     *
     * @return DOM <code>Document</code> object.
     */
    public Document getMetaDOM() {
        return metaDoc;
    }

    /**
     * Return a DOM <code>Document</code> object of the settings.xml
     * file. Note that a settings DOM is not created when the constructor
     * is called, but only after the <code>read</code> method has been invoked
     *
     * @return DOM <code>Document</code> object.
     */
    public Document getSettingsDOM() {
        return settingsDoc;
    }

    /**
     * Return a DOM <code>Document</code> object of the styles.xml file.
     * Note that a style DOM is not created when the constructor
     * is called, but only after the <code>read</code> method has been invoked
     *
     * @return DOM <code>Document</code> object.
     */
    public Document getStyleDOM() {
        return styleDoc;
    }

    /**
     * Returns all the embedded objects (graphics, formulae, etc.) present in
     * this document. If the document is read from flat XML there will be no embedded objects.
     * The collection is built from the manifest on the first call and cached afterwards.
     *
     * @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects.
     */
    public Iterator<EmbeddedObject> getEmbeddedObjects() {
        if (embeddedObjects == null) {
            embeddedObjects = new HashMap<String, EmbeddedObject>();
            if (manifestDoc != null) {
                // Need to read the manifest file and construct a list of objects
                NodeList nl = manifestDoc.getElementsByTagName(MANIFEST_FILE_ENTRY);
                int nLen = nl.getLength();
                for (int i = 0; i < nLen; i++) {
                    Element elm = (Element) nl.item(i);
                    String sType = elm.getAttribute(MANIFEST_MEDIA_TYPE);
                    String sPath = elm.getAttribute(MANIFEST_FULL_PATH);

                    /* According to the ODF spec there are only two types of embedded object:
                     *   Objects with an XML representation.
                     *   Objects without an XML representation.
                     * The former are represented by one or more XML files.
                     * The latter are in binary form.
                     */
                    if (sType.startsWith("application/vnd.oasis.opendocument") || sType.startsWith("application/vnd.sun.xml")) {
                        // Allow either ODF or old OOo 1.x embedded objects
                        if (!sPath.equals("/")) { // Exclude the main document entries
                            if (sPath.endsWith("/")) { // Remove trailing slash
                                sPath = sPath.substring(0, sPath.length() - 1);
                            }
                            embeddedObjects.put(sPath, new EmbeddedXMLObject(sPath, sType, zip));
                        }
                    }
                    else if (!sType.equals("text/xml")) {
                        // XML entries are either embedded ODF doc entries or main document entries, all other
                        // entries are included as binary objects
                        embeddedObjects.put(sPath, new EmbeddedBinaryObject(sPath, sType, zip));
                    }
                }
            }
        }
        return embeddedObjects.values().iterator();
    }

    /**
     * Returns the embedded object corresponding to the name provided.
     * The name should be stripped of any preceding path characters, such as
     * '/', '.' or '#'.
     *
     * @param sName The name of the embedded object to retrieve.
     *
     * @return An <code>EmbeddedObject</code> instance representing the named
     *         object, or null if the name is null or no such object exists.
     */
    public EmbeddedObject getEmbeddedObject(String sName) {
        if (sName == null) {
            return null;
        }
        getEmbeddedObjects(); // make sure the collection has been built
        return embeddedObjects.get(sName);
    }

    /**
     * Read the document from a DOM tree (flat XML format)
     *
     * @param dom the DOM tree
     */
    public void read(org.w3c.dom.Document dom) {
        clear();
        contentDoc = dom;
    }

    /**
     * Read the Office <code>Document</code> from the given
     * <code>InputStream</code>.
     * Performs simple type detection to determine package or flat format
     *
     * @param is Office document <code>InputStream</code>.
     *
     * @throws IOException If any I/O error occurs.
     */
    public void read(InputStream is) throws IOException {
        // We need to read 4 bytes ahead to detect flat or zip format
        BufferedInputStream inbuf = new BufferedInputStream(is);
        byte[] bytes = new byte[MAGIC_LENGTH];
        inbuf.mark(MAGIC_LENGTH);
        // A single read call may deliver fewer bytes than requested, so keep reading
        // until the magic bytes are complete or the stream ends
        int nRead = 0;
        while (nRead < MAGIC_LENGTH) {
            int nCount = inbuf.read(bytes, nRead, MAGIC_LENGTH - nRead);
            if (nCount < 0) {
                break;
            }
            nRead += nCount;
        }
        inbuf.reset();
        boolean bZip = MIMETypes.ZIP.equals(MIMETypes.getMagicMIMEType(bytes));
        if (bZip) {
            readZip(inbuf);
        }
        else {
            readFlat(inbuf);
        }
    }

    // Read a document in package format: content.xml is mandatory, all other streams are optional
    private void readZip(InputStream is) throws IOException {
        // Forget everything from a previous read, so that neither stale DOM trees
        // nor a stale collection of embedded objects survive in a reused instance
        clear();
        zip = new SimpleZipReader();
        zip.read(is);

        byte contentBytes[] = zip.getEntry(CONTENTXML);
        if (contentBytes == null) {
            throw new IOException("Entry content.xml not found in file");
        }
        contentDoc = parseOptional(contentBytes);
        styleDoc = parseOptional(zip.getEntry(STYLESXML));
        metaDoc = parseOptional(zip.getEntry(METAXML));
        settingsDoc = parseOptional(zip.getEntry(SETTINGSXML));
        manifestDoc = parseOptional(zip.getEntry(MANIFESTXML));
    }

    // Parse an XML stream which may be absent (null); parser errors are reported as IOException
    private static Document parseOptional(byte bytes[]) throws IOException {
        if (bytes == null) {
            return null;
        }
        try {
            return parse(bytes);
        } catch (SAXException ex) {
            throw new IOException(ex);
        }
    }

    // Read a document in flat XML format. Parser errors are handled on a best-effort basis:
    // they are reported on stderr and whatever DOM the handler has built so far is used
    private void readFlat(InputStream is) throws IOException {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SimpleSAXHandler handler = new SimpleSAXHandler();
        try {
            SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(is, handler);
        }
        catch (SAXException e) {
            System.err.println("Oops - Error parsing document");
            e.printStackTrace();
        }
        catch (ParserConfigurationException e) {
            System.err.println("Oops - failed to get XML parser!?");
            e.printStackTrace();
        }
        Document dom = handler.getDOM();
        clear();
        contentDoc = dom;
    }

    // Reset this instance to the state before any document was read
    private void clear() {
        contentDoc = null;
        styleDoc = null;
        settingsDoc = null;
        metaDoc = null;
        manifestDoc = null;
        zip = null;
        embeddedObjects = null;
    }

    /**
     * Parse given <code>byte</code> array into a DOM
     * <code>Document</code> object using a SAX parser feeding a
     * <code>SimpleSAXHandler</code>.
     *
     * @param bytes <code>byte</code> array for parsing.
     *
     * @return Resulting DOM <code>Document</code> object, or null if no
     *         XML parser could be created.
     *
     * @throws SAXException If any parsing error occurs.
     * @throws IOException If any I/O error occurs.
     */
    static Document parse(byte bytes[]) throws SAXException, IOException {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SimpleSAXHandler handler = new SimpleSAXHandler();
        try {
            SAXParser saxParser = factory.newSAXParser();
            saxParser.parse(new ByteArrayInputStream(bytes), handler);
            return handler.getDOM();
        }
        catch (ParserConfigurationException e) {
            System.err.println("Oops - failed to get XML parser!?");
            e.printStackTrace();
        }
        return null;
    }
}

View file

@ -37,7 +37,6 @@ import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Document;
import writer2latex.xmerge.OfficeDocument;
import writer2latex.util.Misc;
/** <p> This class reads and collects global information about an OOo document.

View file

@ -0,0 +1,123 @@
/************************************************************************
*
* SimpleDOMBuilder.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-22)
*
*/
package writer2latex.util;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.DOMImplementation;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
/** A small helper which creates and populates a DOM tree in document order:
 *  elements are opened and closed like in a SAX event stream, and attributes and
 *  text are always added to the element which is currently open.
 */
public class SimpleDOMBuilder {
    private Document dom=null;
    private Element currentElement=null;

    /**
     * Open a new element. If an element is currently open, the new element is appended to it.
     * Otherwise a new DOM tree is created (replacing the previous tree, if any) with the
     * new element as the document element. In both cases the new element becomes the current element.
     *
     * @param sTagName the tag name of the new element
     * @return true on success, false if no DOM implementation could be obtained
     */
    public boolean startElement(String sTagName) {
        if (currentElement == null) {
            return startDocument(sTagName);
        }
        Element child = dom.createElement(sTagName);
        currentElement.appendChild(child);
        currentElement = child;
        return true;
    }

    // Create a fresh DOM tree whose document element (and document type) is named sRootName
    private boolean startDocument(String sRootName) {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DOMImplementation domImpl;
        try {
            domImpl = factory.newDocumentBuilder().getDOMImplementation();
        } catch (ParserConfigurationException e) {
            return false;
        }
        DocumentType doctype = domImpl.createDocumentType(sRootName, "", "");
        dom = domImpl.createDocument("", sRootName, doctype);
        currentElement = dom.getDocumentElement();
        return true;
    }

    /**
     * Close the current element, making its parent the current element.
     * Closing the document element finishes the tree, leaving no current element.
     *
     * @return true on success, false if there is no current element to end
     */
    public boolean endElement() {
        if (currentElement == null) {
            return false;
        }
        boolean bAtRoot = currentElement == dom.getDocumentElement();
        currentElement = bAtRoot ? null : (Element) currentElement.getParentNode();
        return true;
    }

    /**
     * Set an attribute of the current element
     *
     * @param sName the name of the attribute
     * @param sValue the value of the attribute
     * @return true on success, false if there is no current element
     */
    public boolean setAttribute(String sName,String sValue) {
        if (currentElement == null) {
            return false;
        }
        currentElement.setAttribute(sName, sValue);
        return true;
    }

    /**
     * Append a text node to the current element
     *
     * @param sText the text to add
     * @return true on success, false if there is no current element
     */
    public boolean characters(String sText) {
        if (currentElement == null) {
            return false;
        }
        currentElement.appendChild(dom.createTextNode(sText));
        return true;
    }

    /**
     * Get the DOM tree
     *
     * @return the DOM tree, or null if none has been created
     */
    public Document getDOM() {
        return dom;
    }
}

View file

@ -0,0 +1,62 @@
/************************************************************************
*
* SimpleSAXHandler.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-23)
*
*/
package writer2latex.util;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/** A simple SAX handler which turns the stream of SAX events into a DOM tree
 *  by forwarding them to a <code>SimpleDOMBuilder</code>
 *  (only element and text nodes, including attributes, are supported)
 */
public class SimpleSAXHandler extends DefaultHandler {
    private SimpleDOMBuilder domBuilder = new SimpleDOMBuilder();

    /** Get the DOM tree built from the events received so far
     *
     *  @return the DOM tree as provided by the underlying builder
     */
    public org.w3c.dom.Document getDOM() {
        return domBuilder.getDOM();
    }

    // Open an element using its qualified name, then copy all attributes by qualified name
    @Override public void startElement(String nameSpace, String localName, String qName, Attributes attributes){
        domBuilder.startElement(qName);
        int nCount = attributes.getLength();
        int nIndex = 0;
        while (nIndex < nCount) {
            String sAttrName = attributes.getQName(nIndex);
            String sAttrValue = attributes.getValue(nIndex);
            domBuilder.setAttribute(sAttrName, sAttrValue);
            nIndex++;
        }
    }

    // Close the element which is currently open in the builder
    @Override public void endElement(String nameSpace, String localName, String qName){
        domBuilder.endElement();
    }

    // Note: despite its name, nEnd is used as the number of characters to copy from nStart
    @Override public void characters(char[] characters, int nStart, int nEnd) throws SAXException {
        String sText = String.valueOf(characters, nStart, nEnd);
        domBuilder.characters(sText);
    }
}

View file

@ -0,0 +1,84 @@
/************************************************************************
*
* SimpleZipReader.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-27)
*
*/
package writer2latex.util;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/** A simple reader which loads all entries of a zipped stream into memory
 *  and hands them out once each by name.
 */
public class SimpleZipReader {
    private final static int BUFFERSIZE = 1024;

    // The contents of the entries read so far, keyed by entry name (path)
    private Map<String,byte[]> entries = new HashMap<String,byte[]>();

    /** Read a zipped stream. All entries are decompressed and kept in memory.
     *  The stream is always closed, even if reading fails.
     *
     *  @param is <code>InputStream</code> to read
     *
     *  @throws IOException if an I/O error occurs
     */
    public void read(InputStream is) throws IOException {
        ZipInputStream zis = new ZipInputStream(is);
        try {
            // One buffer is sufficient for all entries
            byte buffer[] = new byte[BUFFERSIZE];
            ZipEntry entry;
            while ((entry = zis.getNextEntry()) != null) {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                int nLen;
                while ((nLen = zis.read(buffer)) > 0) {
                    baos.write(buffer, 0, nLen);
                }
                entries.put(entry.getName(), baos.toByteArray());
            }
        }
        finally {
            // Release the stream on failure too
            zis.close();
        }
    }

    /** Get an entry from the ZIP file. Getting should be taken quite literally here:
     *  You can only get an entry once: The <code>SimpleZipReader</code> removes the entry from the
     *  collection when this method is called (memory optimization).
     *
     *  @param sName the name (path) of the ZIP entry
     *
     *  @return a byte array with the contents of the entry, or null if the entry does not exist
     *          (or has already been retrieved)
     */
    public byte[] getEntry(String sName) {
        // Map.remove returns the removed value, or null if there is no such key
        return entries.remove(sName);
    }
}

View file

@ -52,13 +52,13 @@ import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;
import writer2latex.xmerge.EmbeddedObject;
//import writer2latex.xmerge.EmbeddedBinaryObject;
import writer2latex.xmerge.EmbeddedXMLObject;
import writer2latex.util.Misc;
import writer2latex.util.CSVList;
import writer2latex.xmerge.BinaryGraphicsDocument;
import writer2latex.office.EmbeddedObject;
import writer2latex.office.EmbeddedXMLObject;
import writer2latex.office.XMLString;
import writer2latex.office.MIMETypes;
import writer2latex.office.StyleWithProperties;

View file

@ -1,109 +0,0 @@
/************************************************************************
*
* The Contents of this file are made available subject to the terms of
* either of the following licenses
*
* - GNU Lesser General Public License Version 2.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
// This version is adapted for Writer2LaTeX
// Version 1.4 (2012-03-19)
package writer2latex.xmerge;
/**
 * This class represents embedded objects in an OpenOffice.org document that
 * have a binary representation.
 */
public class EmbeddedBinaryObject extends EmbeddedObject {

    /** The object's binary representation (fetched from the package on first request). */
    protected byte[] objData = null;

    /**
     * Constructor for an embedded object which is not backed by a package file.
     *
     * @param name The name of the object.
     * @param type The mime-type of the object.
     */
    public EmbeddedBinaryObject(String name, String type) {
        super(name, type);
    }

    /**
     * Package private constructor for use when reading an object from a
     * compressed SX? file.
     *
     * @param name The name of the object.
     * @param type The mime-type of the object.
     * @param source The OfficeZip representation of the SX? file that stores
     *               the object.
     */
    EmbeddedBinaryObject(String name, String type, OfficeZip source) {
        super(name, type, source);
    }

    /**
     * This method returns the data for this object. The data is looked up
     * in the package file (if any) the first time it is requested.
     *
     * @return A <code>byte</code> array containing the object's data, or null
     *         if no data is available.
     */
    public byte[] getBinaryData() {
        // Nothing to fetch if the data is already present or there is no package to fetch it from
        if (objData != null || zipFile == null) {
            return objData;
        }
        objData = zipFile.getNamedBytes(objName);
        return objData;
    }

    /**
     * Sets the data for this object.
     *
     * @param data A <code>byte</code> array containing data for the object.
     */
    /*public void setBinaryData(byte[] data) {
    objData = data;
    hasChanged = true;
    }*/
}

View file

@ -1,108 +0,0 @@
/************************************************************************
*
* The Contents of this file are made available subject to the terms of
*
* - GNU Lesser General Public License Version 2.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
// This version is adapted for Writer2LaTeX
// Version 1.4 (2012-03-19)
package writer2latex.xmerge;
/**
 * Common base class for the embedded objects of an OpenOffice.org document.
 * It holds the name and type of the object and the package file (if any) it was read from.
 */
public abstract class EmbeddedObject {
    /** The name of the object. */
    protected String objName;

    /** The type of the object. */
    protected String objType;

    /** Representation of the file from which this object was read. */
    protected OfficeZip zipFile = null;

    /** Flag indicating if this document has changed since reading or is new. */
    protected boolean hasChanged = false;

    /**
     * Constructor for an embedded object which is not backed by a package file.
     * The object is flagged as changed.
     *
     * @param name The name of the object.
     * @param type The mime-type of the object.
     */
    public EmbeddedObject(String name, String type) {
        this.hasChanged = true;
        this.objType = type;
        this.objName = name;
    }

    /**
     * Package private constructor for use when reading an object from a
     * compressed SX? file.
     *
     * @param name The name of the object.
     * @param type The mime-type of the object.
     * @param source The OfficeZip representation of the SX? file that stores
     *               the object.
     */
    EmbeddedObject(String name, String type, OfficeZip source) {
        this(name, type);
        this.zipFile = source;
    }

    /**
     * Retrieves the name of the embedded object represented by an instance of
     * this class.
     *
     * <b>N.B.</b> The name refers to the name as found in the
     * <code>META-INF/manifest.xml</code> file.
     *
     * @return The name of the object.
     */
    public final String getName() {
        return this.objName;
    }

    /**
     * Retrieves the type of the embedded object represented by an instance of
     * this class.
     *
     * The <code>META-INF/manifest.xml</code> file currently represents the
     * type of an object using MIME types.
     *
     * @return The type of the object.
     */
    public final String getType() {
        return this.objType;
    }
}

View file

@ -1,239 +0,0 @@
/************************************************************************
*
* The Contents of this file are made available subject to the terms of
*
* - GNU Lesser General Public License Version 2.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
// This version is adapted for Writer2LaTeX
// Version 1.4 (2012-03-19)
package writer2latex.xmerge;
//import java.io.ByteArrayInputStream;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
//import org.xml.sax.EntityResolver;
//import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* This class represents those embedded objects in an OpenOffice.org document
* that have an XML representation. Currently, according to the OpenOffice.org
* File Format 1.0 document, there are 6 such objects:
*
* Formulae created with Math (application/vnd.sun.xml.math)
* Charts created with Chart (application/vnd.sun.xml.chart)
* Spreadsheets created with Calc (application/vnd.sun.xml.calc)
* Text created with Writer (application/vnd.sun.xml.writer)
* Drawings created with Draw (application/vnd.sun.xml.draw)
* Presentations created with Impress (application/vnd.sun.xml.impress)
*
* These object types are stored using a combination of content, settings and styles
* XML files.
*/
public class EmbeddedXMLObject extends EmbeddedObject {
// Entries for the subdocuments that constitute this object;
protected Document contentDOM = null;
protected Document settingsDOM = null;
protected Document stylesDOM = null;
private DocumentBuilder builder = null;
/**
* Constructor for an embedded object stored using an XML representation.
*
* @param name The name of the object.
* @param type The mime-type of the object. See the class summary.
*/
public EmbeddedXMLObject(String name, String type) {
super(name, type);
}
/**
* Package private constructor for use when reading an object from a
* compressed SX? file.
*
* @param name The name of the object.
* @param type The mime-type of the object. See the class summary.
* @param source The OfficeZip representation of the SX? file that stores
* the object.
*/
EmbeddedXMLObject(String name, String type, OfficeZip source) {
super(name, type, source);
}
/**
* Returns the content data for this embedded object.
*
* @return DOM represenation of "content.xml"
*
* @throws SAXException If any parser error occurs
* @throws IOException If any IO error occurs
*/
public Document getContentDOM() throws SAXException, IOException {
if (contentDOM == null) {
contentDOM = getNamedDOM("content.xml");
}
return contentDOM;
}
/**
* Sets the content data for the embedded object.
*
* @param content DOM representation of the object's content.
*/
/*public void setContentDOM(Document content) {
contentDOM = content;
hasChanged = true;
}*/
/**
* Returns the settings data for this embedded object.
*
* @return DOM represenation of "settings.xml"
*
* @throws SAXException If any parser error occurs
* @throws IOException If any IO error occurs
*/
public Document getSettingsDOM() throws SAXException, IOException {
if (settingsDOM == null) {
settingsDOM = getNamedDOM("settings.xml");
}
return settingsDOM;
}
/**
* Sets the settings data for the embedded object.
*
* @param settings DOM representation of the object's settings.
*/
/*public void setSettingsDOM(Document settings) {
settingsDOM = settings;
hasChanged = true;
}*/
/**
 * Returns the DOM of this embedded object's "styles.xml".
 *
 * <p>The DOM is obtained through <code>getNamedDOM</code> the first time it
 * is requested and the result is kept in <code>stylesDOM</code>. A
 * <code>null</code> result is not remembered, so the lookup is repeated on
 * the next call.</p>
 *
 * @return DOM representation of "styles.xml" (may be <code>null</code>
 *         if <code>getNamedDOM</code> returns <code>null</code>)
 *
 * @throws SAXException If any parser error occurs
 * @throws IOException If any IO error occurs
 */
public Document getStylesDOM() throws SAXException, IOException {
    if (stylesDOM != null) {
        return stylesDOM;
    }
    stylesDOM = getNamedDOM("styles.xml");
    return stylesDOM;
}
/**
* Sets the styles data for the embedded object.
*
* @param styles DOM representation of the object's styles.
*/
/*public void setStylesDOM(Document styles) {
stylesDOM = styles;
hasChanged = true;
}*/
/**
 * Extracts the data for the given XML file from the SX? file and creates
 * a DOM representation of it.
 *
 * <p>The zip entry that is looked up is <code>objName + "/" + name</code>.
 * The <code>DocumentBuilder</code> is created (non-validating) on first use
 * and kept in the <code>builder</code> field for later calls.</p>
 *
 * @param name The name of the XML file to retrieve. It is paired with
 *             the object name to access the SX? file.
 *
 * @return DOM representation of the named XML file, or <code>null</code>
 *         if there is no zip file or the entry yields no data.
 *
 * @throws SAXException If any parser error occurs; a
 *                      <code>ParserConfigurationException</code> raised
 *                      while creating the builder is wrapped in one.
 * @throws IOException If any IO error occurs
 */
private Document getNamedDOM(String name) throws SAXException, IOException {
    if (zipFile == null) {
        return null;
    }
    // SAXException and IOException simply propagate; only the parser
    // configuration failure needs translating into a declared type.
    try {
        if (builder == null) {
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setValidating(false);
            builder = factory.newDocumentBuilder();
        }
        // String concatenation already yields a fresh String; no copy needed.
        byte[] data = zipFile.getNamedBytes(objName + "/" + name);
        if (data == null) {
            return null;
        }
        return OfficeDocument.parse(builder, data);
    }
    catch (ParserConfigurationException pce) {
        throw new SAXException(pce);
    }
}
}

View file

@ -1,455 +0,0 @@
/************************************************************************
*
* The Contents of this file are made available subject to the terms of
*
* - GNU Lesser General Public License Version 2.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
// This version has been adapted for Writer2LaTeX
package writer2latex.xmerge;
/**
 * String constants for StarOffice/OpenOffice.org XML: qualified element
 * names, qualified attribute names, cell value types, file extensions and
 * MIME types.
 *
 * <p>Every field of an interface is implicitly <code>public static final</code>,
 * so the modifiers are not repeated on the individual declarations.</p>
 *
 * @author Herbie Ong, Paul Rank, Martin Maher
 */
public interface OfficeConstants {

    // ----- Document roots and top-level office elements -----

    /** Element <i>office:document</i>, the root element of a flat document. */
    String TAG_OFFICE_DOCUMENT = "office:document";

    /** Element <i>office:document-content</i>, the root element of the content document. */
    String TAG_OFFICE_DOCUMENT_CONTENT = "office:document-content";

    /** Element <i>office:document-settings</i>, the root element of the settings document. */
    String TAG_OFFICE_DOCUMENT_SETTINGS = "office:document-settings";

    /** Element <i>office:document-meta</i>, the root element of the meta document. */
    String TAG_OFFICE_DOCUMENT_META = "office:document-meta";

    /** Element <i>office:document-styles</i>, the root element of the styles document. */
    String TAG_OFFICE_DOCUMENT_STYLES = "office:document-styles";

    /** Attribute <i>office:class</i> of element <i>office:document</i>. */
    String ATTRIBUTE_OFFICE_CLASS = "office:class";

    /** Element <i>office:styles</i>. */
    String TAG_OFFICE_STYLES = "office:styles";

    /** Element <i>office:meta</i>. */
    String TAG_OFFICE_META = "office:meta";

    /** Element <i>office:automatic-styles</i>. */
    String TAG_OFFICE_AUTOMATIC_STYLES = "office:automatic-styles";

    /** Element <i>office:master-styles</i>. */
    String TAG_OFFICE_MASTER_STYLES = "office:master-styles";

    /** Element <i>office:body</i>. */
    String TAG_OFFICE_BODY = "office:body";

    /** Element <i>office:font-face-decls</i>. */
    String TAG_OFFICE_FONT_FACE_DECLS = "office:font-face-decls";

    /** Element <i>office:settings</i>. */
    String TAG_OFFICE_SETTINGS = "office:settings";

    // ----- Text field elements -----

    /** Element <i>text:variable-set</i>. */
    String TAG_TEXT_VARIABLE_SET = "text:variable-set";

    /** Element <i>text:variable-get</i>. */
    String TAG_TEXT_VARIABLE_GET = "text:variable-get";

    /** Element <i>text:expression</i>. */
    String TAG_TEXT_EXPRESSION = "text:expression";

    /** Element <i>text:user-field-get</i>. */
    String TAG_TEXT_USER_FIELD_GET = "text:user-field-get";

    /** Element <i>text:page-variable-get</i>. */
    String TAG_TEXT_PAGE_VARIABLE_GET = "text:page-variable-get";

    /** Element <i>text:sequence</i>. */
    String TAG_TEXT_SEQUENCE = "text:sequence";

    /** Element <i>text:variable-input</i>. */
    String TAG_TEXT_VARIABLE_INPUT = "text:variable-input";

    /** Element <i>text:time</i>. */
    String TAG_TEXT_TIME = "text:time";

    /** Element <i>text:page-count</i>. */
    String TAG_TEXT_PAGE_COUNT = "text:page-count";

    /** Element <i>text:page-number</i>. */
    String TAG_TEXT_PAGE_NUMBER = "text:page-number";

    /** Element <i>text:author-initials</i>. */
    String TAG_TEXT_AUTHOR_INITIALS = "text:author-initials";

    /** Element <i>text:subject</i>. */
    String TAG_TEXT_SUBJECT = "text:subject";

    /** Element <i>text:title</i>. */
    String TAG_TEXT_TITLE = "text:title";

    /** Element <i>text:creation-time</i>. */
    String TAG_TEXT_CREATION_TIME = "text:creation-time";

    /** Element <i>text:date</i>. */
    String TAG_TEXT_DATE = "text:date";

    /** Element <i>text:text-input</i>. */
    String TAG_TEXT_TEXT_INPUT = "text:text-input";

    // ----- Font declarations and style attributes -----

    /** Element <i>office:font-decls</i>. */
    String TAG_OFFICE_FONT_DECLS = "office:font-decls";

    /** Element <i>style:font-decl</i>. */
    String TAG_STYLE_FONT_DECL = "style:font-decl";

    /** Attribute <i>style:name</i>. */
    String ATTRIBUTE_STYLE_NAME = "style:name";

    /** Attribute <i>style:font-pitch</i>. */
    String ATTRIBUTE_STYLE_FONT_PITCH = "style:font-pitch";

    /** Attribute <i>fo:font-family</i>. */
    String ATTRIBUTE_FO_FONT_FAMILY = "fo:font-family";

    /** Attribute <i>fo:font-family-generic</i>. */
    String ATTRIBUTE_FO_FONT_FAMILY_GENERIC = "fo:font-family-generic";

    // ----- Text content elements -----

    /** Element <i>text:p</i>. */
    String TAG_PARAGRAPH = "text:p";

    /** The bare <i>text:</i> prefix. */
    String TAG_TEXT = "text:";

    /** Element <i>text:h</i>. */
    String TAG_HEADING = "text:h";

    /** Element <i>text:s</i>. */
    String TAG_SPACE = "text:s";

    /** Element <i>text:tab-stop</i>. */
    String TAG_TAB_STOP = "text:tab-stop";

    /** Element <i>text:line-break</i>. */
    String TAG_LINE_BREAK = "text:line-break";

    /** Element <i>text:span</i>. */
    String TAG_SPAN = "text:span";

    /** Element <i>text:a</i>. */
    String TAG_HYPERLINK = "text:a";

    /** Element <i>text:bookmark</i>. */
    String TAG_BOOKMARK = "text:bookmark";

    /** Element <i>text:bookmark-start</i>. */
    String TAG_BOOKMARK_START = "text:bookmark-start";

    /** Element <i>text:unordered-list</i>. */
    String TAG_UNORDERED_LIST = "text:unordered-list";

    /** Element <i>text:ordered-list</i>. */
    String TAG_ORDERED_LIST = "text:ordered-list";

    /** Element <i>text:list-header</i>. */
    String TAG_LIST_HEADER = "text:list-header";

    /** Element <i>text:list-item</i>. */
    String TAG_LIST_ITEM = "text:list-item";

    /** Attribute <i>text:c</i> of element <i>text:s</i>. */
    String ATTRIBUTE_SPACE_COUNT = "text:c";

    /** Attribute <i>text:style-name</i>. */
    String ATTRIBUTE_TEXT_STYLE_NAME = "text:style-name";

    // ----- Table elements and attributes -----

    /** Element <i>table:table</i>. */
    String TAG_TABLE = "table:table";

    /** Element <i>table:named-expressions</i>. */
    String TAG_NAMED_EXPRESSIONS = "table:named-expressions";

    /** Element <i>table:named-range</i>. */
    String TAG_TABLE_NAMED_RANGE = "table:named-range";

    /** Element <i>table:named-expression</i>. */
    String TAG_TABLE_NAMED_EXPRESSION = "table:named-expression";

    /** Attribute <i>table:name</i> of element <i>table:table</i>. */
    String ATTRIBUTE_TABLE_NAME = "table:name";

    /** Attribute <i>table:expression</i> of element <i>table:named-range</i>. */
    String ATTRIBUTE_TABLE_EXPRESSION = "table:expression";

    /** Attribute <i>table:base-cell-address</i> of element <i>table:named-range</i>. */
    String ATTRIBUTE_TABLE_BASE_CELL_ADDRESS = "table:base-cell-address";

    /** Attribute <i>table:cell-range-address</i> of element <i>table:named-range</i>. */
    String ATTRIBUTE_TABLE_CELL_RANGE_ADDRESS = "table:cell-range-address";

    /** Element <i>table:table-row</i>. */
    String TAG_TABLE_ROW = "table:table-row";

    /** Element <i>table:table-column</i>. */
    String TAG_TABLE_COLUMN = "table:table-column";

    /** Attribute <i>table:default-cell-style-name</i> of element <i>table:table-column</i>. */
    String ATTRIBUTE_DEFAULT_CELL_STYLE = "table:default-cell-style-name";

    /** Element <i>table:scenario</i>. */
    String TAG_TABLE_SCENARIO = "table:scenario";

    /** Element <i>table:table-cell</i>. */
    String TAG_TABLE_CELL = "table:table-cell";

    /** Attribute <i>table:value-type</i> of element <i>table:table-cell</i>. */
    String ATTRIBUTE_TABLE_VALUE_TYPE = "table:value-type";

    /** Attribute <i>table:number-columns-repeated</i> of element <i>table:table-cell</i>. */
    String ATTRIBUTE_TABLE_NUM_COLUMNS_REPEATED = "table:number-columns-repeated";

    /** Attribute <i>table:number-rows-repeated</i> of element <i>table:table-row</i>. */
    String ATTRIBUTE_TABLE_NUM_ROWS_REPEATED = "table:number-rows-repeated";

    /** Attribute <i>table:formula</i> of element <i>table:table-cell</i>. */
    String ATTRIBUTE_TABLE_FORMULA = "table:formula";

    /** Attribute <i>table:value</i> of element <i>table:table-cell</i>. */
    String ATTRIBUTE_TABLE_VALUE = "table:value";

    /** Attribute <i>table:date-value</i> of element <i>table:table-cell</i>. */
    String ATTRIBUTE_TABLE_DATE_VALUE = "table:date-value";

    /** Attribute <i>table:time-value</i> of element <i>table:table-cell</i>. */
    String ATTRIBUTE_TABLE_TIME_VALUE = "table:time-value";

    /** Attribute <i>table:string-value</i> of element <i>table:table-cell</i>. */
    String ATTRIBUTE_TABLE_STRING_VALUE = "table:string-value";

    /** Attribute <i>table:boolean-value</i> of element <i>table:table-cell</i>. */
    String ATTRIBUTE_TABLE_BOOLEAN_VALUE = "table:boolean-value";

    /** Attribute <i>table:style-name</i> of table elements. */
    String ATTRIBUTE_TABLE_STYLE_NAME = "table:style-name";

    /** Attribute <i>table:currency</i> of element <i>table:table-cell</i>. */
    String ATTRIBUTE_TABLE_CURRENCY = "table:currency";

    // ----- Cell value types -----

    /** The cell contains data of type <i>string</i>. */
    String CELLTYPE_STRING = "string";

    /** The cell contains data of type <i>float</i>. */
    String CELLTYPE_FLOAT = "float";

    /** The cell contains data of type <i>time</i>. */
    String CELLTYPE_TIME = "time";

    /** The cell contains data of type <i>date</i>. */
    String CELLTYPE_DATE = "date";

    /** The cell contains data of type <i>currency</i>. */
    String CELLTYPE_CURRENCY = "currency";

    /** The cell contains data of type <i>boolean</i>. */
    String CELLTYPE_BOOLEAN = "boolean";

    /** The cell contains data of type <i>percentage</i>. */
    String CELLTYPE_PERCENT = "percentage";

    // ----- File extensions and office:class values -----

    /** StarWriter XML file extension. */
    String SXW_FILE_EXTENSION = ".sxw";

    /** StarWriter XML <i>office:class</i> value. */
    String SXW_TYPE = "text";

    /** StarCalc XML file extension. */
    String SXC_FILE_EXTENSION = ".sxc";

    /** StarCalc XML <i>office:class</i> value. */
    String SXC_TYPE = "spreadsheet";

    // ----- Manifest XML -----

    /** Element <i>manifest:manifest</i> in the manifest XML. */
    String TAG_MANIFEST_ROOT = "manifest:manifest";

    /** Element <i>manifest:file-entry</i> in the manifest XML. */
    String TAG_MANIFEST_FILE = "manifest:file-entry";

    /** Attribute <i>manifest:media-type</i> of element <i>manifest:file-entry</i>. */
    String ATTRIBUTE_MANIFEST_FILE_TYPE = "manifest:media-type";

    /** Attribute <i>manifest:full-path</i> of element <i>manifest:file-entry</i>. */
    String ATTRIBUTE_MANIFEST_FILE_PATH = "manifest:full-path";

    // ----- settings.xml -----

    /** Element <i>config:config-item</i>. */
    String TAG_CONFIG_ITEM = "config:config-item";

    /** Element <i>config:config-item-set</i>. */
    String TAG_CONFIG_ITEM_SET = "config:config-item-set";

    /** Element <i>config:config-item-map-indexed</i>. */
    String TAG_CONFIG_ITEM_MAP_INDEXED = "config:config-item-map-indexed";

    /** Element <i>config:config-item-map-named</i>. */
    String TAG_CONFIG_ITEM_MAP_NAMED = "config:config-item-map-named";

    /** Element <i>config:config-item-map-entry</i>. */
    String TAG_CONFIG_ITEM_MAP_ENTRY = "config:config-item-map-entry";

    /** Attribute <i>config:name</i> of element <i>config:config-item</i>. */
    String ATTRIBUTE_CONFIG_NAME = "config:name";

    /** Attribute <i>config:type</i> of element <i>config:config-item</i>. */
    String ATTRIBUTE_CONFIG_TYPE = "config:type";

    // ----- MIME types -----

    /** StarWriter XML MIME type. */
    String SXW_MIME_TYPE = "application/vnd.sun.xml.writer";

    /** StarWriter XML Template MIME type. */
    String STW_MIME_TYPE = "application/vnd.sun.xml.writer.template";

    /** StarCalc XML MIME type. */
    String SXC_MIME_TYPE = "application/vnd.sun.xml.calc";

    /** StarCalc XML Template MIME type. */
    String STC_MIME_TYPE = "application/vnd.sun.xml.calc.template";

    /** StarImpress XML MIME type. */
    String SXI_MIME_TYPE = "application/vnd.sun.xml.impress";

    /** StarImpress XML Template MIME type. */
    String STI_MIME_TYPE = "application/vnd.sun.xml.impress.template";

    /** StarDraw XML MIME type. */
    String SXD_MIME_TYPE = "application/vnd.sun.xml.draw";

    /** StarMath XML MIME type. */
    String SXM_MIME_TYPE = "application/vnd.sun.xml.math";

    /** StarWriter Global XML MIME type. */
    String SXG_MIME_TYPE = "application/vnd.sun.xml.writer.global";
}

View file

@ -1,824 +0,0 @@
/************************************************************************
*
* The Contents of this file are made available subject to the terms of
*
* - GNU Lesser General Public License Version 2.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
// This version is adapted for Writer2LaTeX
// Version 1.4 (2012-03-19)
package writer2latex.xmerge;
import java.io.InputStream;
import java.io.Reader;
import java.io.BufferedReader;
import java.io.StringReader;
import java.io.InputStreamReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.w3c.dom.NamedNodeMap;
import org.xml.sax.SAXException;
import writer2latex.office.MIMETypes;
import writer2latex.util.Misc;
/**
* This class implements reading of ODF files
*/
public class OfficeDocument
implements OfficeConstants {
/**
 * Factory for <code>DocumentBuilder</code> objects. The factory is static,
 * i.e. shared by all instances, and its <code>validating</code> and
 * <code>namespaceAware</code> flags are reset by every constructor call.
 */
private static DocumentBuilderFactory factory =
DocumentBuilderFactory.newInstance();
/** DOM <code>Document</code> of content.xml. */
private Document contentDoc = null;
/** DOM <code>Document</code> of meta.xml. */
private Document metaDoc = null;
/** DOM <code>Document</code> of settings.xml. */
private Document settingsDoc = null;
/** DOM <code>Document</code> of styles.xml. */
private Document styleDoc = null;
/** DOM <code>Document</code> of META-INF/manifest.xml. */
private Document manifestDoc = null;
/** Document name as passed to the constructor, with the file extension trimmed off. */
private String documentName = null;
/** <code>documentName</code> with the value of <code>getFileExtension()</code> appended. */
private String fileName = null;
/**
 * <code>OfficeZip</code> object to store zip contents from
 * read <code>InputStream</code>. This member is assigned when a
 * zipped (package) file is read and is otherwise left
 * <code>null</code>; <code>isPackageFormat()</code> relies on that.
 */
private OfficeZip zip = null;
/**
 * Collection to keep track of the embedded objects in the document,
 * keyed by object name. Built on demand from the manifest.
 */
private Map<String, EmbeddedObject> embeddedObjects = null;
/**
 * Convenience constructor. Delegates to
 * {@link #OfficeDocument(String, boolean, boolean)} with
 * <code>namespaceAware = true</code> and <code>validating = false</code>.
 *
 * @param name <code>Document</code> name.
 */
public OfficeDocument(String name)
{
this(name, true, false);
}
/**
 * Creates a document with explicit parser settings.
 *
 * <p>Both flags are applied to the static <code>DocumentBuilderFactory</code>
 * of this class, so they also affect builders created afterwards for other
 * instances. The document name is stored with its file extension trimmed
 * off; the file name is that trimmed name plus
 * <code>getFileExtension()</code>.</p>
 *
 * @param name <code>Document</code> name (may or may not
 *             contain extension).
 * @param namespaceAware Value for <code>namespaceAware</code> flag.
 * @param validating Value for <code>validating</code> flag.
 */
public OfficeDocument(String name, boolean namespaceAware, boolean validating) {
    // Configure the shared parser factory
    factory.setNamespaceAware(namespaceAware);
    factory.setValidating(validating);

    // Derive the two name variants
    documentName = trimDocumentName(name);
    fileName = documentName + getFileExtension();
}
/**
 * Removes the file extension from the <code>Document</code> name.
 *
 * <p>The extension is the value of <code>getFileExtension()</code> and is
 * matched against the end of the name ignoring case. The comparison is done
 * character by character and does not depend on the default locale (a
 * locale-sensitive <code>toLowerCase()</code> would, for instance, fail to
 * recognise an upper-case extension containing 'I' under a Turkish
 * locale).</p>
 *
 * @param name Full <code>Document</code> name with extension.
 *
 * @return Name of <code>Document</code> without the extension; the name
 *         unchanged if it does not end with the extension.
 */
private String trimDocumentName(String name) {
    String ext = getFileExtension();
    int endIndex = name.length() - ext.length();
    // regionMatches yields false for a negative offset, which covers a
    // name that is shorter than the extension.
    if (name.regionMatches(true, endIndex, ext, 0, ext.length())) {
        // strip the extension
        return name.substring(0, endIndex);
    }
    return name;
}
// FIX2 (HJ): Determine whether this is package or flat format
/**
 * Tells whether the document was read from a zip package. The answer is
 * derived from whether an <code>OfficeZip</code> has been stored in
 * <code>zip</code>.
 *
 * @return true if the document is in package format, false if it's flat xml
 */
public boolean isPackageFormat() {
    return zip != null;
}
/**
 * Returns the DOM <code>Document</code> of content.xml held by this object.
 * Nothing is created here: the value is <code>null</code> until it has
 * been populated (the <code>read</code> methods do so).
 *
 * @return DOM <code>Document</code> object, possibly <code>null</code>.
 */
public Document getContentDOM() {
    return this.contentDoc;
}
/**
 * Returns the DOM <code>Document</code> of meta.xml held by this object.
 * Nothing is created here: the value is <code>null</code> until it has
 * been populated (the <code>read</code> methods do so).
 *
 * @return DOM <code>Document</code> object, possibly <code>null</code>.
 */
public Document getMetaDOM() {
    return this.metaDoc;
}
/**
 * Returns the DOM <code>Document</code> of settings.xml held by this object.
 * Nothing is created here: the value is <code>null</code> until it has
 * been populated (the <code>read</code> methods do so).
 *
 * @return DOM <code>Document</code> object, possibly <code>null</code>.
 */
public Document getSettingsDOM() {
    return this.settingsDoc;
}
/**
 * Returns the DOM <code>Document</code> of the styles held by this object.
 * Nothing is created here, so this may return <code>null</code>: a
 * <code>read</code> method may or may not have built a style DOM,
 * depending on the input.
 *
 * @return DOM <code>Document</code> object, possibly <code>null</code>.
 */
public Document getStyleDOM() {
    return this.styleDoc;
}
/**
 * Returns the document name, i.e. the name given to the constructor with
 * the file extension trimmed off.
 *
 * @return The name of <code>Document</code>.
 */
public String getName() {
    return this.documentName;
}
/**
 * Returns the file name of the document: the document name followed by
 * the value of <code>getFileExtension()</code> as computed in the
 * constructor.
 *
 * @return The file name of <code>Document</code>.
 */
public String getFileName() {
    return this.fileName;
}
/**
 * Returns the file extension for this type of <code>Document</code>.
 * This implementation returns the empty string. The method is called from
 * the constructor and from <code>trimDocumentName</code>.
 *
 * @return The file extension of <code>Document</code>.
 */
// TODO: is this used?
protected String getFileExtension() {
    return "";
}
/**
 * Returns all the embedded objects (graphics, formulae, etc.) present in
 * this document.
 *
 * <p>On the first call with a manifest available, the
 * <i>manifest:file-entry</i> elements are scanned and the result is cached
 * in <code>embeddedObjects</code>:</p>
 * <ul>
 * <li>entries whose media type starts with
 *     <code>application/vnd.sun.xml</code> or
 *     <code>application/vnd.oasis.opendocument</code> become
 *     <code>EmbeddedXMLObject</code>s (the root entry "/" is skipped),
 *     keyed by the path without its trailing '/';</li>
 * <li>entries of type <code>text/xml</code> are skipped;</li>
 * <li>every other entry, including one with an empty media type, becomes
 *     an <code>EmbeddedBinaryObject</code> keyed by its path.</li>
 * </ul>
 *
 * <p>If there is no manifest (and nothing has been cached), an empty
 * iterator is returned instead of failing with a
 * <code>NullPointerException</code>.</p>
 *
 * @return An <code>Iterator</code> of <code>EmbeddedObject</code> objects;
 *         never <code>null</code>.
 */
public Iterator<EmbeddedObject> getEmbeddedObjects() {
    if (embeddedObjects == null) {
        if (manifestDoc == null) {
            // Nothing to enumerate. The empty result is deliberately not
            // cached, so the state of this object is left untouched.
            return new HashMap<String, EmbeddedObject>().values().iterator();
        }

        embeddedObjects = new HashMap<String, EmbeddedObject>();

        // Need to read the manifest file and construct a list of objects
        NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE);
        int len = nl.getLength();
        for (int i = 0; i < len; i++) {
            NamedNodeMap attrs = nl.item(i).getAttributes();
            String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue();
            String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue();

            /*
             * According to OpenOffice.org XML File Format document (ver. 1)
             * there are only two types of embedded object:
             *
             * Objects with an XML representation.
             * Objects without an XML representation.
             *
             * The former are represented by one or more XML files.
             * The latter are in binary form.
             */
            // FIX2 (HJ): Allow either OOo 1.x or OpenDocument embedded objects
            if (type.startsWith("application/vnd.sun.xml")
                    || type.startsWith("application/vnd.oasis.opendocument")) {
                if (path.equals("/")) {
                    // Exclude the main document entries
                    continue;
                }
                // Take off the trailing '/', but only if there is one
                String name = path.endsWith("/")
                        ? path.substring(0, path.length() - 1)
                        : path;
                embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip));
            }
            else if (type.equals("text/xml")) {
                // XML entries are either embedded StarOffice doc entries or main
                // document entries
                continue;
            }
            else { // FIX (HJ): allows empty MIME type
                embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip));
            }
        }
    }
    return embeddedObjects.values().iterator();
}
/**
 * Looks up a single embedded object by name.
 * The name should be stripped of any preceding path characters, such as
 * '/', '.' or '#'.
 *
 * <p>If the collection of embedded objects has not been built yet, it is
 * built from the manifest first; without a manifest the lookup yields
 * <code>null</code>.</p>
 *
 * @param name The name of the embedded object to retrieve.
 *
 * @return An <code>EmbeddedObject</code> instance representing the named
 *         object, or <code>null</code> if the name is <code>null</code>,
 *         there is no manifest, or no object of that name is known.
 */
public EmbeddedObject getEmbeddedObject(String name) {
    if (name == null) {
        return null;
    }
    if (embeddedObjects == null) {
        // FIX2 (HJ): Return null if there's no manifest
        if (manifestDoc == null) {
            return null;
        }
        // Populates embeddedObjects as a side effect
        getEmbeddedObjects();
    }
    // Map.get already yields null for an unknown key
    return embeddedObjects.get(name);
}
/**
* Adds a new embedded object to the document.
*
* @param embObj An instance of <code>EmbeddedObject</code>.
*/
/*public void addEmbeddedObject(EmbeddedObject embObj) {
if (embObj == null) {
return;
}
if (embeddedObjects == null) {
embeddedObjects = new HashMap<String, EmbeddedObject>();
}
embeddedObjects.put(embObj.getName(), embObj);
}*/
/**
 * Reads the Office <code>Document</code> from the given
 * <code>InputStream</code>, detecting the format on the way.
 * FIX3 (HJ): Perform simple type detection to determine package or flat format
 *
 * <p>The stream is first copied into a byte array. If the magic MIME type
 * of those bytes is zip, the data is read as a package, otherwise as flat
 * XML; the actual reading is delegated to
 * <code>read(InputStream, boolean)</code>.</p>
 *
 * @param is Office document <code>InputStream</code>.
 *
 * @throws IOException If any I/O error occurs.
 */
public void read(InputStream is) throws IOException {
    byte[] bytes = Misc.inputStreamToByteArray(is);
    // if it's zip, assume package - otherwise assume flat
    boolean isPackage = MIMETypes.ZIP.equals(MIMETypes.getMagicMIMEType(bytes));
    read(new ByteArrayInputStream(bytes), isPackage);
}
/**
 * Reads a document in package (zip) format.
 *
 * <p>The stream is loaded into a new <code>OfficeZip</code>, which is kept
 * in <code>zip</code>. The content entry is mandatory; the style, meta,
 * settings and manifest entries are parsed only if the zip provides bytes
 * for them.</p>
 *
 * @param is <code>InputStream</code> of the zipped document.
 *
 * @throws IOException If any I/O error occurs. Parser configuration
 *         errors, parse errors and a missing content entry are reported
 *         as <code>OfficeDocumentException</code>.
 */
private void readZip(InputStream is) throws IOException {
    DocumentBuilder builder;
    try {
        builder = factory.newDocumentBuilder();
    } catch (ParserConfigurationException ex) {
        throw new OfficeDocumentException(ex);
    }

    // read in Office zip file format
    zip = new OfficeZip();
    zip.read(is);

    // content.xml is required
    byte[] contentBytes = zip.getContentXMLBytes();
    if (contentBytes == null) {
        throw new OfficeDocumentException("Entry content.xml not found in file");
    }

    try {
        contentDoc = parse(builder, contentBytes);

        // the remaining entries are optional
        byte[] styleBytes = zip.getStyleXMLBytes();
        if (styleBytes != null) {
            styleDoc = parse(builder, styleBytes);
        }

        byte[] metaBytes = zip.getMetaXMLBytes();
        if (metaBytes != null) {
            metaDoc = parse(builder, metaBytes);
        }

        byte[] settingsBytes = zip.getSettingsXMLBytes();
        if (settingsBytes != null) {
            settingsDoc = parse(builder, settingsBytes);
        }

        // META-INF/manifest.xml
        byte[] manifestBytes = zip.getManifestXMLBytes();
        if (manifestBytes != null) {
            manifestDoc = parse(builder, manifestBytes);
        }
    } catch (SAXException ex) {
        throw new OfficeDocumentException(ex);
    }
}
/**
 * Read the Office <code>Document</code> from the given
 * <code>InputStream</code>.
 *
 * <p>A zipped document is handed over to <code>readZip</code>. A flat XML
 * document is parsed into one DOM, from which four separate DOMs are
 * assembled by importing the first occurrence of selected elements:</p>
 * <ul>
 * <li>content: font-decls, font-face-decls, automatic-styles, body</li>
 * <li>styles: font-decls, font-face-decls, styles, automatic-styles,
 *     master-styles</li>
 * <li>settings: settings</li>
 * <li>meta: meta</li>
 * </ul>
 *
 * @param is Office document <code>InputStream</code>.
 * @param isZip <code>boolean</code> Identifies whether
 *              a file is zipped or not
 *
 * @throws IOException If any I/O error occurs. Parser configuration and
 *         parse errors are reported as <code>OfficeDocumentException</code>.
 */
public void read(InputStream is, boolean isZip) throws IOException {
    if (isZip) {
        // readZip creates its own DocumentBuilder
        readZip(is);
        return;
    }

    DocumentBuilder builder;
    try {
        builder = factory.newDocumentBuilder();
    } catch (ParserConfigurationException ex) {
        throw new OfficeDocumentException(ex);
    }

    try {
        Reader r = secondHack(is);
        Document newDoc = builder.parse(new InputSource(r));

        /*content*/
        contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
        // FIX (HJ): Include office:font-decls in content DOM
        // FIX2 (HJ): Include office:font-face-decls (OpenDocument) in content DOM
        importFirstByTag(newDoc, contentDoc,
                TAG_OFFICE_FONT_DECLS,
                TAG_OFFICE_FONT_FACE_DECLS,
                TAG_OFFICE_AUTOMATIC_STYLES,
                TAG_OFFICE_BODY);

        /*Styles*/
        styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
        // FIX (HJ): Include office:font-decls, office:automatic-styles and
        // office:master-styles in styles DOM
        // FIX2 (HJ): Include office:font-face-decls in styles DOM
        importFirstByTag(newDoc, styleDoc,
                TAG_OFFICE_FONT_DECLS,
                TAG_OFFICE_FONT_FACE_DECLS,
                TAG_OFFICE_STYLES,
                TAG_OFFICE_AUTOMATIC_STYLES,
                TAG_OFFICE_MASTER_STYLES);

        /*Settings*/
        settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
        importFirstByTag(newDoc, settingsDoc, TAG_OFFICE_SETTINGS);

        /*Meta*/
        metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META);
        importFirstByTag(newDoc, metaDoc, TAG_OFFICE_META);
    }
    catch (SAXException ex) {
        throw new OfficeDocumentException(ex);
    }
}

/**
 * For each tag name, in the given order, copies the first element of that
 * name found in <code>source</code> (deep copy) and appends it to the
 * document element of <code>target</code>. Tag names without a match are
 * skipped.
 */
private static void importFirstByTag(Document source, Document target, String... tagNames) {
    Node targetRoot = target.getDocumentElement();
    for (String tagName : tagNames) {
        NodeList matches = source.getElementsByTagName(tagName);
        if (matches.getLength() > 0) {
            targetRoot.appendChild(target.importNode(matches.item(0), true));
        }
    }
}
/**
 * Builds a DOM <code>Document</code> from raw XML bytes.
 *
 * <p>The bytes are first run through <code>hack</code>, which decodes
 * them as UTF-8 and strips any <code>&lt;!DOCTYPE&gt;</code> declaration;
 * the resulting character stream is then handed to the supplied
 * builder.</p>
 *
 * @param builder <code>DocumentBuilder</code> that performs the parsing.
 * @param bytes   XML content to parse.
 *
 * @return The parsed DOM <code>Document</code>.
 *
 * @throws SAXException If the builder reports a parsing error.
 * @throws IOException  If reading or filtering the content fails.
 */
static Document parse(DocumentBuilder builder, byte bytes[])
        throws SAXException, IOException {
    // TODO: replace hack with a more appropriate fix.
    Reader filtered = hack(new ByteArrayInputStream(bytes));
    return builder.parse(new InputSource(filtered));
}
/**
 * <p>Builds a fresh DOM <code>Document</code> that holds nothing but a
 * root element carrying the standard OpenOffice XML namespace
 * declarations.</p>
 *
 * <p>The value of the <i>office:class</i> attribute is obtained from
 * <code>getOfficeClassAttribute</code>, so subclasses can supply their
 * own; <i>office:version</i> is always set to 1.0.</p>
 *
 * @param rootName tag name of the root element to create.
 *
 * @return The new <code>Document</code> with its root element attached.
 *
 * @throws IOException If a <code>DocumentBuilder</code> cannot be
 *                     configured (reported as an
 *                     <code>OfficeDocumentException</code>).
 */
private final Document createDOM(String rootName) throws IOException {
    final Document dom;
    try {
        dom = factory.newDocumentBuilder().newDocument();
    } catch (ParserConfigurationException ex) {
        throw new OfficeDocumentException(ex);
    }

    final Element rootElement = dom.createElement(rootName);
    dom.appendChild(rootElement);

    // Namespace declarations, applied in this order.
    final String[][] namespaces = {
        {"xmlns:office", "http://openoffice.org/2000/office"},
        {"xmlns:style", "http://openoffice.org/2000/style"},
        {"xmlns:text", "http://openoffice.org/2000/text"},
        {"xmlns:table", "http://openoffice.org/2000/table"},
        {"xmlns:draw", "http://openoffice.org/2000/drawing"},
        {"xmlns:fo", "http://www.w3.org/1999/XSL/Format"},
        {"xmlns:xlink", "http://www.w3.org/1999/xlink"},
        {"xmlns:number", "http://openoffice.org/2000/datastyle"},
        {"xmlns:svg", "http://www.w3.org/2000/svg"},
        {"xmlns:chart", "http://openoffice.org/2000/chart"},
        {"xmlns:dr3d", "http://openoffice.org/2000/dr3d"},
        {"xmlns:math", "http://www.w3.org/1998/Math/MathML"},
        {"xmlns:form", "http://openoffice.org/2000/form"},
        {"xmlns:script", "http://openoffice.org/2000/script"}
    };
    for (int i = 0; i < namespaces.length; i++) {
        rootElement.setAttribute(namespaces[i][0], namespaces[i][1]);
    }

    rootElement.setAttribute("office:class", getOfficeClassAttribute());
    rootElement.setAttribute("office:version", "1.0");
    return dom;
}
/**
 * Supplies the value written to the <i>office:class</i> attribute of
 * the root element of newly created documents. This implementation
 * yields the empty string.
 *
 * @return The attribute value.
 */
// not really used...
protected String getOfficeClassAttribute() {
    return "";
}
/**
 * <p>Stop-gap filter that removes the <code>&lt;!DOCTYPE&gt;</code>
 * declaration from an XML stream before it reaches the parser.</p>
 *
 * <p>Background: with the jaxp1.0 parser there is no way to switch off
 * DTD processing, and the current DTDs contain bugs that make the
 * parser throw. This workaround is meant to be replaced eventually.</p>
 *
 * <p>The stream is decoded as UTF-8 and processed line by line. On a
 * line containing <code>&lt;!DOCTYPE</code>, everything from that marker
 * up to and including the next <code>'&gt;'</code> is dropped. If the
 * closing <code>'&gt;'</code> is not on the same line, the following
 * lines are discarded until one containing <code>'&gt;'</code> is found,
 * and only the text after it is kept. Every line written to the result
 * is terminated with a newline. Note that the first <code>'&gt;'</code>
 * found is taken as the end of the declaration.</p>
 *
 * @param is <code>InputStream</code> to be filtered.
 *
 * @return Reader over the content without the
 *         <code>&lt;!DOCTYPE&gt;</code> declaration.
 *
 * @throws IOException If any I/O error occurs, or if the stream ends
 *                     before the declaration is closed.
 */
private static Reader hack(InputStream is) throws IOException {
    BufferedReader lines = new BufferedReader(new InputStreamReader(is, "UTF-8"));
    StringBuilder filtered = new StringBuilder();

    for (String line = lines.readLine(); line != null; line = lines.readLine()) {
        int start = line.indexOf("<!DOCTYPE");
        if (start < 0) {
            // Ordinary line: copy it through, keeping the line break.
            filtered.append(line).append("\n");
            continue;
        }

        // Keep whatever precedes the declaration on this line.
        filtered.append(line, 0, start);

        // Locate the end of the declaration, pulling in further lines if needed.
        int end = line.indexOf('>', start + 8);
        while (end < 0) {
            line = lines.readLine();
            if (line == null) {
                throw new IOException("Invalid XML");
            }
            end = line.indexOf('>');
        }

        // Keep whatever follows the declaration, plus the line break.
        filtered.append(line, end + 1, line.length()).append("\n");
    }

    return new StringReader(filtered.toString());
}
/**
 * <p>Reads the whole <code>InputStream</code> as UTF-8 text and returns
 * it as a <code>Reader</code>, cutting off anything that follows the
 * last <code>&lt;/office:document&gt;</code> end tag.</p>
 *
 * <p>This hacked code needs to be changed later on.</p>
 *
 * <p>Issue: the new oasis input file stream means
 * that the old input stream fails. see #i33702# </p>
 *
 * <p>If the end tag does not occur in the stream at all, the text is
 * returned unchanged so that the XML parser can judge it. (Previously
 * this case failed with a <code>StringIndexOutOfBoundsException</code>.)</p>
 *
 * @param is <code>InputStream</code> to be filtered.
 *
 * @return Reader over the (possibly truncated) content of the stream.
 *
 * @throws IOException If any I/O error occurs.
 */
private static Reader secondHack(InputStream is) throws IOException {
    final String endTag = "</office:document>";
    BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
    char[] chunk = new char[4096];
    StringBuilder text = new StringBuilder();
    int n;
    while ((n = br.read(chunk, 0, chunk.length)) > 0) {
        text.append(chunk, 0, n);
    }
    // ensure there is no trailing garbage after the end of the stream.
    int endIndex = text.lastIndexOf(endTag);
    if (endIndex > -1) {
        // Keep everything up to and including the last end tag.
        text.setLength(endIndex + endTag.length());
    }
    return new StringReader(text.toString());
}
}

View file

@ -1,145 +0,0 @@
/************************************************************************
*
* The Contents of this file are made available subject to the terms of
*
* - GNU Lesser General Public License Version 2.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
// This version is adapted for Writer2LaTeX
// Version 1.0 (2008-11-22)
package writer2latex.xmerge;
import java.io.IOException;
//import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
//import org.openoffice.xmerge.util.Resources;
/**
 * Used by OfficeDocument to encapsulate exceptions. It will add
 * more details to the message string if it is of type
 * <code>SAXParseException</code>.
 *
 * <p>The wrapped exception, when there is one, is also recorded as the
 * cause so that its stack trace is not lost.</p>
 *
 * @author Herbie Ong
 */
public final class OfficeDocumentException extends IOException {

    /**
     * Extra detail placed in front of the base message. It stays empty
     * unless the exception was built from a <code>SAXException</code>.
     * Always initialised, so <code>getMessage</code> is safe for every
     * constructor.
     */
    StringBuffer message = new StringBuffer();

    /**
     * Constructor, capturing additional information from the
     * <code>SAXException</code>: the parse position for a
     * <code>SAXParseException</code>, and the message of an embedded
     * exception if there is one.
     *
     * @param e The <code>SAXException</code>.
     */
    public OfficeDocumentException(SAXException e) {
        super(e.toString());
        initCause(e);

        if (e instanceof SAXParseException) {
            SAXParseException spe = (SAXParseException) e;
            message.append("PARSE_ERROR").append(": ");
            message.append("LINE").append(": ").append(spe.getLineNumber()).append(", ");
            message.append("COLUMN").append(": ").append(spe.getColumnNumber()).append(", ");
            message.append("SYSTEM_ID").append(": ").append(spe.getSystemId()).append(", ");
            message.append("PUBLIC_ID").append(": ").append(spe.getPublicId());
            message.append("\n");
        }

        // if there exists an embedded exception with a message of its own
        Exception embedded = e.getException();
        if (embedded != null && embedded.getMessage() != null) {
            message.append(embedded.getMessage());
        }
    }

    /**
     * Constructor, creates exception with provided message.
     *
     * @param s Message value for the exception.
     */
    public OfficeDocumentException(String s) {
        super(s);
    }

    /**
     * Constructor, creates exception with the message
     * corresponding to the message value of the provided
     * exception, and records that exception as the cause.
     *
     * @param e The Exception.
     */
    public OfficeDocumentException(Exception e) {
        super(e.getMessage());
        initCause(e);
    }

    /**
     * Returns the message value for the <code>Exception</code>: the
     * extra detail collected from a <code>SAXException</code> (if any)
     * followed by the base message.
     *
     * @return The message value for the <code>Exception</code>.
     */
    @Override
    public String getMessage() {
        String base = super.getMessage();
        if (message == null || message.length() == 0) {
            return base;
        }
        return message.toString() + base;
    }
}

View file

@ -1,274 +0,0 @@
/************************************************************************
*
* The Contents of this file are made available subject to the terms of
*
* - GNU Lesser General Public License Version 2.1
*
* Sun Microsystems Inc., October, 2000
*
* GNU Lesser General Public License Version 2.1
* =============================================
* Copyright 2000 by Sun Microsystems, Inc.
* 901 San Antonio Road, Palo Alto, CA 94303, USA
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* The Initial Developer of the Original Code is: Sun Microsystems, Inc.
*
* Copyright: 2000 by Sun Microsystems, Inc.
*
* All Rights Reserved.
*
* Contributor(s): _______________________________________
*
*
************************************************************************/
// This version is adapted for Writer2LaTeX
// Version 1.4 (2012-03-19)
package writer2latex.xmerge;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
 * Class used by <code>OfficeDocument</code> to handle reading
 * from a ZIP file, as well as storing ZIP entries.
 *
 * <p>All entries are held in memory in the order in which they were
 * read. The positions of the well-known XML streams are remembered so
 * they can be returned directly.</p>
 *
 * @author Herbie Ong
 */
class OfficeZip {

    /** File name of the XML file in a zipped document. */
    private final static String CONTENTXML = "content.xml";
    private final static String STYLEXML = "styles.xml";
    private final static String METAXML = "meta.xml";
    private final static String SETTINGSXML = "settings.xml";
    private final static String MANIFESTXML = "META-INF/manifest.xml";

    /** Size of the buffer used when copying entry data. */
    private final static int BUFFERSIZE = 1024;

    /** Entries in read order; an array-backed list keeps indexed access cheap. */
    private final List<Entry> entryList = new ArrayList<Entry>();

    /** Positions of the well-known entries in entryList, -1 if absent. */
    private int contentIndex = -1;
    private int styleIndex = -1;
    private int metaIndex = -1;
    private int settingsIndex = -1;
    private int manifestIndex = -1;

    /** Default constructor. */
    OfficeZip() {
    }

    /**
     * <p>Read each zip entry in the <code>InputStream</code> object
     * and store in entryList both the <code>ZipEntry</code> object
     * as well as the bits of each entry. Call this method before
     * calling the <code>getContentXMLBytes</code> method or the
     * <code>getStyleXMLBytes</code> method.</p>
     *
     * <p>Keeps track of the positions of CONTENTXML, STYLEXML, METAXML,
     * SETTINGSXML and MANIFESTXML (names compared ignoring case). The
     * positions are always relative to the full entry list, so they stay
     * valid if this method is called more than once; a later occurrence
     * of a well-known name replaces the earlier one.</p>
     *
     * <p>The zip stream (and with it the given stream) is closed when
     * this method returns, whether it succeeds or fails.</p>
     *
     * @param is <code>InputStream</code> object to read.
     *
     * @throws IOException If any I/O error occurs.
     */
    void read(InputStream is) throws IOException {
        ZipInputStream zis = new ZipInputStream(is);
        try {
            byte buffer[] = new byte[BUFFERSIZE];
            ZipEntry ze;
            while ((ze = zis.getNextEntry()) != null) {
                ByteArrayOutputStream baos = new ByteArrayOutputStream();
                int len;
                while ((len = zis.read(buffer)) > 0) {
                    baos.write(buffer, 0, len);
                }

                // Position this entry will occupy in the complete list.
                int index = entryList.size();
                entryList.add(new Entry(ze, baos.toByteArray()));

                String name = ze.getName();
                if (name.equalsIgnoreCase(CONTENTXML)) {
                    contentIndex = index;
                } else if (name.equalsIgnoreCase(STYLEXML)) {
                    styleIndex = index;
                } else if (name.equalsIgnoreCase(METAXML)) {
                    metaIndex = index;
                } else if (name.equalsIgnoreCase(SETTINGSXML)) {
                    settingsIndex = index;
                } else if (name.equalsIgnoreCase(MANIFESTXML)) {
                    manifestIndex = index;
                }
            }
        } finally {
            zis.close();
        }
    }

    /**
     * This method returns the CONTENTXML file in a
     * <code>byte</code> array. It returns null if there is no
     * CONTENTXML in this zip file.
     *
     * @return CONTENTXML in a <code>byte</code> array.
     */
    byte[] getContentXMLBytes() {
        return getEntryBytes(contentIndex);
    }

    /**
     * This method returns the STYLEXML file in a
     * <code>byte</code> array. It returns null if there is
     * no STYLEXML in this zip file.
     *
     * @return STYLEXML in a <code>byte</code> array.
     */
    byte[] getStyleXMLBytes() {
        return getEntryBytes(styleIndex);
    }

    /**
     * This method returns the METAXML file in a
     * <code>byte</code> array. It returns null if there is
     * no METAXML in this zip file.
     *
     * @return METAXML in a <code>byte</code> array.
     */
    byte[] getMetaXMLBytes() {
        return getEntryBytes(metaIndex);
    }

    /**
     * This method returns the SETTINGSXML file in a
     * <code>byte</code> array. It returns null if there is
     * no SETTINGSXML in this zip file.
     *
     * @return SETTINGSXML in a <code>byte</code> array.
     */
    byte[] getSettingsXMLBytes() {
        return getEntryBytes(settingsIndex);
    }

    /**
     * This method returns the MANIFESTXML file in a <code>byte</code> array.
     * It returns null if there is no MANIFESTXML in this zip file.
     *
     * @return MANIFESTXML in a <code>byte</code> array.
     */
    byte[] getManifestXMLBytes() {
        return getEntryBytes(manifestIndex);
    }

    /**
     * This method returns the bytes corresponding to the entry named in the
     * parameter. The name must match exactly (case-sensitive); if several
     * entries carry the same name, the first one read is returned.
     *
     * @param name The name of the entry in the Zip file to retrieve.
     *
     * @return The data for the named entry in a <code>byte</code> array or
     *         <code>null</code> if no entry is found.
     */
    byte[] getNamedBytes(String name) {
        // The list is kept in read order (the stored indices depend on it),
        // so a plain linear scan is used.
        for (Entry e : entryList) {
            if (e.zipEntry.getName().equals(name)) {
                return e.bytes;
            }
        }
        return null;
    }

    /**
     * Used by the <code>get...XMLBytes</code> methods to return the
     * <code>byte</code> array from the corresponding
     * <code>entry</code> in the <code>entryList</code>.
     *
     * @param index Index of <code>Entry</code> object in
     *              <code>entryList</code>.
     *
     * @return <code>byte</code> array associated in that
     *         <code>Entry</code> object or null, if there is
     *         not such <code>Entry</code>.
     */
    private byte[] getEntryBytes(int index) {
        if (index > -1) {
            return entryList.get(index).bytes;
        }
        return null;
    }

    /**
     * This nested class is used as a data structure for holding
     * a <code>ZipEntry</code> info and its corresponding bytes.
     * These are stored in entryList.
     */
    private static final class Entry {

        final ZipEntry zipEntry;
        final byte bytes[];

        Entry(ZipEntry zipEntry, byte bytes[]) {
            this.zipEntry = zipEntry;
            this.bytes = bytes;
        }
    }
}