Some minor bugfixing, refactoring and rearrangement

git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@165 f0f2a975-2e09-46c8-9428-3b39399b9f3c
This commit is contained in:
henrikjust 2014-08-27 07:25:22 +00:00
parent 8a6e654344
commit 6249ef406e
31 changed files with 146 additions and 288 deletions

View file

@ -0,0 +1,179 @@
/************************************************************************
*
* BinaryGraphicsDocument.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2014 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2014-08-25)
*
*/
package writer2latex.base;
import java.io.OutputStream;
import java.io.InputStream;
//import java.io.ByteArrayOutputStream;
import java.io.IOException;
import writer2latex.api.OutputFile;
import writer2latex.util.Misc;
/**
 * <p>Class representing a binary graphics document.
 * This class is used for representing graphics documents that are <i>not</i>
 * interpreted in any way, but simply copied verbatim from the source format
 * to the target format.</p>
 *
 * <p>The content is a range (offset and length) within a byte array. The array
 * handed to {@link #read(byte[], int, int)} is stored by reference, not copied.</p>
 */
public class BinaryGraphicsDocument implements OutputFile {

    private String docName;
    private byte[] data;
    private int nOff;
    private int nLen;
    private String sFileExtension;
    private String sMimeType;

    /**
     * <p>Constructs a new graphics document.</p>
     *
     * <p>This new document does not contain any data. The data must be
     * supplied afterwards with one of the <code>read</code> methods, e.g.
     * {@link #read(InputStream) read} from an <code>InputStream</code>.</p>
     *
     * @param name The name of the document; a trailing file extension
     *             (compared ignoring case) is removed. Must not be null.
     * @param sFileExtension The file extension, as it is appended to the name
     *             by {@link #getFileName()}. Must not be null.
     * @param sMimeType The MIME type reported for the document.
     */
    public BinaryGraphicsDocument(String name, String sFileExtension, String sMimeType) {
        this.sFileExtension = sFileExtension;
        this.sMimeType = sMimeType;
        docName = trimDocumentName(name);
    }

    /**
     * <p>This method reads <code>byte</code> data from the InputStream.
     * The complete content of the stream becomes the document content.</p>
     *
     * @param is InputStream containing a binary data file.
     *
     * @throws IOException In case of any I/O errors.
     */
    public void read(InputStream is) throws IOException {
        byte[] bytes = Misc.inputStreamToByteArray(is);
        // The range must be set too, otherwise write() would emit nothing
        read(bytes, 0, bytes == null ? 0 : bytes.length);
    }

    /**
     * <p>Uses a complete byte array as document content.</p>
     *
     * @param data The document content.
     */
    public void read(byte[] data) {
        read(data,0,data.length);
    }

    /**
     * <p>Uses a part of a byte array as document content. The array is
     * not copied.</p>
     *
     * @param data The array containing the document content.
     * @param nOff Index of the first byte of the content.
     * @param nLen Number of bytes of content.
     */
    public void read(byte[] data, int nOff, int nLen) {
        this.data = data;
        this.nOff = nOff;
        this.nLen = nLen;
    }

    /*
     * Utility method to make sure the document name is stripped of any file
     * extensions before use. The comparison ignores case and does not depend
     * on the default locale.
     */
    private String trimDocumentName(String name) {
        String sExt = getFileExtension();
        int nExtLen = sExt.length();
        int nStart = name.length() - nExtLen;
        // regionMatches returns false for a negative start (name shorter than extension)
        if (name.regionMatches(true, nStart, sExt, 0, nExtLen)) {
            return name.substring(0, nStart);
        }
        return name;
    }

    /**
     * <p>Returns the <code>Document</code> name with no file extension.</p>
     *
     * @return The <code>Document</code> name with no file extension.
     */
    public String getName() {
        return docName;
    }

    /**
     * <p>Returns the <code>Document</code> name with file extension.</p>
     *
     * @return The <code>Document</code> name with file extension.
     */
    public String getFileName() {
        return docName + getFileExtension();
    }

    /**
     * <p>Returns the document content, i.e. the same bytes that
     * {@link #write(OutputStream)} writes.</p>
     *
     * @return The backing array itself if the content spans all of it,
     *         otherwise a copy of the content range; null if no data has
     *         been read.
     */
    public byte[] getData() {
        if (data == null || (nOff == 0 && nLen == data.length)) {
            return data;
        }
        byte[] range = new byte[nLen];
        System.arraycopy(data, nOff, range, 0, nLen);
        return range;
    }

    /**
     * <p>Writes out the <code>Document</code> content to the specified
     * <code>OutputStream</code>. Nothing is written if no data has been
     * read. The stream is neither flushed nor closed.</p>
     *
     * <p>This method is not synchronized; the caller must make sure that
     * calls to this method are thread-safe.</p>
     *
     * @param os <code>OutputStream</code> to write out the
     *           <code>Document</code> content.
     *
     * @throws IOException If any I/O error occurs.
     */
    public void write(OutputStream os) throws IOException {
        if (data != null) {
            os.write(data, nOff, nLen);
        }
    }

    /**
     * Returns the file extension for this type of
     * <code>Document</code>.
     *
     * @return The file extension of <code>Document</code>.
     */
    public String getFileExtension() {
        return sFileExtension;
    }

    /**
     * Method to return the MIME type of the document.
     *
     * @return String The document's MIME type.
     */
    public String getDocumentMIMEType() {
        return sMimeType;
    }

    /**
     * Method to return the MIME type of the document; returns the same
     * value as {@link #getDocumentMIMEType()}.
     *
     * @return String The document's MIME type.
     */
    public String getMIMEType() {
        return sMimeType;
    }

    /**
     * A graphics document is never the master document.
     *
     * @return Always <code>false</code>.
     */
    public boolean isMasterDocument() {
        return false;
    }
}

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
* Copyright: 2002-2014 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-03-19)
* Version 1.4 (2014-08-26)
*
*/
@ -40,13 +40,14 @@ import java.util.Map;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.DOMImplementation;
import writer2latex.api.ComplexOption;
import writer2latex.xmerge.DOMDocument;
public abstract class ConfigBase implements writer2latex.api.Config {
@ -127,7 +128,7 @@ public abstract class ConfigBase implements writer2latex.api.Config {
if (elm.getTagName().equals("option")) {
String sName = elm.getAttribute("name");
String sValue = elm.getAttribute("value");
if (sName!="") { setOption(sName,sValue); }
if (sName.length()>0) { setOption(sName,sValue); }
}
else {
readInner(elm);
@ -154,9 +155,10 @@ public abstract class ConfigBase implements writer2latex.api.Config {
DocumentBuilder builder = builderFactory.newDocumentBuilder();
DOMImplementation domImpl = builder.getDOMImplementation();
dom = domImpl.createDocument("","config",null);
}
catch (Throwable t) {
t.printStackTrace();
} catch (ParserConfigurationException e) {
// This will not happen
e.printStackTrace();
return;
}
Element rootElement = dom.getDocumentElement();

View file

@ -20,7 +20,7 @@
*
* All Rights Reserved.
*
* Version 1.4 (2014-08-13)
* Version 1.4 (2014-08-27)
*
*/
@ -38,7 +38,6 @@ import writer2latex.api.Converter;
import writer2latex.api.ConverterResult;
import writer2latex.api.OutputFile;
import writer2latex.office.EmbeddedObject;
import writer2latex.office.ImageLoader;
import writer2latex.office.MetaData;
import writer2latex.office.OfficeDocument;
import writer2latex.office.OfficeReader;
@ -58,7 +57,7 @@ public abstract class ConverterBase implements Converter {
protected OfficeDocument odDoc;
protected OfficeReader ofr;
protected MetaData metaData;
protected ImageLoader imageLoader;
protected ImageConverter imageConverter;
// The output file(s)
protected String sTargetFileName;
@ -114,8 +113,8 @@ public abstract class ConverterBase implements Converter {
private ConverterResult convert(String sTargetFileName) throws IOException {
ofr = new OfficeReader(odDoc,false);
metaData = new MetaData(odDoc);
imageLoader = new ImageLoader(odDoc,true);
imageLoader.setGraphicConverter(graphicConverter);
imageConverter = new ImageConverter(odDoc,true);
imageConverter.setGraphicConverter(graphicConverter);
// Prepare output
this.sTargetFileName = sTargetFileName;
@ -136,7 +135,7 @@ public abstract class ConverterBase implements Converter {
public MetaData getMetaData() { return metaData; }
public ImageLoader getImageLoader() { return imageLoader; }
public ImageConverter getImageCv() { return imageConverter; }
public void addDocument(OutputFile doc) { converterResult.addDocument(doc); }

View file

@ -0,0 +1,366 @@
/************************************************************************
*
* DOMDocument.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2014 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2014-08-26)
*
*/
package writer2latex.base;
import java.io.InputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
//import org.xml.sax.SAXParseException;
import writer2latex.api.OutputFile;
/**
 * This class represents XML-based documents. It is loosely based on a class from the former xmerge project
 * from OOo.
 */
public class DOMDocument implements OutputFile {

    /**
     * Factory for <code>DocumentBuilder</code> objects. Each document owns its
     * factory, so the flags given to one constructor call cannot change the
     * parsing behavior of other documents.
     */
    private final DocumentBuilderFactory factory;

    /** DOM <code>Document</code> holding the content. */
    private Document contentDoc = null;

    private String documentName = null;
    private String fileName = null;
    private String fileExt = null;

    /**
     * Default constructor; creates a namespace aware, non-validating document.
     *
     * @param name <code>Document</code> name.
     * @param ext <code>Document</code> extension.
     */
    public DOMDocument(String name, String ext) {
        this(name, ext, true, false);
    }

    /**
     * Returns the file extension of the <code>Document</code>
     * represented.
     *
     * @return file extension of the <code>Document</code>.
     */
    protected String getFileExtension() {
        return fileExt;
    }

    /**
     * Constructor with arguments to set <code>namespaceAware</code>
     * and <code>validating</code> flags.
     *
     * @param name <code>Document</code> name (may or may not
     *             contain extension). Must not be null.
     * @param ext <code>Document</code> extension. Must not be null.
     * @param namespaceAware Value for <code>namespaceAware</code> flag.
     * @param validating Value for <code>validating</code> flag.
     */
    public DOMDocument(String name, String ext, boolean namespaceAware, boolean validating) {
        factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(validating);
        factory.setNamespaceAware(namespaceAware);
        this.fileExt = ext;
        this.documentName = trimDocumentName(name);
        this.fileName = documentName + getFileExtension();
    }

    /**
     * Removes the file extension from the <code>Document</code>
     * name. The comparison ignores case and does not depend on the
     * default locale.
     *
     * @param name Full <code>Document</code> name with extension.
     *
     * @return Name of <code>Document</code> without the extension.
     */
    private String trimDocumentName(String name) {
        String ext = getFileExtension();
        int nExtLen = ext.length();
        int nStart = name.length() - nExtLen;
        // regionMatches returns false for a negative start (name shorter than extension)
        if (name.regionMatches(true, nStart, ext, 0, nExtLen)) {
            return name.substring(0, nStart);
        }
        return name;
    }

    /**
     * Return a DOM <code>Document</code> object of the document content
     * file. Note that a content DOM is not created when the constructor
     * is called. So, either the <code>read</code> method, the
     * <code>setContentDOM</code> method or the <code>initContentDOM</code>
     * method will need to be called ahead on this object before calling
     * this method.
     *
     * @return DOM <code>Document</code> object, or null if none has been set.
     */
    public Document getContentDOM() {
        return contentDoc;
    }

    /**
     * Sets the content of the <code>Document</code> to the supplied DOM.
     *
     * @param newDom DOM <code>Document</code> object.
     *
     * @throws ClassCastException If the node is not a <code>Document</code>.
     */
    public void setContentDOM(Node newDom) {
        contentDoc = (Document) newDom;
    }

    /**
     * Return the name of the <code>Document</code>.
     *
     * @return The name of <code>Document</code>.
     */
    public String getName() {
        return documentName;
    }

    /**
     * Return the file name of the <code>Document</code>, that is the
     * name with the extension appended.
     *
     * @return The file name of <code>Document</code>.
     */
    public String getFileName() {
        return fileName;
    }

    /**
     * Read the <code>Document</code> from the specified
     * <code>InputStream</code>.
     *
     * @param is XML document <code>InputStream</code>.
     *
     * @throws IOException If any I/O error occurs, or if the parser cannot
     *                     be created or fails to parse the stream; in the
     *                     latter cases the original exception is the cause.
     */
    public void read(InputStream is) throws IOException {
        DocumentBuilder builder;
        try {
            builder = factory.newDocumentBuilder();
        } catch (ParserConfigurationException ex) {
            throw new IOException(ex.getMessage(), ex);
        }
        try {
            contentDoc = builder.parse(is);
        } catch (SAXException ex) {
            throw new IOException(ex.getMessage(), ex);
        }
    }

    /**
     * Write out content to the supplied <code>OutputStream</code>
     * (with pretty printing). The output is UTF-8 encoded and starts with
     * an XML declaration. The stream is closed after writing.
     *
     * @param os XML <code>OutputStream</code>.
     *
     * @throws IOException If any I/O error occurs, or if the document has
     *                     no root element to write.
     */
    public void write(OutputStream os) throws IOException {
        Document dom = getContentDOM();
        Element root = dom == null ? null : dom.getDocumentElement();
        if (root == null) {
            // Fail before anything is written rather than with a NullPointerException
            throw new IOException("No document content to write: " + fileName);
        }
        OutputStreamWriter osw = new OutputStreamWriter(os, "UTF-8");
        osw.write("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n");
        write(root, 0, osw);
        osw.flush();
        osw.close();
    }

    // Write nodes; we only need element, text and comment nodes.
    // nLevel is the indentation level; a negative level turns pretty printing
    // off for the node and all its descendants.
    private void write(Node node, int nLevel, OutputStreamWriter osw) throws IOException {
        switch (node.getNodeType()) {
        case Node.ELEMENT_NODE:
            if (node.hasChildNodes()) {
                // Block pretty print from this node? (Yes, if it has text content,
                // because added whitespace would change the content.)
                NodeList list = node.getChildNodes();
                int nLen = list.getLength();
                boolean bBlockPrettyPrint = false;
                if (nLevel >= 0) {
                    for (int i = 0; i < nLen; i++) {
                        bBlockPrettyPrint |= list.item(i).getNodeType() == Node.TEXT_NODE;
                    }
                }
                // Print start tag
                if (nLevel >= 0) { writeSpaces(nLevel, osw); }
                osw.write("<" + node.getNodeName());
                writeAttributes(node, osw);
                osw.write(">");
                if (nLevel >= 0 && !bBlockPrettyPrint) { osw.write("\n"); }
                // Print children
                int nNextLevel = (bBlockPrettyPrint || nLevel < 0) ? -1 : nLevel + 1;
                for (int i = 0; i < nLen; i++) {
                    write(list.item(i), nNextLevel, osw);
                }
                // Print end tag
                if (nLevel >= 0 && !bBlockPrettyPrint) { writeSpaces(nLevel, osw); }
                osw.write("</" + node.getNodeName() + ">");
                if (nLevel >= 0) { osw.write("\n"); }
            }
            else { // empty element
                if (nLevel >= 0) { writeSpaces(nLevel, osw); }
                osw.write("<" + node.getNodeName());
                writeAttributes(node, osw);
                osw.write(" />");
                if (nLevel >= 0) { osw.write("\n"); }
            }
            break;
        case Node.TEXT_NODE:
            write(node.getNodeValue(), osw);
            break;
        case Node.COMMENT_NODE:
            if (nLevel >= 0) { writeSpaces(nLevel, osw); }
            osw.write("<!-- ");
            // NOTE(review): the comment text is escaped like character data and
            // padded with spaces, so it does not round-trip exactly - confirm
            // whether this is intended.
            write(node.getNodeValue(), osw);
            osw.write(" -->");
            if (nLevel >= 0) { osw.write("\n"); }
            break;
        default:
            // Other node types are not written
            break;
        }
    }

    // Write all attributes of an element as name="value" pairs
    private void writeAttributes(Node node, OutputStreamWriter osw) throws IOException {
        NamedNodeMap attr = node.getAttributes();
        int nLen = attr.getLength();
        for (int i = 0; i < nLen; i++) {
            Node item = attr.item(i);
            osw.write(" ");
            write(item.getNodeName(), osw);
            osw.write("=\"");
            write(item.getNodeValue(), osw);
            osw.write("\"");
        }
    }

    // Write indentation: one space per level
    private void writeSpaces(int nCount, OutputStreamWriter osw) throws IOException {
        for (int i = 0; i < nCount; i++) { osw.write(" "); }
    }

    // Write a string, replacing the five characters with predefined XML entities
    private void write(String s, OutputStreamWriter osw) throws IOException {
        int nLen = s.length();
        for (int i = 0; i < nLen; i++) {
            char c = s.charAt(i);
            switch (c) {
            case '<': osw.write("&lt;"); break;
            case '>': osw.write("&gt;"); break;
            case '&': osw.write("&amp;"); break;
            case '"': osw.write("&quot;"); break;
            case '\'': osw.write("&apos;"); break;
            default: osw.write(c);
            }
        }
    }

    /**
     * Initializes a new, empty DOM <code>Document</code> as content.
     * The document has no root element; it must be added by the caller
     * before the document can be written.
     *
     * @throws IOException If the DOM cannot be created.
     */
    public final void initContentDOM() throws IOException {
        contentDoc = createDOM("");
    }

    /**
     * <p>Creates a new DOM <code>Document</code>, with a root element
     * of the given name.</p>
     *
     * @param rootName root name of <code>Document</code>; if null or empty,
     *                 no root element is created (an empty string is not a
     *                 valid element name).
     *
     * @return The new DOM <code>Document</code>.
     *
     * @throws IOException If the DOM cannot be created.
     */
    private Document createDOM(String rootName) throws IOException {
        Document doc;
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            doc = builder.newDocument();
        } catch (ParserConfigurationException ex) {
            // This will not happen
            throw new IOException(ex);
        }
        if (rootName != null && rootName.length() > 0) {
            Element root = doc.createElement(rootName);
            doc.appendChild(root);
        }
        return doc;
    }

    // We need these because we implement OutputFile

    /**
     * Returns the MIME type of the document.
     *
     * @return Always the empty string.
     */
    public String getMIMEType() {
        return "";
    }

    /**
     * Tells whether this is the master document.
     *
     * @return Always <code>false</code>.
     */
    public boolean isMasterDocument() {
        return false;
    }
}

View file

@ -0,0 +1,201 @@
/************************************************************************
*
* ImageConverter.java
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License version 2.1, as published by the Free Software Foundation.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-04-03)
*
*/
package writer2latex.base;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.HashSet;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import writer2latex.api.GraphicConverter;
import writer2latex.office.EmbeddedBinaryObject;
import writer2latex.office.EmbeddedObject;
import writer2latex.office.MIMETypes;
import writer2latex.office.OfficeDocument;
import writer2latex.office.SVMReader;
import writer2latex.office.XMLString;
import writer2latex.util.Base64;
import writer2latex.util.Misc;
/**
 * <p>This class extracts the images of <code>draw:image</code> elements
 * from an office document, optionally converting them using a
 * <code>GraphicConverter</code>.
 * The images are returned as <code>BinaryGraphicsDocument</code>.</p>
 */
public final class ImageConverter {
    // Source of images that are embedded in the package
    private OfficeDocument oooDoc;

    // File names are generated as subdirectory + base name + three digit counter
    private String sBaseFileName = "";
    private String sSubDirName = "";
    private int nImageCount = 0;
    private NumberFormat formatter;

    // Should EPS data be extracted from SVM images?
    private boolean bExtractEPS;

    // Settings for the conversion of images
    private GraphicConverter gcv = null;
    private boolean bAcceptOtherFormats = true;
    private String sDefaultFormat = null;
    private String sDefaultVectorFormat = null;
    private HashSet<String> acceptedFormats = new HashSet<String>();

    /**
     * Creates a new image converter.
     *
     * @param oooDoc the document to load embedded images from
     * @param bExtractEPS true if EPS data found in SVM images should be
     *        returned in place of the SVM image
     */
    public ImageConverter(OfficeDocument oooDoc, boolean bExtractEPS) {
        this.oooDoc = oooDoc;
        this.bExtractEPS = bExtractEPS;
        this.formatter = new DecimalFormat("000");
    }

    /** Sets the common prefix of the generated image names. */
    public void setBaseFileName(String sBaseFileName) {
        this.sBaseFileName = sBaseFileName;
    }

    /** Sets the subdirectory for the images; a slash is appended to the name. */
    public void setUseSubdir(String sSubDirName) {
        this.sSubDirName = sSubDirName+"/";
    }

    /** Determines whether images in formats that are not explicitly accepted are returned. */
    public void setAcceptOtherFormats(boolean b) {
        bAcceptOtherFormats = b;
    }

    /** Sets the default target format, which also becomes an accepted format. */
    public void setDefaultFormat(String sMime) {
        addAcceptedFormat(sMime);
        sDefaultFormat = sMime;
    }

    /** Sets the default target format for vector graphics, which also becomes an accepted format. */
    public void setDefaultVectorFormat(String sMime) {
        addAcceptedFormat(sMime);
        sDefaultVectorFormat = sMime;
    }

    /** Adds a MIME type to the set of accepted formats. */
    public void addAcceptedFormat(String sMime) {
        acceptedFormats.add(sMime);
    }

    private boolean isAcceptedFormat(String sMime) {
        return acceptedFormats.contains(sMime);
    }

    /** Sets the converter used for images that are not in an accepted format. */
    public void setGraphicConverter(GraphicConverter gcv) {
        this.gcv = gcv;
    }

    // Concatenate the values of all text node children of a node
    private String collectText(Node parent) {
        StringBuilder text = new StringBuilder();
        NodeList children = parent.getChildNodes();
        int nCount = children.getLength();
        for (int i=0; i<nCount; i++) {
            Node child = children.item(i);
            if (child.getNodeType()==Node.TEXT_NODE) {
                text.append(child.getNodeValue());
            }
        }
        return text.toString();
    }

    /**
     * Gets the image of a <code>draw:image</code> element.
     *
     * @param node the element, which must be a <code>draw:image</code> element
     * @return the image, or null if no image data is available (this includes
     *         linked images) or if the format of the image is not accepted
     */
    public BinaryGraphicsDocument getImage(Node node) {
        // Data about the image
        byte[] imageData = null;
        String sMediaType = null;
        String sExtension = null;

        String sLink = Misc.getAttribute(node,XMLString.XLINK_HREF);
        if (sLink!=null && sLink.length()>0) {
            // The image may be embedded in the package
            if (sLink.startsWith("#")) { sLink = sLink.substring(1); }
            if (sLink.startsWith("./")) { sLink = sLink.substring(2); }
            EmbeddedObject candidate = oooDoc.getEmbeddedObject(sLink);
            if (candidate instanceof EmbeddedBinaryObject) {
                EmbeddedBinaryObject binary = (EmbeddedBinaryObject) candidate;
                imageData = binary.getBinaryData();
                // A media type from the manifest is trusted, otherwise
                // the type is determined by byte inspection
                sMediaType = binary.getType();
                if (sMediaType.length()==0) {
                    sMediaType = MIMETypes.getMagicMIMEType(imageData);
                }
                sExtension = MIMETypes.getFileExtension(sMediaType);
            }
            // Otherwise this is a linked image
            // TODO: Perhaps we should download the image from the url in the link?
            // Alternatively BinaryGraphicsDocument should be extended to
            // handle external graphics.
        }
        else {
            // The image must be contained in an office:binary-data element as base64
            Node binaryData = Misc.getChildByTagName(node,XMLString.OFFICE_BINARY_DATA);
            if (binaryData!=null) {
                imageData = Base64.decode(collectText(binaryData));
                sMediaType = MIMETypes.getMagicMIMEType(imageData);
                sExtension = MIMETypes.getFileExtension(sMediaType);
            }
        }

        if (imageData==null) {
            return null;
        }

        // The name (without extension) uses the next image number
        String sFileName = sSubDirName+sBaseFileName+formatter.format(++nImageCount);

        if (bExtractEPS && MIMETypes.SVM.equals(MIMETypes.getMagicMIMEType(imageData))) {
            // Look for postscript within the SVM data
            int[] range = new int[2];
            if (SVMReader.readSVM(imageData,range)) {
                BinaryGraphicsDocument eps = new BinaryGraphicsDocument(sFileName,
                    MIMETypes.EPS_EXT,MIMETypes.EPS);
                eps.read(imageData,range[0],range[1]);
                return eps;
            }
        }

        // Try to convert if there is a converter and a default format, and
        // the image is not already in an accepted format
        if (gcv!=null && !isAcceptedFormat(sMediaType) && sDefaultFormat!=null) {
            byte[] converted = null;
            String sTargetType = null;
            boolean bVectorTarget = MIMETypes.isVectorFormat(sMediaType)
                && sDefaultVectorFormat!=null
                && gcv.supportsConversion(sMediaType,sDefaultVectorFormat,false,false);
            if (bVectorTarget) {
                // The vector format is the first choice
                sTargetType = sDefaultVectorFormat;
                converted = gcv.convert(imageData,sMediaType,sTargetType);
            }
            if (converted==null && gcv.supportsConversion(sMediaType,sDefaultFormat,false,false)) {
                // The bitmap format is the second choice
                sTargetType = sDefaultFormat;
                converted = gcv.convert(imageData,sMediaType,sTargetType);
            }
            if (converted!=null) {
                // Success, continue with the converted image
                imageData = converted;
                sMediaType = sTargetType;
                sExtension = MIMETypes.getFileExtension(sMediaType);
            }
        }

        if (!isAcceptedFormat(sMediaType) && !bAcceptOtherFormats) {
            return null;
        }
        BinaryGraphicsDocument image = new BinaryGraphicsDocument(sFileName,sExtension,sMediaType);
        image.read(imageData);
        return image;
    }
}