/************************************************************************
*
* ImageConverter.java
*
* Copyright: 2002-2014 by Henrik Just
*
* This file is part of Writer2LaTeX.
*
* Writer2LaTeX is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Writer2LaTeX is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Writer2LaTeX. If not, see <http://www.gnu.org/licenses/>.
*
* Version 1.6 (2014-11-18)
*
*/
package writer2latex.base;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import javax.xml.bind.DatatypeConverter;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import writer2latex.api.GraphicConverter;
import writer2latex.office.EmbeddedBinaryObject;
import writer2latex.office.EmbeddedObject;
import writer2latex.office.MIMETypes;
import writer2latex.office.OfficeReader;
import writer2latex.office.SVMReader;
import writer2latex.office.XMLString;
import writer2latex.util.Misc;
/** This class extracts and converts images from an office document.
 * The images are returned as {@code BinaryGraphicsDocument}.
 * The image converter can be configured as destructive. In this case, the returned
 * graphics documents will contain the only reference to the image (the original data
 * will be removed).
 */
public final class ImageConverter {
private OfficeReader ofr;
// If true, embedded image data is disposed in the source document once it has been read
private boolean bDestructive;

// Data for file name generation
// (names are built as sub directory + base file name + formatted sequence number)
private String sBaseFileName = "";
private String sSubDirName = "";
private int nImageCount = 0;
private NumberFormat formatter;

// Should EPS be extracted from SVM?
private boolean bExtractEPS;

// Data for image conversion
private GraphicConverter gcv = null;
private boolean bAcceptOtherFormats = true;
private String sDefaultFormat = null;
private String sDefaultVectorFormat = null;
// MIME types of the accepted formats
private HashSet<String> acceptedFormats = new HashSet<String>();

// In the package format, the same image file may be used more than once in the document.
// Hence we keep information of all documents for potential reuse; entries are looked up
// by the xlink:href value of the image element.
private HashMap<String,BinaryGraphicsDocument> recycledImages = new HashMap<String,BinaryGraphicsDocument>();
/** Construct a new {@code ImageConverter} referring to a specific document
 *
 * @param ofr the office reader to use
 * @param bDestructive set true if the converter should be destructive, i.e. the data of
 *   an embedded image is disposed in the source document once it has been read
 * @param bExtractEPS set true if EPS content should be extracted from SVM files
 */
public ImageConverter(OfficeReader ofr, boolean bDestructive, boolean bExtractEPS) {
    this.ofr = ofr;
    this.bDestructive = bDestructive;
    this.bExtractEPS = bExtractEPS;
    // The formatted number becomes part of a file name, so it must not depend on the
    // default locale (some locales format numbers with non-ASCII digits)
    this.formatter = new DecimalFormat("000", DecimalFormatSymbols.getInstance(Locale.ROOT));
}
/** Set the base file name from which the names of extracted images are generated.
 * The generated name consists of the sub directory (if any), this base name and a
 * running image number.
 *
 * @param sBaseFileName the base file name
 */
public void setBaseFileName(String sBaseFileName) {
    this.sBaseFileName = sBaseFileName;
}
/** Define the name of a sub directory to prepend to file names.
 * A single "/" separates the sub directory from the file name. If the name is
 * {@code null} or empty, no sub directory is used.
 *
 * @param sSubDirName the sub directory (with or without a trailing "/")
 */
public void setUseSubdir(String sSubDirName) {
    if (sSubDirName==null || sSubDirName.length()==0) {
        // Appending "/" unconditionally would turn the generated names into
        // absolute paths ("/name001") or produce the literal prefix "null/"
        this.sSubDirName = "";
    }
    else if (sSubDirName.endsWith("/")) {
        // The separator is already there, do not duplicate it
        this.sSubDirName = sSubDirName;
    }
    else {
        this.sSubDirName = sSubDirName+"/";
    }
}
/** Choose whether the {@code ImageConverter} should return an image even if it
 * was not possible to convert it to an acceptable format.
 *
 * @param b true if other formats should be accepted
 */
public void setAcceptOtherFormats(boolean b) {
    this.bAcceptOtherFormats = b;
}
/** Set the default format for raster graphics. The format is also registered as
 * an accepted format.
 *
 * @param sMime the MIME type of the default raster format
 */
public void setDefaultFormat(String sMime) {
    this.sDefaultFormat = sMime;
    // A default format is by definition also an accepted one
    this.addAcceptedFormat(sMime);
}
/** Set the default format for vector graphics. The format is also registered as
 * an accepted format.
 *
 * @param sMime the MIME type for the default vector format
 */
public void setDefaultVectorFormat(String sMime) {
    this.sDefaultVectorFormat = sMime;
    // A default format is by definition also an accepted one
    this.addAcceptedFormat(sMime);
}
/** Register a graphics format as accepted.
 *
 * @param sMime the MIME type of the format
 */
public void addAcceptedFormat(String sMime) {
    this.acceptedFormats.add(sMime);
}
/** Test whether a format has been registered as accepted.
 *
 * @param sMime the MIME type to query
 * @return true if this is an accepted format
 */
private boolean isAcceptedFormat(String sMime) {
    final boolean bRegistered = this.acceptedFormats.contains(sMime);
    return bRegistered;
}
/** Set the {@code GraphicConverter} to use for image conversion.
 *
 * @param gcv the graphics converter
 */
public void setGraphicConverter(GraphicConverter gcv) {
    this.gcv = gcv;
}
/** Get an image from a {@code draw:image} element. If the converter is destructive,
 * the returned {@code BinaryGraphicsDocument} will hold the only reference to the image
 * data (the original data will be removed).
 *
 * <p>If the image is not in an accepted format, or a default vector format is set and
 * the image is not in that format, an alternative image for the element is examined.
 * The alternative replaces the image if it is in an accepted format and either the
 * image is not in an accepted format, or the alternative is in the default vector
 * format.</p>
 *
 * @param node the image element
 * @return a document containing the (converted) image, or null if it was not possible to read the image
 *   or convert it to an accepted format
 */
public BinaryGraphicsDocument getImage(Element node) {
    // Reserve the next image number for the file name
    nImageCount++;
    final String sCandidateName = sSubDirName + sBaseFileName + formatter.format(nImageCount);

    BinaryGraphicsDocument result = getImage(node, sCandidateName);
    if (result != null) {
        final boolean bTryAlternative = !result.isAcceptedFormat()
            || (sDefaultVectorFormat != null && !sDefaultVectorFormat.equals(result.getMIMEType()));
        if (bTryAlternative) {
            // We may have better luck with an alternative image
            final Element alternative = getAlternativeImage(node);
            final BinaryGraphicsDocument fallback =
                alternative != null ? getImage(alternative, sCandidateName) : null;
            if (fallback != null && fallback.isAcceptedFormat()) {
                final boolean bPreferFallback = !result.isAcceptedFormat()
                    || (sDefaultVectorFormat != null
                        && !sDefaultVectorFormat.equals(result.getMIMEType())
                        && sDefaultVectorFormat.equals(fallback.getMIMEType()));
                if (bPreferFallback) {
                    result = fallback;
                }
            }
        }
    }

    // Give the number back if the generated file name was not used
    final boolean bNameUnused = result == null || result.isLinked() || result.isRecycled();
    if (bNameUnused) {
        nImageCount--;
    }
    return result;
}
private BinaryGraphicsDocument getImage(Element node, String sName) {
assert(XMLString.DRAW_IMAGE.equals(node.getTagName()));
// Image data
String sExt = null;
String sMIME = null;
byte[] blob = null;
String sId = null;
// First try to extract the image using the xlink:href attribute
if (node.hasAttribute(XMLString.XLINK_HREF)) {
String sHref = node.getAttribute(XMLString.XLINK_HREF);
if (sHref.length()>0) {
// We may have seen this image before, return the recycled version
if (recycledImages.containsKey(sHref)) {
return recycledImages.get(sHref);
}
// Image may be embedded in package:
String sPath = sHref;
if (sPath.startsWith("#")) { sPath = sPath.substring(1); }
if (sPath.startsWith("./")) { sPath = sPath.substring(2); }
EmbeddedObject obj = ofr.getEmbeddedObject(sPath);
if (obj!=null && obj instanceof EmbeddedBinaryObject) {
EmbeddedBinaryObject object = (EmbeddedBinaryObject) obj;
blob = object.getBinaryData();
sMIME = object.getType();
if (sMIME.length()==0) {
// If the manifest provides a media type, trust that
// Otherwise determine it by byte inspection
sMIME = MIMETypes.getMagicMIMEType(blob);
}
sExt = MIMETypes.getFileExtension(sMIME);
if (bDestructive) {
object.dispose();
}
// We got an image, define ID for recycling
sId = sHref;
}
else {
// This is a linked image
// TODO: Add option to download image from the URL?
String sFileName = ofr.fixRelativeLink(sHref);
BinaryGraphicsDocument bgd
= new BinaryGraphicsDocument(sFileName,null);
return bgd;
}
}
}
// If there is no suitable xlink:href attribute, the image must be contained in an office:binary-element as base64
if (blob==null) {
Node obd = Misc.getChildByTagName(node,XMLString.OFFICE_BINARY_DATA);
if (obd!=null) {
StringBuilder buf = new StringBuilder();
NodeList nl = obd.getChildNodes();
int nLen = nl.getLength();
for (int i=0; i