/************************************************************************ * * ImageConverter.java * * Copyright: 2002-2014 by Henrik Just * * This file is part of Writer2LaTeX. * * Writer2LaTeX is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Writer2LaTeX is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Writer2LaTeX. If not, see <http://www.gnu.org/licenses/>. * * Version 1.6 (2014-11-18) * */ package writer2latex.base; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; import java.text.DecimalFormat; import java.text.NumberFormat; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; import javax.xml.bind.DatatypeConverter; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; import writer2latex.api.GraphicConverter; import writer2latex.office.EmbeddedBinaryObject; import writer2latex.office.EmbeddedObject; import writer2latex.office.MIMETypes; import writer2latex.office.OfficeReader; import writer2latex.office.SVMReader; import writer2latex.office.XMLString; import writer2latex.util.Misc; /** This class extracts and converts images from an office document. * The images are returned as BinaryGraphicsDocument. * The image converter can be configured as destructive. In this case, the returned * graphics documents will contain the only reference to the image (the original data * will be removed). 
*/ public final class ImageConverter { private OfficeReader ofr; private boolean bDestructive; // Data for file name generation private String sBaseFileName = ""; private String sSubDirName = ""; private int nImageCount = 0; private NumberFormat formatter; // should EPS be extracted from SVM? private boolean bExtractEPS; // Data for image conversion private GraphicConverter gcv = null; private boolean bAcceptOtherFormats = true; private String sDefaultFormat = null; private String sDefaultVectorFormat = null; private HashSet acceptedFormats = new HashSet(); // In the package format, the same image file may be used more than once in the document // Hence we keep information of all documents for potential private HashMap recycledImages = new HashMap(); /** Construct a new ImageConverter referring to a specific document * * @param ofr the office reader to use * @param bExtractEPS set true if EPS content should be extracted from SVM files */ public ImageConverter(OfficeReader ofr, boolean bDestructive, boolean bExtractEPS) { this.ofr = ofr; this.bDestructive = bDestructive; this.bExtractEPS = bExtractEPS; this.formatter = new DecimalFormat("000"); } /** Define the base file name to use for generating file names * * @param sBaseFileName the base file name */ public void setBaseFileName(String sBaseFileName) { this.sBaseFileName = sBaseFileName; } /** Define the name of a sub directory to prepend to file names * * @param sSubDirName the sub directory */ public void setUseSubdir(String sSubDirName) { this.sSubDirName = sSubDirName+"/"; } /** Specify that the ImageConverter should return an image even if it was not possible * to convert it to an acceptable format. 
*
     *  @param b true if other formats should be accepted
     */
    public void setAcceptOtherFormats(boolean b) {
        this.bAcceptOtherFormats = b;
    }

    /** Select the default raster format; the format is also registered as accepted
     *
     *  @param sMime the MIME type of the default raster format
     */
    public void setDefaultFormat(String sMime) {
        this.sDefaultFormat = sMime;
        addAcceptedFormat(sMime);
    }

    /** Select the default vector format; the format is also registered as accepted
     *
     *  @param sMime the MIME type for the default vector format
     */
    public void setDefaultVectorFormat(String sMime) {
        this.sDefaultVectorFormat = sMime;
        addAcceptedFormat(sMime);
    }

    /** Register a graphics format as accepted
     *
     *  @param sMime the MIME type of the format
     */
    public void addAcceptedFormat(String sMime) {
        this.acceptedFormats.add(sMime);
    }

    /** Test whether a format has been registered as accepted
     *
     *  @param sMime the MIME type to query
     *  @return true if this is an accepted format
     */
    private boolean isAcceptedFormat(String sMime) {
        return this.acceptedFormats.contains(sMime);
    }

    /** Set the GraphicConverter to use for image conversion
     *
     *  @param gcv the graphics converter
     */
    public void setGraphicConverter(GraphicConverter gcv) {
        this.gcv = gcv;
    }

    /** Get an image from a draw:image element. If the converter is destructive, the returned
     *  BinaryGraphicsDocument will hold the only reference to the image data (the original
     *  data will be removed).
* * @param node the image element * @return a document containing the (converted) image, or null if it was not possible to read the image * or convert it to an accepted format */ public BinaryGraphicsDocument getImage(Element node) { String sName = sSubDirName+sBaseFileName+formatter.format(++nImageCount); BinaryGraphicsDocument bgd = getImage(node,sName); if (bgd!=null) { if (!bgd.isAcceptedFormat() || (sDefaultVectorFormat!=null && !sDefaultVectorFormat.equals(bgd.getMIMEType()))) { // We may have better luck with an alternative image Element sibling = getAlternativeImage(node); if (sibling!=null) { BinaryGraphicsDocument altBgd = getImage(sibling,sName); if (altBgd!=null && altBgd.isAcceptedFormat()) { if (!bgd.isAcceptedFormat() || (sDefaultVectorFormat!=null && !sDefaultVectorFormat.equals(bgd.getMIMEType()) && sDefaultVectorFormat.equals(altBgd.getMIMEType()))) { bgd = altBgd; } } } } } if (bgd==null || bgd.isLinked() || bgd.isRecycled()) { // The file name was not used nImageCount--; } return bgd; } private BinaryGraphicsDocument getImage(Element node, String sName) { assert(XMLString.DRAW_IMAGE.equals(node.getTagName())); // Image data String sExt = null; String sMIME = null; byte[] blob = null; String sId = null; // First try to extract the image using the xlink:href attribute if (node.hasAttribute(XMLString.XLINK_HREF)) { String sHref = node.getAttribute(XMLString.XLINK_HREF); if (sHref.length()>0) { // We may have seen this image before, return the recycled version if (recycledImages.containsKey(sHref)) { return recycledImages.get(sHref); } // Image may be embedded in package: String sPath = sHref; if (sPath.startsWith("#")) { sPath = sPath.substring(1); } if (sPath.startsWith("./")) { sPath = sPath.substring(2); } EmbeddedObject obj = ofr.getEmbeddedObject(sPath); if (obj!=null && obj instanceof EmbeddedBinaryObject) { EmbeddedBinaryObject object = (EmbeddedBinaryObject) obj; blob = object.getBinaryData(); sMIME = object.getType(); if 
(sMIME.length()==0) { // If the manifest provides a media type, trust that // Otherwise determine it by byte inspection sMIME = MIMETypes.getMagicMIMEType(blob); } sExt = MIMETypes.getFileExtension(sMIME); if (bDestructive) { object.dispose(); } // We got an image, define ID for recycling sId = sHref; } else { // This is a linked image // TODO: Add option to download image from the URL? String sFileName = ofr.fixRelativeLink(sHref); BinaryGraphicsDocument bgd = new BinaryGraphicsDocument(sFileName,null); return bgd; } } } // If there is no suitable xlink:href attribute, the image must be contained in an office:binary-element as base64 if (blob==null) { Node obd = Misc.getChildByTagName(node,XMLString.OFFICE_BINARY_DATA); if (obd!=null) { StringBuilder buf = new StringBuilder(); NodeList nl = obd.getChildNodes(); int nLen = nl.getLength(); for (int i=0; i