From 79ae2524192fd5042efcb20e1d570a97063344c7 Mon Sep 17 00:00:00 2001 From: henrikjust Date: Fri, 8 Aug 2014 13:08:58 +0000 Subject: [PATCH] TexMaths support git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@159 f0f2a975-2e09-46c8-9428-3b39399b9f3c --- build.xml | 4 +- source/distro/changelog.txt | 2 + .../java/writer2latex/base/ConverterBase.java | 180 +++++++------ .../writer2latex/latex/DrawConverter.java | 36 ++- .../writer2latex/latex/MathmlConverter.java | 245 +++++++----------- .../java/writer2latex/latex/i18n/symbols.xml | 6 +- 6 files changed, 233 insertions(+), 240 deletions(-) diff --git a/build.xml b/build.xml index 51e4c8e..07b31dc 100644 --- a/build.xml +++ b/build.xml @@ -2,7 +2,7 @@ ############################################################################ # This is the Ant build file for writer2latex # Original: Sep 2004 (mgn) - # version 1.4 (2012-03-16) + # version 1.4 (2014-08-08) ############################################################################ --> @@ -69,7 +69,7 @@ diff --git a/source/distro/changelog.txt b/source/distro/changelog.txt index 69fa205..e3b9b90 100644 --- a/source/distro/changelog.txt +++ b/source/distro/changelog.txt @@ -2,6 +2,8 @@ Changelog for Writer2LaTeX version 1.2 -> 1.4 ---------- version 1.3.1 alpha ---------- +[w2l] Added support for TexMaths equations + [all] The command line application now gives an explanation if the source file is not in ODF format [all] Bugfix: Fixed typo that caused writer2latex.office.MIMETypes.getMagicMIMEType() to fail in some cases diff --git a/source/java/writer2latex/base/ConverterBase.java b/source/java/writer2latex/base/ConverterBase.java index a1e89f3..9f12513 100644 --- a/source/java/writer2latex/base/ConverterBase.java +++ b/source/java/writer2latex/base/ConverterBase.java @@ -16,11 +16,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * - * Copyright: 2002-2012 by Henrik Just + * Copyright: 2002-2014 by Henrik Just * * All Rights Reserved. * - * Version 1.4 (2012-04-07) + * Version 1.4 (2014-08-06) * */ @@ -167,83 +167,28 @@ public abstract class ConverterBase implements Converter { return theSequence; } - /** Determine whether or not a paragraph contains a display equation. - * A paragraph is a display equation if it contains a single formula and no text content except whitespace - * and an optional sequence number which may be in brackets. - * As a side effect, this method keeps a reference to the equation and the sequence number - * - * @param node the paragraph - * @return true if this is a display equation - */ - public boolean parseDisplayEquation(Node node) { - theEquation = null; - theSequence = null; - return doParseDisplayEquation(node); - } - - private boolean doParseDisplayEquation(Node node) { - Node child = node.getFirstChild(); - while (child!=null) { - Node equation = getFormula(child); - if (equation!=null) { - if (theEquation==null) { - theEquation = (Element) equation; - } - else { // two or more equations -> not a display - return false; - } - } - else if (Misc.isElement(child)) { - String sName = child.getNodeName(); - if (XMLString.TEXT_SEQUENCE.equals(sName)) { - if (theSequence==null) { - theSequence = (Element) child; - } - else { // two sequence numbers -> not a display - return false; - } - } - else if (XMLString.TEXT_SPAN.equals(sName)) { - if (!doParseDisplayEquation(child)) { - return false; - } - } - else if (XMLString.TEXT_S.equals(sName)) { - // Spaces are allowed - } - else if (XMLString.TEXT_TAB.equals(sName)) { - // Tab stops are allowed - } - else if (XMLString.TEXT_TAB_STOP.equals(sName)) { // old - // Tab stops are allowed - } - else if (XMLString.TEXT_SOFT_PAGE_BREAK.equals(sName)) { // since ODF 1.1 - // Soft page breaks are allowed - } - else { - // Other elements -> not a display - return false; - } - } - else if (Misc.isText(child)) { - String s = child.getNodeValue(); - int nLen = s.length(); - for (int i=0; i not a display - return false; - } - } - } - child = child.getNextSibling(); - } - return true; + /** Get a TexMaths equation from a draw:frame (PNG formula) or draw:g element (SVG) + * Such an element is a TexMaths equation if it contains an svg:title element with content "TexMaths" + * The actual formula is the content of an svg:desc element + * + * @param node the draw:frame or draw:g element to check + * @return the TexMaths equation, or null if this is not a TexMaths equation + */ + public Element getTexMathsEquation(Element node) { + Element svgTitle = Misc.getChildByTagName(node, XMLString.SVG_TITLE); + if (svgTitle!=null && "TexMaths".equals(Misc.getPCDATA(svgTitle))) { + return Misc.getChildByTagName(node, XMLString.SVG_DESC); + } + return null; } - // TODO: Extend OfficeReader to handle frames - private Node getFormula(Node node) { - if (Misc.isElement(node,XMLString.DRAW_FRAME)) { + /** Get a MathML formula from a draw:frame + * + * @param node the draw:frame + * @return the MathML element, or null if this is not a MathML formula + */ + public Element getMathmlEquation(Element node) { + if (node.getTagName().equals(XMLString.DRAW_FRAME)) { node=Misc.getFirstChildElement(node); } @@ -283,7 +228,86 @@ public abstract class ConverterBase implements Converter { } return null; } - + + /** Determine whether or not a paragraph contains a display equation. + * A paragraph is a display equation if it contains a single formula and no text content except whitespace + * and an optional sequence number which may be in brackets. + * As a side effect, this method keeps a reference to the equation and the sequence number + * + * @param node the paragraph + * @return true if this is a display equation + */ + public boolean parseDisplayEquation(Node node) { + theEquation = null; + theSequence = null; + return doParseDisplayEquation(node); + } + + private boolean doParseDisplayEquation(Node node) { + Node child = node.getFirstChild(); + while (child!=null) { + if (Misc.isElement(child)) { + Element elm = (Element) child; + String sName = elm.getTagName(); + // First check for MathML or TexMaths equation + Element equation = getMathmlEquation(elm); + if (equation==null) { + equation = getTexMathsEquation(elm); + } + + if (equation!=null) { + if (theEquation==null) { + theEquation = equation; + } + else { // two or more equations -> not a display + return false; + } + } + else if (XMLString.TEXT_SEQUENCE.equals(sName)) { + if (theSequence==null) { + theSequence = elm; + } + else { // two sequence numbers -> not a display + return false; + } + } + else if (XMLString.TEXT_SPAN.equals(sName)) { + if (!doParseDisplayEquation(child)) { + return false; + } + } + else if (XMLString.TEXT_S.equals(sName)) { + // Spaces are allowed + } + else if (XMLString.TEXT_TAB.equals(sName)) { + // Tab stops are allowed + } + else if (XMLString.TEXT_TAB_STOP.equals(sName)) { // old + // Tab stops are allowed + } + else if (XMLString.TEXT_SOFT_PAGE_BREAK.equals(sName)) { // since ODF 1.1 + // Soft page breaks are allowed + } + else { + // Other elements -> not a display + return false; + } + } + else if (Misc.isText(child)) { + String s = child.getNodeValue(); + int nLen = s.length(); + for (int i=0; i not a display + return false; + } + } + } + child = child.getNextSibling(); + } + return true; + } } \ No newline at end of file diff --git a/source/java/writer2latex/latex/DrawConverter.java b/source/java/writer2latex/latex/DrawConverter.java index 9ba500f..4e47b49 100644 --- a/source/java/writer2latex/latex/DrawConverter.java +++ b/source/java/writer2latex/latex/DrawConverter.java @@ -16,11 +16,11 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * - * Copyright: 2002-2012 by Henrik Just + * Copyright: 2002-2014 by Henrik Just * * All Rights Reserved. * - * Version 1.2 (2012-02-23) + * Version 1.4 (2014-08-06) * */ @@ -88,7 +88,7 @@ public class DrawConverter extends ConverterHelper { // TODO: Otherwise try the user settings... } } - + public void handleCaption(Element node, LaTeXDocumentPortion ldp, Context oc) { // Floating frames should be positioned *above* the label, hence // we use a separate ldp for the paragraphs and add this later @@ -108,8 +108,9 @@ public class DrawConverter extends ConverterHelper { ldp.append(capLdp); } + // Process the first child of a draw:frame public void handleDrawElement(Element node, LaTeXDocumentPortion ldp, Context oc) { - // node must be an elment in the draw namespace + // node must be an element in the draw namespace String sName = node.getTagName(); if (sName.equals(XMLString.DRAW_OBJECT)) { handleDrawObject(node,ldp,oc); @@ -128,11 +129,28 @@ public class DrawConverter extends ConverterHelper { palette.getFieldCv().handleAnchor(node,ldp,oc); } else if (sName.equals(XMLString.DRAW_FRAME)) { - // OpenDocument: Get the actual draw element in the frame - handleDrawElement(Misc.getFirstChildElement(node),ldp,oc); + Element equation = palette.getTexMathsEquation(node); + if (equation!=null) { + palette.getMathmlCv().handleTexMathsEquation(equation,ldp,oc); + } + else { + // OpenDocument: Get the actual draw element in the frame + handleDrawElement(Misc.getFirstChildElement(node),ldp,oc); + } + } + else if (sName.equals(XMLString.DRAW_G)) { + Element equation = palette.getTexMathsEquation(node); + if (equation!=null) { + palette.getMathmlCv().handleTexMathsEquation(equation,ldp,oc); + } + else { + // Shapes are currently not supported + ldp.append("[Warning: Draw object ignored]"); + } + } else { - // Other drawing objects (eg. shapes) are currently not supported + // Other drawing objects are currently not supported ldp.append("[Warning: Draw object ignored]"); } } @@ -478,12 +496,12 @@ public class DrawConverter extends ConverterHelper { flushFloatingFrames(ldp,ic); floatingFramesStack.pop(); if (!bIsCaption) { - ldp.append("\\end{minipage}"); + ldp.append("\\end{minipage}"); } if (!oc.isNoFootnotes()) { palette.getNoteCv().flushFootnotes(ldp,oc); } } - + //------------------------------------------------------------------------- //handle any pending floating frames diff --git a/source/java/writer2latex/latex/MathmlConverter.java b/source/java/writer2latex/latex/MathmlConverter.java index 20a0bac..01aadd8 100644 --- a/source/java/writer2latex/latex/MathmlConverter.java +++ b/source/java/writer2latex/latex/MathmlConverter.java @@ -16,39 +16,36 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA * - * Copyright: 2002-2012 by Henrik Just + * Copyright: 2002-2014 by Henrik Just * * All Rights Reserved. * - * Version 1.2 (2012-02-23) + * Version 1.4 (2014-08-08) * */ package writer2latex.latex; -// TODO: Use parseDisplayEquation of ConverterBase -//import java.util.Hashtable; - -import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.w3c.dom.NodeList; -//import writer2latex.latex.i18n.I18n; -import writer2latex.office.EmbeddedObject; -import writer2latex.office.EmbeddedXMLObject; -import writer2latex.office.MIMETypes; +import writer2latex.latex.util.Context; import writer2latex.office.OfficeReader; import writer2latex.office.TableReader; import writer2latex.office.XMLString; import writer2latex.util.Misc; /** - * This class converts mathml nodes to LaTeX. - * (Actually it only converts the starmath annotation currently, if available). + * This class converts MathML nodes to LaTeX. + * The class name is slightly misleading: + * It only converts the StarMath annotation, if available + * and it also converts TexMaths formulas */ public final class MathmlConverter extends ConverterHelper { + + private enum TexMathsStyle {inline, display, latex}; private StarMathConverter smc; @@ -113,11 +110,52 @@ public final class MathmlConverter extends ConverterHelper { } } - - // Data for display equations - private Element theEquation = null; - private Element theSequence = null; + + /** Handle an (inline) TexMaths equation + * + * @param node the equation (an svg:desc element containing the formula) + * @param ldp the LaTeXDocumentPortion to contain the converted equation + * @param oc the current context + */ + public void handleTexMathsEquation(Element node, LaTeXDocumentPortion ldp, Context oc) { + // LaTeX code is contained in svg:desc + // Format is XXXXX + // where X is a paragraph sign + switch (getTexMathsStyle(node)) { + case inline: + ldp.append("$").append(getTexMathsEquation(node)).append("$"); + break; + case display: + ldp.append("$\\displaystyle ").append(getTexMathsEquation(node)).append("$"); + break; + case latex: + ldp.append(getTexMathsEquation(node)); + } + } + private TexMathsStyle getTexMathsStyle(Element node) { + String[] sContent = Misc.getPCDATA(node).split("\u00a7"); + if (sContent.length>=3) { // we only need 3 items of 6 + if ("display".equals(sContent[1])) { + return TexMathsStyle.display; + } + else if ("latex".equals(sContent[1])) { + return TexMathsStyle.latex; + } + } + return TexMathsStyle.inline; + } + + private String getTexMathsEquation(Element node) { + String[] sContent = Misc.getPCDATA(node).split("\u00a7"); + if (sContent.length>=3) { // we only need 3 items of 6 + return sContent[2]; + } + else { + return ""; + } + } + /** Try to convert a table as a display equation: * A 1 row by 2 columns table in which each cell contains exactly one paragraph, * the left cell contains exactly one formula and the right cell contains exactly @@ -132,16 +170,12 @@ public final class MathmlConverter extends ConverterHelper { if (table.getRowCount()==1 && table.getColCount()==2 && OfficeReader.isSingleParagraph(table.getCell(0, 0)) && OfficeReader.isSingleParagraph(table.getCell(0, 1)) ) { // Table of the desired form - theEquation = null; - theSequence = null; - if (parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 0))) && theEquation!=null && theSequence==null) { + if (palette.parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 0))) && palette.getEquation()!=null && palette.getSequence()==null) { // Found equation in first cell - Element myEquation = theEquation; - theEquation = null; - theSequence = null; - if (parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 1))) && theEquation==null && theSequence!=null) { + Element myEquation = palette.getEquation(); + if (palette.parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 1))) && palette.getEquation()==null && palette.getSequence()!=null) { // Found sequence in second cell - handleDisplayEquation(myEquation, theSequence, ldp); + handleDisplayEquation(myEquation, palette.getSequence(), ldp); return true; } } @@ -159,10 +193,8 @@ public final class MathmlConverter extends ConverterHelper { * did not contain a display equation */ public boolean handleDisplayEquation(Element node, LaTeXDocumentPortion ldp) { - theEquation = null; - theSequence = null; - if (parseDisplayEquation(node) && theEquation!=null) { - handleDisplayEquation(theEquation, theSequence, ldp); + if (palette.parseDisplayEquation(node) && palette.getEquation()!=null) { + handleDisplayEquation(palette.getEquation(), palette.getSequence(), ldp); return true; } else { @@ -171,130 +203,47 @@ public final class MathmlConverter extends ConverterHelper { } private void handleDisplayEquation(Element equation, Element sequence, LaTeXDocumentPortion ldp) { - String sLaTeX = convert(null,equation); + boolean bTexMaths = equation.getTagName().equals(XMLString.SVG_DESC); + TexMathsStyle style = TexMathsStyle.inline; + String sLaTeX; + if (bTexMaths) { + // TeXMaths equation + sLaTeX = getTexMathsEquation(equation); + style = getTexMathsStyle(equation); + } + else { + // MathML equation + sLaTeX = convert(null,equation); + } if (!" ".equals(sLaTeX)) { // ignore empty formulas - if (sequence!=null) { - // Numbered equation - ldp.append("\\begin{equation}"); - palette.getFieldCv().handleSequenceLabel(sequence,ldp); - ldp.nl() - .append(sLaTeX).nl() - .append("\\end{equation}").nl(); - if (bAddParAfterDisplay) { ldp.nl(); } + if (!bTexMaths || style!=TexMathsStyle.latex) { + if (sequence!=null) { + // Numbered equation + ldp.append("\\begin{equation}"); + palette.getFieldCv().handleSequenceLabel(sequence,ldp); + if (bTexMaths && style==TexMathsStyle.inline) { + ldp.append("\\textstyle "); + } + ldp.nl() + .append(sLaTeX).nl() + .append("\\end{equation}").nl(); + } + else { + // Unnumbered equation + ldp.append("\\begin{equation*}"); + if (bTexMaths && style==TexMathsStyle.inline) { + ldp.append("\\textstyle "); + } + ldp.nl() + .append(sLaTeX).nl() + .append("\\end{equation*}").nl(); + } } else { - // Unnumbered equation - ldp.append("\\begin{equation*}").nl() - .append(sLaTeX).nl() - .append("\\end{equation*}").nl(); - if (bAddParAfterDisplay) { ldp.nl(); } - } + ldp.append(sLaTeX).nl(); + } + if (bAddParAfterDisplay) { ldp.nl(); } } } - - private boolean parseDisplayEquation(Node node) { - Node child = node.getFirstChild(); - while (child!=null) { - Node equation = getFormula(child); - if (equation!=null) { - if (theEquation==null) { - theEquation = (Element) equation; - } - else { // two or more equations -> not a display - return false; - } - } - else if (Misc.isElement(child)) { - String sName = child.getNodeName(); - if (XMLString.TEXT_SEQUENCE.equals(sName)) { - if (theSequence==null) { - theSequence = (Element) child; - } - else { // two sequence numbers -> not a display - return false; - } - } - else if (XMLString.TEXT_SPAN.equals(sName)) { - if (!parseDisplayEquation(child)) { - return false; - } - } - else if (XMLString.TEXT_S.equals(sName)) { - // Spaces are allowed - } - else if (XMLString.TEXT_TAB.equals(sName)) { - // Tab stops are allowed - } - else if (XMLString.TEXT_TAB_STOP.equals(sName)) { // old - // Tab stops are allowed - } - else if (XMLString.TEXT_SOFT_PAGE_BREAK.equals(sName)) { // since ODF 1.1 - // Soft page breaks are allowed - } - else { - // Other elements -> not a display - return false; - } - } - else if (Misc.isText(child)) { - String s = child.getNodeValue(); - int nLen = s.length(); - for (int i=0; i not a display - return false; - } - } - } - child = child.getNextSibling(); - } - return true; - } - - // TODO: Extend OfficeReader to handle frames - private Node getFormula(Node node) { - if (Misc.isElement(node,XMLString.DRAW_FRAME)) { - node=Misc.getFirstChildElement(node); - } - - String sHref = Misc.getAttribute(node,XMLString.XLINK_HREF); - - if (sHref!=null) { // Embedded object in package or linked object - if (ofr.isInPackage(sHref)) { // Embedded object in package - if (sHref.startsWith("#")) { sHref=sHref.substring(1); } - if (sHref.startsWith("./")) { sHref=sHref.substring(2); } - EmbeddedObject object = palette.getEmbeddedObject(sHref); - if (object!=null) { - if (MIMETypes.MATH.equals(object.getType()) || MIMETypes.ODF.equals(object.getType())) { // Formula! - try { - Document formuladoc = ((EmbeddedXMLObject) object).getContentDOM(); - Element formula = Misc.getChildByTagName(formuladoc,XMLString.MATH); // Since OOo 3.2 - if (formula==null) { - formula = Misc.getChildByTagName(formuladoc,XMLString.MATH_MATH); - } - return formula; - } - catch (org.xml.sax.SAXException e) { - e.printStackTrace(); - } - catch (java.io.IOException e) { - e.printStackTrace(); - } - } - } - } - } - else { // flat xml, object is contained in node - Element formula = Misc.getChildByTagName(node,XMLString.MATH); // Since OOo 3.2 - if (formula==null) { - formula = Misc.getChildByTagName(node,XMLString.MATH_MATH); - } - return formula; - } - return null; - } - - } \ No newline at end of file diff --git a/source/java/writer2latex/latex/i18n/symbols.xml b/source/java/writer2latex/latex/i18n/symbols.xml index bab9e44..3443c35 100644 --- a/source/java/writer2latex/latex/i18n/symbols.xml +++ b/source/java/writer2latex/latex/i18n/symbols.xml @@ -1,6 +1,6 @@ @@ -3599,7 +3599,7 @@ definitions to the range 00-FF. - +