TexMaths support

git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@159 f0f2a975-2e09-46c8-9428-3b39399b9f3c
This commit is contained in:
henrikjust 2014-08-08 13:08:58 +00:00
parent c639066861
commit 79ae252419
6 changed files with 233 additions and 240 deletions

View file

@ -2,7 +2,7 @@
############################################################################ ############################################################################
# This is the Ant build file for writer2latex # This is the Ant build file for writer2latex
# Original: Sep 2004 (mgn) # Original: Sep 2004 (mgn)
# version 1.4 (2012-03-16) # version 1.4 (2014-08-08)
############################################################################ ############################################################################
--> -->
<project name="w2l" default="help" basedir="."> <project name="w2l" default="help" basedir=".">
@ -69,7 +69,7 @@
<javac srcdir="${src}" <javac srcdir="${src}"
destdir="${classes}" destdir="${classes}"
encoding="us-ascii" encoding="us-ascii"
source="1.5" source="1.6"
debug="on"> debug="on">
<classpath refid="main.class.path"/> <classpath refid="main.class.path"/>
</javac> </javac>

View file

@ -2,6 +2,8 @@ Changelog for Writer2LaTeX version 1.2 -> 1.4
---------- version 1.3.1 alpha ---------- ---------- version 1.3.1 alpha ----------
[w2l] Added support for TexMaths equations
[all] The command line application now gives an explanation if the source file is not in ODF format [all] The command line application now gives an explanation if the source file is not in ODF format
[all] Bugfix: Fixed typo that caused writer2latex.office.MIMETypes.getMagicMIMEType() to fail in some cases [all] Bugfix: Fixed typo that caused writer2latex.office.MIMETypes.getMagicMIMEType() to fail in some cases

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA * MA 02111-1307 USA
* *
* Copyright: 2002-2012 by Henrik Just * Copyright: 2002-2014 by Henrik Just
* *
* All Rights Reserved. * All Rights Reserved.
* *
* Version 1.4 (2012-04-07) * Version 1.4 (2014-08-06)
* *
*/ */
@ -167,6 +167,68 @@ public abstract class ConverterBase implements Converter {
return theSequence; return theSequence;
} }
/** Look up the TexMaths formula attached to a drawing element.
 *  TexMaths stores its formulas as a draw:frame (PNG rendering) or a draw:g (SVG rendering).
 *  The element is recognized as a TexMaths equation when it has an svg:title child whose
 *  text content is exactly "TexMaths"; the formula itself is kept in the svg:desc child.
 *
 *  @param node the draw:frame or draw:g element to inspect
 *  @return the svg:desc child holding the TexMaths formula, or null if the element is not
 *  marked as a TexMaths equation
 */
public Element getTexMathsEquation(Element node) {
    Element title = Misc.getChildByTagName(node, XMLString.SVG_TITLE);
    if (title==null) {
        // No title at all, hence not created by TexMaths
        return null;
    }
    boolean bIsTexMaths = "TexMaths".equals(Misc.getPCDATA(title));
    return bIsTexMaths ? Misc.getChildByTagName(node, XMLString.SVG_DESC) : null;
}
/** Locate the MathML formula that belongs to a draw:frame.
 *  If the element passed in is a draw:frame, its first child element is examined;
 *  any other element is examined directly. The formula is either loaded from an
 *  embedded object in the package (element carries an xlink:href) or read from the
 *  element's own children (flat XML, no xlink:href).
 *
 *  @param node the draw:frame
 *  @return the MathML element, or null if this is not a MathML formula
 */
public Element getMathmlEquation(Element node) {
    // For a frame, the object to inspect is the first element inside the frame
    Element source = node;
    if (node.getTagName().equals(XMLString.DRAW_FRAME)) {
        source = Misc.getFirstChildElement(node);
    }

    String sHref = Misc.getAttribute(source,XMLString.XLINK_HREF);
    if (sHref==null) {
        // Flat XML, the formula is contained in the element itself
        Element inline = Misc.getChildByTagName(source,XMLString.MATH); // Since OOo 3.2
        return inline!=null ? inline : Misc.getChildByTagName(source,XMLString.MATH_MATH);
    }

    if (!ofr.isInPackage(sHref)) {
        // Linked object outside the package: nothing to load
        return null;
    }

    // Embedded object in package: normalize the reference to a plain package path
    String sPath = sHref;
    if (sPath.startsWith("#")) { sPath = sPath.substring(1); }
    if (sPath.startsWith("./")) { sPath = sPath.substring(2); }

    EmbeddedObject object = getEmbeddedObject(sPath);
    if (object==null) {
        return null;
    }
    boolean bIsFormula = MIMETypes.MATH.equals(object.getType()) || MIMETypes.ODF.equals(object.getType());
    if (!bIsFormula) {
        return null;
    }

    try {
        Document formulaDoc = ((EmbeddedXMLObject) object).getContentDOM();
        Element root = Misc.getChildByTagName(formulaDoc,XMLString.MATH); // Since OOo 3.2
        if (root==null) {
            root = Misc.getChildByTagName(formulaDoc,XMLString.MATH_MATH);
        }
        return root;
    }
    catch (org.xml.sax.SAXException e) {
        e.printStackTrace();
    }
    catch (java.io.IOException e) {
        e.printStackTrace();
    }
    // The embedded document could not be read
    return null;
}
/** Determine whether or not a paragraph contains a display equation. /** Determine whether or not a paragraph contains a display equation.
* A paragraph is a display equation if it contains a single formula and no text content except whitespace * A paragraph is a display equation if it contains a single formula and no text content except whitespace
* and an optional sequence number which may be in brackets. * and an optional sequence number which may be in brackets.
@ -184,20 +246,26 @@ public abstract class ConverterBase implements Converter {
private boolean doParseDisplayEquation(Node node) { private boolean doParseDisplayEquation(Node node) {
Node child = node.getFirstChild(); Node child = node.getFirstChild();
while (child!=null) { while (child!=null) {
Node equation = getFormula(child); if (Misc.isElement(child)) {
Element elm = (Element) child;
String sName = elm.getTagName();
// First check for MathML or TexMaths equation
Element equation = getMathmlEquation(elm);
if (equation==null) {
equation = getTexMathsEquation(elm);
}
if (equation!=null) { if (equation!=null) {
if (theEquation==null) { if (theEquation==null) {
theEquation = (Element) equation; theEquation = equation;
} }
else { // two or more equations -> not a display else { // two or more equations -> not a display
return false; return false;
} }
} }
else if (Misc.isElement(child)) { else if (XMLString.TEXT_SEQUENCE.equals(sName)) {
String sName = child.getNodeName();
if (XMLString.TEXT_SEQUENCE.equals(sName)) {
if (theSequence==null) { if (theSequence==null) {
theSequence = (Element) child; theSequence = elm;
} }
else { // two sequence numbers -> not a display else { // two sequence numbers -> not a display
return false; return false;
@ -241,49 +309,5 @@ public abstract class ConverterBase implements Converter {
return true; return true;
} }
// TODO: Extend OfficeReader to handle frames
/* Find the MathML formula behind a node, or return null if there is none.
 * A draw:frame is unwrapped to its first child element before the lookup.
 */
private Node getFormula(Node node) {
    Node holder = node;
    if (Misc.isElement(node,XMLString.DRAW_FRAME)) {
        holder = Misc.getFirstChildElement(node);
    }

    String sHref = Misc.getAttribute(holder,XMLString.XLINK_HREF);
    if (sHref==null) {
        // flat XML, object is contained in node
        Element flatFormula = Misc.getChildByTagName(holder,XMLString.MATH); // Since OOo 3.2
        if (flatFormula!=null) {
            return flatFormula;
        }
        return Misc.getChildByTagName(holder,XMLString.MATH_MATH);
    }

    // Only embedded objects in the package are handled, linked objects are ignored
    if (!ofr.isInPackage(sHref)) {
        return null;
    }
    if (sHref.startsWith("#")) { sHref = sHref.substring(1); }
    if (sHref.startsWith("./")) { sHref = sHref.substring(2); }

    EmbeddedObject object = getEmbeddedObject(sHref);
    if (object==null) {
        return null;
    }
    if (!MIMETypes.MATH.equals(object.getType()) && !MIMETypes.ODF.equals(object.getType())) {
        // Some other kind of embedded object, not a formula
        return null;
    }

    try {
        Document formuladoc = ((EmbeddedXMLObject) object).getContentDOM();
        Element packagedFormula = Misc.getChildByTagName(formuladoc,XMLString.MATH); // Since OOo 3.2
        if (packagedFormula==null) {
            packagedFormula = Misc.getChildByTagName(formuladoc,XMLString.MATH_MATH);
        }
        return packagedFormula;
    }
    catch (org.xml.sax.SAXException e) {
        e.printStackTrace();
    }
    catch (java.io.IOException e) {
        e.printStackTrace();
    }
    return null;
}
} }

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA * MA 02111-1307 USA
* *
* Copyright: 2002-2012 by Henrik Just * Copyright: 2002-2014 by Henrik Just
* *
* All Rights Reserved. * All Rights Reserved.
* *
* Version 1.2 (2012-02-23) * Version 1.4 (2014-08-06)
* *
*/ */
@ -108,8 +108,9 @@ public class DrawConverter extends ConverterHelper {
ldp.append(capLdp); ldp.append(capLdp);
} }
// Process the first child of a draw:frame
public void handleDrawElement(Element node, LaTeXDocumentPortion ldp, Context oc) { public void handleDrawElement(Element node, LaTeXDocumentPortion ldp, Context oc) {
// node must be an elment in the draw namespace // node must be an element in the draw namespace
String sName = node.getTagName(); String sName = node.getTagName();
if (sName.equals(XMLString.DRAW_OBJECT)) { if (sName.equals(XMLString.DRAW_OBJECT)) {
handleDrawObject(node,ldp,oc); handleDrawObject(node,ldp,oc);
@ -128,11 +129,28 @@ public class DrawConverter extends ConverterHelper {
palette.getFieldCv().handleAnchor(node,ldp,oc); palette.getFieldCv().handleAnchor(node,ldp,oc);
} }
else if (sName.equals(XMLString.DRAW_FRAME)) { else if (sName.equals(XMLString.DRAW_FRAME)) {
Element equation = palette.getTexMathsEquation(node);
if (equation!=null) {
palette.getMathmlCv().handleTexMathsEquation(equation,ldp,oc);
}
else {
// OpenDocument: Get the actual draw element in the frame // OpenDocument: Get the actual draw element in the frame
handleDrawElement(Misc.getFirstChildElement(node),ldp,oc); handleDrawElement(Misc.getFirstChildElement(node),ldp,oc);
} }
}
else if (sName.equals(XMLString.DRAW_G)) {
Element equation = palette.getTexMathsEquation(node);
if (equation!=null) {
palette.getMathmlCv().handleTexMathsEquation(equation,ldp,oc);
}
else { else {
// Other drawing objects (eg. shapes) are currently not supported // Shapes are currently not supported
ldp.append("[Warning: Draw object ignored]");
}
}
else {
// Other drawing objects are currently not supported
ldp.append("[Warning: Draw object ignored]"); ldp.append("[Warning: Draw object ignored]");
} }
} }

View file

@ -16,40 +16,37 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, * Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA * MA 02111-1307 USA
* *
* Copyright: 2002-2012 by Henrik Just * Copyright: 2002-2014 by Henrik Just
* *
* All Rights Reserved. * All Rights Reserved.
* *
* Version 1.2 (2012-02-23) * Version 1.4 (2014-08-08)
* *
*/ */
package writer2latex.latex; package writer2latex.latex;
// TODO: Use parseDisplayEquation of ConverterBase
//import java.util.Hashtable;
import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.Node; import org.w3c.dom.Node;
import org.w3c.dom.NodeList; import org.w3c.dom.NodeList;
//import writer2latex.latex.i18n.I18n; import writer2latex.latex.util.Context;
import writer2latex.office.EmbeddedObject;
import writer2latex.office.EmbeddedXMLObject;
import writer2latex.office.MIMETypes;
import writer2latex.office.OfficeReader; import writer2latex.office.OfficeReader;
import writer2latex.office.TableReader; import writer2latex.office.TableReader;
import writer2latex.office.XMLString; import writer2latex.office.XMLString;
import writer2latex.util.Misc; import writer2latex.util.Misc;
/** /**
* This class converts mathml nodes to LaTeX. * This class converts MathML nodes to LaTeX.
* (Actually it only converts the starmath annotation currently, if available). * The class name is slightly misleading:
* it only converts the StarMath annotation (if available),
* and it also converts TexMaths formulas.
*/ */
public final class MathmlConverter extends ConverterHelper { public final class MathmlConverter extends ConverterHelper {
private enum TexMathsStyle {inline, display, latex};
private StarMathConverter smc; private StarMathConverter smc;
private boolean bContainsFormulas = false; private boolean bContainsFormulas = false;
@ -114,9 +111,50 @@ public final class MathmlConverter extends ConverterHelper {
} }
// Data for display equations /** Handle an (inline) TexMaths equation
private Element theEquation = null; *
private Element theSequence = null; * @param node the equation (an svg:desc element containing the formula)
* @param ldp the LaTeXDocumentPortion to contain the converted equation
* @param oc the current context
*/
public void handleTexMathsEquation(Element node, LaTeXDocumentPortion ldp, Context oc) {
    // The svg:desc content has the form
    //   <point size>X<mode>X<TeX code>X<format>X<resolution>X<transparency>
    // where X is the section sign (U+00A7). Only the mode and the TeX code are used here.
    TexMathsStyle style = getTexMathsStyle(node);
    if (style==TexMathsStyle.latex) {
        // Raw LaTeX: the code is emitted as it is, without math delimiters
        ldp.append(getTexMathsEquation(node));
    }
    else {
        // Inline math; display mode additionally forces \displaystyle
        String sOpen = style==TexMathsStyle.display ? "$\\displaystyle " : "$";
        ldp.append(sOpen).append(getTexMathsEquation(node)).append("$");
    }
}
/* Read the mode (second field) of a TexMaths description.
 * The fields are separated by the section sign (U+00A7); anything other than
 * "display" or "latex", or a description with fewer than three fields, counts as inline.
 */
private TexMathsStyle getTexMathsStyle(Element node) {
    String[] sFields = Misc.getPCDATA(node).split("\u00a7");
    if (sFields.length<3) { // we need at least 3 items of 6
        return TexMathsStyle.inline;
    }
    String sMode = sFields[1];
    if ("display".equals(sMode)) {
        return TexMathsStyle.display;
    }
    if ("latex".equals(sMode)) {
        return TexMathsStyle.latex;
    }
    return TexMathsStyle.inline;
}
/* Read the TeX code (third field) of a TexMaths description.
 * The fields are separated by the section sign (U+00A7); the result is the empty
 * string if the description has fewer than three fields.
 */
private String getTexMathsEquation(Element node) {
    String[] sFields = Misc.getPCDATA(node).split("\u00a7");
    // we only need 3 items of 6
    return sFields.length>=3 ? sFields[2] : "";
}
/** Try to convert a table as a display equation: /** Try to convert a table as a display equation:
* A 1 row by 2 columns table in which each cell contains exactly one paragraph, * A 1 row by 2 columns table in which each cell contains exactly one paragraph,
@ -132,16 +170,12 @@ public final class MathmlConverter extends ConverterHelper {
if (table.getRowCount()==1 && table.getColCount()==2 && if (table.getRowCount()==1 && table.getColCount()==2 &&
OfficeReader.isSingleParagraph(table.getCell(0, 0)) && OfficeReader.isSingleParagraph(table.getCell(0, 1)) ) { OfficeReader.isSingleParagraph(table.getCell(0, 0)) && OfficeReader.isSingleParagraph(table.getCell(0, 1)) ) {
// Table of the desired form // Table of the desired form
theEquation = null; if (palette.parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 0))) && palette.getEquation()!=null && palette.getSequence()==null) {
theSequence = null;
if (parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 0))) && theEquation!=null && theSequence==null) {
// Found equation in first cell // Found equation in first cell
Element myEquation = theEquation; Element myEquation = palette.getEquation();
theEquation = null; if (palette.parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 1))) && palette.getEquation()==null && palette.getSequence()!=null) {
theSequence = null;
if (parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 1))) && theEquation==null && theSequence!=null) {
// Found sequence in second cell // Found sequence in second cell
handleDisplayEquation(myEquation, theSequence, ldp); handleDisplayEquation(myEquation, palette.getSequence(), ldp);
return true; return true;
} }
} }
@ -159,10 +193,8 @@ public final class MathmlConverter extends ConverterHelper {
* did not contain a display equation * did not contain a display equation
*/ */
public boolean handleDisplayEquation(Element node, LaTeXDocumentPortion ldp) { public boolean handleDisplayEquation(Element node, LaTeXDocumentPortion ldp) {
theEquation = null; if (palette.parseDisplayEquation(node) && palette.getEquation()!=null) {
theSequence = null; handleDisplayEquation(palette.getEquation(), palette.getSequence(), ldp);
if (parseDisplayEquation(node) && theEquation!=null) {
handleDisplayEquation(theEquation, theSequence, ldp);
return true; return true;
} }
else { else {
@ -171,130 +203,47 @@ public final class MathmlConverter extends ConverterHelper {
} }
private void handleDisplayEquation(Element equation, Element sequence, LaTeXDocumentPortion ldp) { private void handleDisplayEquation(Element equation, Element sequence, LaTeXDocumentPortion ldp) {
String sLaTeX = convert(null,equation); boolean bTexMaths = equation.getTagName().equals(XMLString.SVG_DESC);
TexMathsStyle style = TexMathsStyle.inline;
String sLaTeX;
if (bTexMaths) {
// TeXMaths equation
sLaTeX = getTexMathsEquation(equation);
style = getTexMathsStyle(equation);
}
else {
// MathML equation
sLaTeX = convert(null,equation);
}
if (!" ".equals(sLaTeX)) { // ignore empty formulas if (!" ".equals(sLaTeX)) { // ignore empty formulas
if (!bTexMaths || style!=TexMathsStyle.latex) {
if (sequence!=null) { if (sequence!=null) {
// Numbered equation // Numbered equation
ldp.append("\\begin{equation}"); ldp.append("\\begin{equation}");
palette.getFieldCv().handleSequenceLabel(sequence,ldp); palette.getFieldCv().handleSequenceLabel(sequence,ldp);
if (bTexMaths && style==TexMathsStyle.inline) {
ldp.append("\\textstyle ");
}
ldp.nl() ldp.nl()
.append(sLaTeX).nl() .append(sLaTeX).nl()
.append("\\end{equation}").nl(); .append("\\end{equation}").nl();
if (bAddParAfterDisplay) { ldp.nl(); }
} }
else { else {
// Unnumbered equation // Unnumbered equation
ldp.append("\\begin{equation*}").nl() ldp.append("\\begin{equation*}");
if (bTexMaths && style==TexMathsStyle.inline) {
ldp.append("\\textstyle ");
}
ldp.nl()
.append(sLaTeX).nl() .append(sLaTeX).nl()
.append("\\end{equation*}").nl(); .append("\\end{equation*}").nl();
}
}
else {
ldp.append(sLaTeX).nl();
}
if (bAddParAfterDisplay) { ldp.nl(); } if (bAddParAfterDisplay) { ldp.nl(); }
} }
} }
}
/* Scan the children of a node to decide whether it can be a display equation.
 * At most one formula and at most one sequence number are accepted; they are stored in
 * theEquation and theSequence. Apart from these only text:span (checked recursively),
 * spaces, tab stops, soft page breaks and text consisting of brackets and spaces are allowed.
 * Returns false as soon as anything else is met.
 */
private boolean parseDisplayEquation(Node node) {
    // The only characters tolerated in text nodes: brackets, space and non-breaking space
    final String sAllowedChars = "()[]{} \u00A0";

    for (Node child = node.getFirstChild(); child!=null; child = child.getNextSibling()) {
        Node equation = getFormula(child);
        if (equation!=null) {
            if (theEquation!=null) { // two or more equations -> not a display
                return false;
            }
            theEquation = (Element) equation;
        }
        else if (Misc.isElement(child)) {
            String sName = child.getNodeName();
            if (XMLString.TEXT_SEQUENCE.equals(sName)) {
                if (theSequence!=null) { // two sequence numbers -> not a display
                    return false;
                }
                theSequence = (Element) child;
            }
            else if (XMLString.TEXT_SPAN.equals(sName)) {
                // A span is transparent: its content must satisfy the same rules
                if (!parseDisplayEquation(child)) {
                    return false;
                }
            }
            else {
                // Spaces, tab stops (new and old name) and soft page breaks (since ODF 1.1) are allowed
                boolean bWhitespace = XMLString.TEXT_S.equals(sName)
                    || XMLString.TEXT_TAB.equals(sName)
                    || XMLString.TEXT_TAB_STOP.equals(sName)
                    || XMLString.TEXT_SOFT_PAGE_BREAK.equals(sName);
                if (!bWhitespace) {
                    // Other elements -> not a display
                    return false;
                }
            }
        }
        else if (Misc.isText(child)) {
            String sText = child.getNodeValue();
            for (int i=0; i<sText.length(); i++) {
                if (sAllowedChars.indexOf(sText.charAt(i))<0) {
                    // Characters except brackets and whitespace -> not a display
                    return false;
                }
            }
        }
    }
    return true;
}
// TODO: Extend OfficeReader to handle frames
/* Find the MathML formula behind a node, or return null if there is none.
 * A draw:frame is unwrapped to its first child element before the lookup.
 */
private Node getFormula(Node node) {
    Node target = Misc.isElement(node,XMLString.DRAW_FRAME) ? Misc.getFirstChildElement(node) : node;

    String sHref = Misc.getAttribute(target,XMLString.XLINK_HREF);
    if (sHref==null) {
        // flat xml, object is contained in node
        Element inlineFormula = Misc.getChildByTagName(target,XMLString.MATH); // Since OOo 3.2
        return inlineFormula!=null ? inlineFormula : Misc.getChildByTagName(target,XMLString.MATH_MATH);
    }

    // Embedded object in package or linked object; only the former is handled
    if (!ofr.isInPackage(sHref)) {
        return null;
    }
    String sName = sHref;
    if (sName.startsWith("#")) { sName = sName.substring(1); }
    if (sName.startsWith("./")) { sName = sName.substring(2); }

    EmbeddedObject object = palette.getEmbeddedObject(sName);
    if (object==null) {
        return null;
    }
    boolean bFormula = MIMETypes.MATH.equals(object.getType()) || MIMETypes.ODF.equals(object.getType());
    if (!bFormula) {
        return null;
    }

    try {
        Document formuladoc = ((EmbeddedXMLObject) object).getContentDOM();
        Element formula = Misc.getChildByTagName(formuladoc,XMLString.MATH); // Since OOo 3.2
        if (formula==null) {
            formula = Misc.getChildByTagName(formuladoc,XMLString.MATH_MATH);
        }
        return formula;
    }
    catch (org.xml.sax.SAXException e) {
        e.printStackTrace();
    }
    catch (java.io.IOException e) {
        e.printStackTrace();
    }
    return null;
}
} }

View file

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="ISO-8859-1"?> <?xml version="1.0" encoding="ISO-8859-1"?>
<!-- This is a datafile used by Writer2LaTeX <!-- This is a datafile used by Writer2LaTeX
Version 1.2 (2011-09-13) Version 1.4 (2014-08-05)
The definitions for greek characters are contributed by interzone, info@interzone.gr The definitions for greek characters are contributed by interzone, info@interzone.gr
and extended by Johannis Likos. Additional bugfixes by Alexej Kryukov and extended by Johannis Likos. Additional bugfixes by Alexej Kryukov
@ -286,7 +286,7 @@ PART I: Common symbols, ascii only
<symbol char="0132" text="{\IJ}" /> <symbol char="0132" text="{\IJ}" />
<symbol char="0133" text="{\ij}" /> <symbol char="0133" text="{\ij}" />
<symbol char="0134" text="\^J" /> <symbol char="0134" text="\^J" />
<symbol char="0135" text="\^\j" /> <symbol char="0135" text="\^{\j}" />
<symbol char="0136" text="\c{K}" /> <symbol char="0136" text="\c{K}" />
<symbol char="0137" text="\c{k}" /> <symbol char="0137" text="\c{k}" />
<!-- missing: 138 LATIN SMALL LETTER KRA (greenlandic) --> <!-- missing: 138 LATIN SMALL LETTER KRA (greenlandic) -->