TexMaths support

git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@159 f0f2a975-2e09-46c8-9428-3b39399b9f3c
This commit is contained in:
henrikjust 2014-08-08 13:08:58 +00:00
parent c639066861
commit 79ae252419
6 changed files with 233 additions and 240 deletions

View file

@ -2,7 +2,7 @@
############################################################################
# This is the Ant build file for writer2latex
# Original: Sep 2004 (mgn)
# version 1.4 (2012-03-16)
# version 1.4 (2014-08-08)
############################################################################
-->
<project name="w2l" default="help" basedir=".">
@ -69,7 +69,7 @@
<javac srcdir="${src}"
destdir="${classes}"
encoding="us-ascii"
source="1.5"
source="1.6"
debug="on">
<classpath refid="main.class.path"/>
</javac>

View file

@ -2,6 +2,8 @@ Changelog for Writer2LaTeX version 1.2 -> 1.4
---------- version 1.3.1 alpha ----------
[w2l] Added support for TexMaths equations
[all] The command line application now gives an explanation if the source file is not in ODF format
[all] Bugfix: Fixed typo that caused writer2latex.office.MIMETypes.getMagicMIMEType() to fail in some cases

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
* Copyright: 2002-2014 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.4 (2012-04-07)
* Version 1.4 (2014-08-06)
*
*/
@ -167,83 +167,28 @@ public abstract class ConverterBase implements Converter {
return theSequence;
}
/** Determine whether or not a paragraph contains a display equation.
* A paragraph is a display equation if it contains a single formula and no text content except whitespace
* and an optional sequence number which may be in brackets.
* As a side effect, this method keeps a reference to the equation and the sequence number
*
* @param node the paragraph
* @return true if this is a display equation
*/
public boolean parseDisplayEquation(Node node) {
// Clear the results of any previous call so that only this paragraph's findings are kept
theEquation = null;
theSequence = null;
// The recursive worker fills in theEquation/theSequence while it scans the paragraph
return doParseDisplayEquation(node);
}
private boolean doParseDisplayEquation(Node node) {
Node child = node.getFirstChild();
while (child!=null) {
Node equation = getFormula(child);
if (equation!=null) {
if (theEquation==null) {
theEquation = (Element) equation;
}
else { // two or more equations -> not a display
return false;
}
}
else if (Misc.isElement(child)) {
String sName = child.getNodeName();
if (XMLString.TEXT_SEQUENCE.equals(sName)) {
if (theSequence==null) {
theSequence = (Element) child;
}
else { // two sequence numbers -> not a display
return false;
}
}
else if (XMLString.TEXT_SPAN.equals(sName)) {
if (!doParseDisplayEquation(child)) {
return false;
}
}
else if (XMLString.TEXT_S.equals(sName)) {
// Spaces are allowed
}
else if (XMLString.TEXT_TAB.equals(sName)) {
// Tab stops are allowed
}
else if (XMLString.TEXT_TAB_STOP.equals(sName)) { // old
// Tab stops are allowed
}
else if (XMLString.TEXT_SOFT_PAGE_BREAK.equals(sName)) { // since ODF 1.1
// Soft page breaks are allowed
}
else {
// Other elements -> not a display
return false;
}
}
else if (Misc.isText(child)) {
String s = child.getNodeValue();
int nLen = s.length();
for (int i=0; i<nLen; i++) {
char c = s.charAt(i);
if (c!='(' && c!=')' && c!='[' && c!=']' && c!='{' && c!='}' && c!=' ' && c!='\u00A0') {
// Characters except brackets and whitespace -> not a display
return false;
}
}
}
child = child.getNextSibling();
}
return true;
/** Look up the TexMaths formula attached to a drawing element.
 *  A draw:frame (PNG formula) or draw:g (SVG formula) is treated as a TexMaths equation
 *  when it has an svg:title child whose content is exactly "TexMaths".
 *  The formula itself is stored in the svg:desc child of the same element.
 * 
 *  @param node the draw:frame or draw:g element to check
 *  @return the svg:desc child holding the TexMaths formula, or null if this is not a TexMaths equation
 */
public Element getTexMathsEquation(Element node) {
    Element title = Misc.getChildByTagName(node, XMLString.SVG_TITLE);
    boolean bIsTexMaths = title!=null && "TexMaths".equals(Misc.getPCDATA(title));
    return bIsTexMaths ? Misc.getChildByTagName(node, XMLString.SVG_DESC) : null;
}
// TODO: Extend OfficeReader to handle frames
private Node getFormula(Node node) {
if (Misc.isElement(node,XMLString.DRAW_FRAME)) {
/** Get a MathML formula from a draw:frame
*
* @param node the draw:frame
* @return the MathML element, or null if this is not a MathML formula
*/
public Element getMathmlEquation(Element node) {
if (node.getTagName().equals(XMLString.DRAW_FRAME)) {
node=Misc.getFirstChildElement(node);
}
@ -283,7 +228,86 @@ public abstract class ConverterBase implements Converter {
}
return null;
}
/** Check whether a paragraph is a display equation.
 *  That is the case if the paragraph holds exactly one formula and, apart from that,
 *  nothing but whitespace and an optional sequence number (possibly enclosed in brackets).
 *  Side effect: the equation and the sequence number found are remembered by this object.
 * 
 *  @param node the paragraph
 *  @return true if this is a display equation
 */
public boolean parseDisplayEquation(Node node) {
    // Forget whatever an earlier call found before scanning this paragraph
    theSequence = null;
    theEquation = null;
    boolean bIsDisplay = doParseDisplayEquation(node);
    return bIsDisplay;
}
// Recursive worker for parseDisplayEquation: scans the children of node, records the
// (single) equation and the (single) sequence number in theEquation/theSequence and
// returns false as soon as something is found that is not allowed in a display equation
private boolean doParseDisplayEquation(Node node) {
    for (Node current = node.getFirstChild(); current!=null; current = current.getNextSibling()) {
        if (Misc.isElement(current)) {
            Element element = (Element) current;
            String sTag = element.getTagName();
            // A formula is either a MathML equation or, failing that, a TexMaths equation
            Element formula = getMathmlEquation(element);
            if (formula==null) {
                formula = getTexMathsEquation(element);
            }
            if (formula!=null) {
                // A second equation means this is not a display
                if (theEquation!=null) { return false; }
                theEquation = formula;
            }
            else if (XMLString.TEXT_SEQUENCE.equals(sTag)) {
                // A second sequence number means this is not a display
                if (theSequence!=null) { return false; }
                theSequence = element;
            }
            else if (XMLString.TEXT_SPAN.equals(sTag)) {
                // Spans are transparent: their content must obey the same rules
                if (!doParseDisplayEquation(current)) { return false; }
            }
            else {
                // Spaces, tab stops (new and old element name) and soft page breaks
                // (since ODF 1.1) are allowed; any other element -> not a display
                boolean bAllowed = XMLString.TEXT_S.equals(sTag)
                    || XMLString.TEXT_TAB.equals(sTag)
                    || XMLString.TEXT_TAB_STOP.equals(sTag)
                    || XMLString.TEXT_SOFT_PAGE_BREAK.equals(sTag);
                if (!bAllowed) { return false; }
            }
        }
        else if (Misc.isText(current)) {
            // Only brackets, spaces and non-breaking spaces are allowed as text content
            for (char c : current.getNodeValue().toCharArray()) {
                if ("()[]{} \u00A0".indexOf(c)<0) { return false; }
            }
        }
    }
    return true;
}
}

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
* Copyright: 2002-2014 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2012-02-23)
* Version 1.4 (2014-08-06)
*
*/
@ -88,7 +88,7 @@ public class DrawConverter extends ConverterHelper {
// TODO: Otherwise try the user settings...
}
}
public void handleCaption(Element node, LaTeXDocumentPortion ldp, Context oc) {
// Floating frames should be positioned *above* the label, hence
// we use a separate ldp for the paragraphs and add this later
@ -108,8 +108,9 @@ public class DrawConverter extends ConverterHelper {
ldp.append(capLdp);
}
// Process the first child of a draw:frame
public void handleDrawElement(Element node, LaTeXDocumentPortion ldp, Context oc) {
// node must be an elment in the draw namespace
// node must be an element in the draw namespace
String sName = node.getTagName();
if (sName.equals(XMLString.DRAW_OBJECT)) {
handleDrawObject(node,ldp,oc);
@ -128,11 +129,28 @@ public class DrawConverter extends ConverterHelper {
palette.getFieldCv().handleAnchor(node,ldp,oc);
}
else if (sName.equals(XMLString.DRAW_FRAME)) {
// OpenDocument: Get the actual draw element in the frame
handleDrawElement(Misc.getFirstChildElement(node),ldp,oc);
Element equation = palette.getTexMathsEquation(node);
if (equation!=null) {
palette.getMathmlCv().handleTexMathsEquation(equation,ldp,oc);
}
else {
// OpenDocument: Get the actual draw element in the frame
handleDrawElement(Misc.getFirstChildElement(node),ldp,oc);
}
}
else if (sName.equals(XMLString.DRAW_G)) {
Element equation = palette.getTexMathsEquation(node);
if (equation!=null) {
palette.getMathmlCv().handleTexMathsEquation(equation,ldp,oc);
}
else {
// Shapes are currently not supported
ldp.append("[Warning: Draw object ignored]");
}
}
else {
// Other drawing objects (eg. shapes) are currently not supported
// Other drawing objects are currently not supported
ldp.append("[Warning: Draw object ignored]");
}
}
@ -478,12 +496,12 @@ public class DrawConverter extends ConverterHelper {
flushFloatingFrames(ldp,ic);
floatingFramesStack.pop();
if (!bIsCaption) {
ldp.append("\\end{minipage}");
ldp.append("\\end{minipage}");
}
if (!oc.isNoFootnotes()) { palette.getNoteCv().flushFootnotes(ldp,oc); }
}
//-------------------------------------------------------------------------
//handle any pending floating frames

View file

@ -16,39 +16,36 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2012 by Henrik Just
* Copyright: 2002-2014 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2012-02-23)
* Version 1.4 (2014-08-08)
*
*/
package writer2latex.latex;
// TODO: Use parseDisplayEquation of ConverterBase
//import java.util.Hashtable;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
//import writer2latex.latex.i18n.I18n;
import writer2latex.office.EmbeddedObject;
import writer2latex.office.EmbeddedXMLObject;
import writer2latex.office.MIMETypes;
import writer2latex.latex.util.Context;
import writer2latex.office.OfficeReader;
import writer2latex.office.TableReader;
import writer2latex.office.XMLString;
import writer2latex.util.Misc;
/**
* This class converts mathml nodes to LaTeX.
* (Actually it only converts the starmath annotation currently, if available).
* This class converts MathML nodes to LaTeX.
* The class name is slightly misleading:
* it only converts the StarMath annotation (if available),
* and it also converts TexMaths formulas.
*/
public final class MathmlConverter extends ConverterHelper {
private enum TexMathsStyle {inline, display, latex};
private StarMathConverter smc;
@ -113,11 +110,52 @@ public final class MathmlConverter extends ConverterHelper {
}
}
// Data for display equations
private Element theEquation = null;
private Element theSequence = null;
/** Handle an (inline) TexMaths equation.
 *  The LaTeX code is stored in the svg:desc element in the format
 *  <point size>X<mode>X<TeX code>X<format>X<resolution>X<transparency>
 *  where X is a section sign (U+00A7).
 *  If no TeX code can be extracted (empty or malformed content), nothing is exported.
 * 
 *  @param node the equation (an svg:desc element containing the formula)
 *  @param ldp the LaTeXDocumentPortion to contain the converted equation
 *  @param oc the current context
 */
public void handleTexMathsEquation(Element node, LaTeXDocumentPortion ldp, Context oc) {
    String sEquation = getTexMathsEquation(node);
    if (sEquation.length()==0) {
        // Wrapping an empty formula in $...$ would produce "$$", which TeX reads as
        // the start of display math and which would corrupt the rest of the document
        return;
    }
    switch (getTexMathsStyle(node)) {
    case inline:
        ldp.append("$").append(sEquation).append("$");
        break;
    case display:
        // Display style formula, but still set inline in the running text
        ldp.append("$\\displaystyle ").append(sEquation).append("$");
        break;
    case latex:
        // Raw LaTeX code is exported as is, without entering math mode
        ldp.append(sEquation);
        break;
    }
}
// Determine the TexMaths mode (second field of the svg:desc content).
// Anything other than "display" or "latex", as well as content with fewer
// than three fields, is treated as inline
private TexMathsStyle getTexMathsStyle(Element node) {
    String[] sFields = Misc.getPCDATA(node).split("\u00a7");
    if (sFields.length<3) { // we only need 3 items of 6
        return TexMathsStyle.inline;
    }
    String sMode = sFields[1];
    if ("display".equals(sMode)) {
        return TexMathsStyle.display;
    }
    return "latex".equals(sMode) ? TexMathsStyle.latex : TexMathsStyle.inline;
}
// Extract the TeX code (third field of the svg:desc content);
// returns the empty string if the content has fewer than three fields
private String getTexMathsEquation(Element node) {
    String[] sFields = Misc.getPCDATA(node).split("\u00a7");
    // we only need 3 items of 6
    return sFields.length>=3 ? sFields[2] : "";
}
/** Try to convert a table as a display equation:
* A 1 row by 2 columns table in which each cell contains exactly one paragraph,
* the left cell contains exactly one formula and the right cell contains exactly
@ -132,16 +170,12 @@ public final class MathmlConverter extends ConverterHelper {
if (table.getRowCount()==1 && table.getColCount()==2 &&
OfficeReader.isSingleParagraph(table.getCell(0, 0)) && OfficeReader.isSingleParagraph(table.getCell(0, 1)) ) {
// Table of the desired form
theEquation = null;
theSequence = null;
if (parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 0))) && theEquation!=null && theSequence==null) {
if (palette.parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 0))) && palette.getEquation()!=null && palette.getSequence()==null) {
// Found equation in first cell
Element myEquation = theEquation;
theEquation = null;
theSequence = null;
if (parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 1))) && theEquation==null && theSequence!=null) {
Element myEquation = palette.getEquation();
if (palette.parseDisplayEquation(Misc.getFirstChildElement(table.getCell(0, 1))) && palette.getEquation()==null && palette.getSequence()!=null) {
// Found sequence in second cell
handleDisplayEquation(myEquation, theSequence, ldp);
handleDisplayEquation(myEquation, palette.getSequence(), ldp);
return true;
}
}
@ -159,10 +193,8 @@ public final class MathmlConverter extends ConverterHelper {
* did not contain a display equation
*/
public boolean handleDisplayEquation(Element node, LaTeXDocumentPortion ldp) {
theEquation = null;
theSequence = null;
if (parseDisplayEquation(node) && theEquation!=null) {
handleDisplayEquation(theEquation, theSequence, ldp);
if (palette.parseDisplayEquation(node) && palette.getEquation()!=null) {
handleDisplayEquation(palette.getEquation(), palette.getSequence(), ldp);
return true;
}
else {
@ -171,130 +203,47 @@ public final class MathmlConverter extends ConverterHelper {
}
private void handleDisplayEquation(Element equation, Element sequence, LaTeXDocumentPortion ldp) {
String sLaTeX = convert(null,equation);
boolean bTexMaths = equation.getTagName().equals(XMLString.SVG_DESC);
TexMathsStyle style = TexMathsStyle.inline;
String sLaTeX;
if (bTexMaths) {
// TeXMaths equation
sLaTeX = getTexMathsEquation(equation);
style = getTexMathsStyle(equation);
}
else {
// MathML equation
sLaTeX = convert(null,equation);
}
if (!" ".equals(sLaTeX)) { // ignore empty formulas
if (sequence!=null) {
// Numbered equation
ldp.append("\\begin{equation}");
palette.getFieldCv().handleSequenceLabel(sequence,ldp);
ldp.nl()
.append(sLaTeX).nl()
.append("\\end{equation}").nl();
if (bAddParAfterDisplay) { ldp.nl(); }
if (!bTexMaths || style!=TexMathsStyle.latex) {
if (sequence!=null) {
// Numbered equation
ldp.append("\\begin{equation}");
palette.getFieldCv().handleSequenceLabel(sequence,ldp);
if (bTexMaths && style==TexMathsStyle.inline) {
ldp.append("\\textstyle ");
}
ldp.nl()
.append(sLaTeX).nl()
.append("\\end{equation}").nl();
}
else {
// Unnumbered equation
ldp.append("\\begin{equation*}");
if (bTexMaths && style==TexMathsStyle.inline) {
ldp.append("\\textstyle ");
}
ldp.nl()
.append(sLaTeX).nl()
.append("\\end{equation*}").nl();
}
}
else {
// Unnumbered equation
ldp.append("\\begin{equation*}").nl()
.append(sLaTeX).nl()
.append("\\end{equation*}").nl();
if (bAddParAfterDisplay) { ldp.nl(); }
}
ldp.append(sLaTeX).nl();
}
if (bAddParAfterDisplay) { ldp.nl(); }
}
}
// Scan the children of node and decide whether they form a display equation.
// Records the first formula in theEquation and the first sequence number in theSequence;
// returns false as soon as a second formula, a second sequence number, a disallowed
// element or a disallowed character is found. Callers reset theEquation/theSequence first.
private boolean parseDisplayEquation(Node node) {
Node child = node.getFirstChild();
while (child!=null) {
// Formula check comes first; getFormula returns null for anything that is not a formula
Node equation = getFormula(child);
if (equation!=null) {
if (theEquation==null) {
theEquation = (Element) equation;
}
else { // two or more equations -> not a display
return false;
}
}
else if (Misc.isElement(child)) {
String sName = child.getNodeName();
if (XMLString.TEXT_SEQUENCE.equals(sName)) {
if (theSequence==null) {
theSequence = (Element) child;
}
else { // two sequence numbers -> not a display
return false;
}
}
else if (XMLString.TEXT_SPAN.equals(sName)) {
// Spans are transparent: their content must obey the same rules
if (!parseDisplayEquation(child)) {
return false;
}
}
else if (XMLString.TEXT_S.equals(sName)) {
// Spaces are allowed
}
else if (XMLString.TEXT_TAB.equals(sName)) {
// Tab stops are allowed
}
else if (XMLString.TEXT_TAB_STOP.equals(sName)) { // old
// Tab stops are allowed
}
else if (XMLString.TEXT_SOFT_PAGE_BREAK.equals(sName)) { // since ODF 1.1
// Soft page breaks are allowed
}
else {
// Other elements -> not a display
return false;
}
}
else if (Misc.isText(child)) {
// Text content may only consist of brackets, spaces and non-breaking spaces
String s = child.getNodeValue();
int nLen = s.length();
for (int i=0; i<nLen; i++) {
char c = s.charAt(i);
if (c!='(' && c!=')' && c!='[' && c!=']' && c!='{' && c!='}' && c!=' ' && c!='\u00A0') {
// Characters except brackets and whitespace -> not a display
return false;
}
}
}
// Nodes that are neither elements nor text are skipped
child = child.getNextSibling();
}
return true;
}
// TODO: Extend OfficeReader to handle frames
// Try to locate a formula for the given node: returns the math element found,
// or null if the node does not lead to a formula (or the formula could not be loaded)
private Node getFormula(Node node) {
// For a draw:frame, the object to examine is its first child element
if (Misc.isElement(node,XMLString.DRAW_FRAME)) {
node=Misc.getFirstChildElement(node);
}
String sHref = Misc.getAttribute(node,XMLString.XLINK_HREF);
if (sHref!=null) { // Embedded object in package or linked object
if (ofr.isInPackage(sHref)) { // Embedded object in package
// Strip a leading "#" and "./" before looking up the object by name
if (sHref.startsWith("#")) { sHref=sHref.substring(1); }
if (sHref.startsWith("./")) { sHref=sHref.substring(2); }
EmbeddedObject object = palette.getEmbeddedObject(sHref);
if (object!=null) {
if (MIMETypes.MATH.equals(object.getType()) || MIMETypes.ODF.equals(object.getType())) { // Formula!
try {
Document formuladoc = ((EmbeddedXMLObject) object).getContentDOM();
Element formula = Misc.getChildByTagName(formuladoc,XMLString.MATH); // Since OOo 3.2
if (formula==null) {
// Fall back to the other element name for the math root
formula = Misc.getChildByTagName(formuladoc,XMLString.MATH_MATH);
}
return formula;
}
// On a parse or I/O failure the stack trace is printed and the method falls through to return null
catch (org.xml.sax.SAXException e) {
e.printStackTrace();
}
catch (java.io.IOException e) {
e.printStackTrace();
}
}
}
}
// Objects that are not in the package (linked objects) fall through to return null
}
else { // flat xml, object is contained in node
Element formula = Misc.getChildByTagName(node,XMLString.MATH); // Since OOo 3.2
if (formula==null) {
formula = Misc.getChildByTagName(node,XMLString.MATH_MATH);
}
return formula;
}
return null;
}
}

View file

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!-- This is a datafile used by Writer2LaTeX
Version 1.2 (2011-09-13)
Version 1.4 (2014-08-05)
The definitions for greek characters are contributed by interzone, info@interzone.gr
and extended by Johannis Likos. Additional bugfixes by Alexej Kryukov
@ -286,7 +286,7 @@ PART I: Common symbols, ascii only
<symbol char="0132" text="{\IJ}" />
<symbol char="0133" text="{\ij}" />
<symbol char="0134" text="\^J" />
<symbol char="0135" text="\^\j" />
<symbol char="0135" text="\^{\j}" />
<symbol char="0136" text="\c{K}" />
<symbol char="0137" text="\c{k}" />
<!-- missing: 138 LATIN SMALL LETTER KRA (greenlandic) -->
@ -3599,7 +3599,7 @@ definitions to the range 00-FF.
<symbol char="F048" text="H" />
<symbol char="F049" text="I" />
<symbol char="F04a" text="J" />
<symbol char="F04b" text="\j" />
<symbol char="F04b" text="\j " />
<symbol char="F04c" text="\textlyoghlig " />
<symbol char="F04d" text="M" />
<symbol char="F04e" text="N" />