Zotero integration + EPUB split + a few other fixes

git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@76 f0f2a975-2e09-46c8-9428-3b39399b9f3c
This commit is contained in:
henrikjust 2010-10-30 10:35:46 +00:00
parent b415705e47
commit 2174e5cbf5
13 changed files with 2323 additions and 74 deletions

View file

@ -16,14 +16,13 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2009 by Henrik Just
* Copyright: 2002-2010 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.0 (2009-03-08)
* Version 1.2 (2010-10-27)
*/
//
package org.openoffice.da.comp.w2lcommon.filter;
@ -78,7 +77,7 @@ public class GraphicConverterImpl1 implements GraphicConverter {
if (bCrop || bResize) { return false; }
// We can convert vector formats to eps:
if (MIMETypes.EPS.equals(sTargetMime) && (MIMETypes.WMF.equals(sSourceMime) && MIMETypes.SVM.equals(sSourceMime))) {
if (MIMETypes.EPS.equals(sTargetMime) && (MIMETypes.WMF.equals(sSourceMime) || MIMETypes.SVM.equals(sSourceMime))) {
return true;
}

View file

@ -20,7 +20,7 @@
*
* All Rights Reserved.
*
* Version 1.2 (2010-10-27)
* Version 1.2 (2010-10-30)
*
*/
@ -33,7 +33,7 @@ public class ConverterFactory {
// Version information
private static final String VERSION = "1.1.5";
private static final String DATE = "2010-10-27";
private static final String DATE = "2010-10-30";
/** Return the Writer2LaTeX version in the form
* (major version).(minor version).(patch level)<br/>

View file

@ -20,7 +20,7 @@
*
* All Rights Reserved.
*
* Version 1.2 (2010-10-04)
* Version 1.2 (2010-10-30)
*
*/
@ -583,6 +583,8 @@ public class InlineConverter extends ConverterHelper {
case LaTeXConfig.IGNORE: return;
case LaTeXConfig.COMMENT:
// Get the unformatted text of all paragraphs and insert each paragraph as a single comment
Element creator = null;
Element date = null;
ldp.append("%").nl();
Node child = node.getFirstChild();
while (child!=null) {
@ -591,8 +593,24 @@ public class InlineConverter extends ConverterHelper {
traversePlainInlineText((Element)child, ldp, oc);
ldp.nl();
}
else if (Misc.isElement(child, XMLString.DC_CREATOR)) {
creator = (Element) child;
}
else if (Misc.isElement(child, XMLString.DC_DATE)) {
date = (Element) child;
}
child = child.getNextSibling();
}
if (creator!=null) {
ldp.append("%");
traversePlainInlineText(creator, ldp, oc);
ldp.nl();
}
if (date!=null) {
ldp.append("%")
.append(Misc.formatDate(ofr.getTextContent(date), palette.getI18n().getDefaultLanguage(), null))
.nl();
}
return;
case LaTeXConfig.PDFANNOTATION:
bHasPdfannotation = true;
@ -608,6 +626,8 @@ public class InlineConverter extends ConverterHelper {
// Get the unformatted text of all paragraphs, separated by spaces
ldp.append(sCommand).append("{");
Element creator = null;
Element date = null;
boolean bFirst = true;
Node child = node.getFirstChild();
while (child!=null) {
@ -616,9 +636,24 @@ public class InlineConverter extends ConverterHelper {
traversePlainInlineText((Element)child, ldp, oc);
bFirst = false;
}
child = child.getNextSibling();
else if (Misc.isElement(child, XMLString.DC_CREATOR)) {
creator = (Element) child;
}
else if (Misc.isElement(child, XMLString.DC_DATE)) {
date = (Element) child;
}
child = child.getNextSibling();
}
if (creator!=null) {
if (!bFirst) ldp.append(" - ");
traversePlainInlineText(creator, ldp, oc);
}
if (date!=null) {
if (creator!=null) ldp.append(", ");
else if (!bFirst) ldp.append(" ");
ldp.append(Misc.formatDate(ofr.getTextContent(date), palette.getI18n().getDefaultLanguage(), null));
}
ldp.append("}");
}

View file

@ -16,11 +16,11 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
* Copyright: 2002-2009 by Henrik Just
* Copyright: 2002-2010 by Henrik Just
*
* All Rights Reserved.
*
* Version 1.2 (2009-03-26)
* Version 1.2 (2010-10-30)
*
*/
@ -44,11 +44,11 @@ public abstract class I18n {
// Configuration items
protected LaTeXConfig config;
protected ReplacementTrie stringReplace;
protected boolean bGreekMath; // Use math mode for greek letters
protected boolean bGreekMath; // Use math mode for Greek letters
protected boolean bAlwaysUseDefaultLang; // Ignore sLang parameter to convert()
// Collected data
protected String sDefaultLanguage; // The default iso language to use
protected String sDefaultLanguage; // The default ISO language to use
protected HashSet<String> languages = new HashSet<String>(); // All languages used
// **** Constructors ****
@ -92,7 +92,7 @@ public abstract class I18n {
public abstract void appendDeclarations(LaTeXDocumentPortion pack, LaTeXDocumentPortion decl);
/** Apply a language
* @param style the OOo style to read attributesfrom
* @param style the OOo style to read attributes from
* @param bDecl true if declaration form is required
* @param bInherit true if inherited properties should be used
* @param ba the <code>BeforeAfter</code> to add LaTeX code to.
@ -111,8 +111,16 @@ public abstract class I18n {
/** Convert a string of characters into LaTeX
* @param s the source string
* @param bMathMode true if the string should be rendered in math mode
* @param sLang the iso language of the string
* @param sLang the ISO language of the string
* @return the LaTeX string
*/
public abstract String convert(String s, boolean bMathMode, String sLang);
/** Get the default language (either the document language or the most used language)
 *
 * @return the default ISO language as currently stored in <code>sDefaultLanguage</code>
 * (NOTE(review): may presumably be <code>null</code> if no default language has been
 * determined yet - confirm against the subclasses that assign the field)
 */
public String getDefaultLanguage() {
    return sDefaultLanguage;
}
}

View file

@ -20,7 +20,7 @@
*
* All Rights Reserved.
*
* Version 1.2 (2010-10-27)
* Version 1.2 (2010-10-30)
*
*/
@ -34,7 +34,12 @@ import java.io.UnsupportedEncodingException;
import java.lang.Math;
import java.net.URLEncoder;
import java.net.URLDecoder;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.Locale;
//import java.util.Hashtable;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
@ -52,6 +57,20 @@ public class Misc{
for (int i=0; i<n; i++) { newArray[i] = array[i]; }
return newArray;
}
/** Format a date/time string of the form <code>yyyy-MM-dd'T'HH:mm:ss</code> for display
 *  in a given locale.
 *
 *  @param sDate the date/time string to format; may be <code>null</code>
 *  @param sLanguage the language code used to select the locale; if <code>null</code>,
 *  the default locale of the JVM is used
 *  @param sCountry the country code used to select the locale; if <code>null</code>,
 *  a language-only locale is used
 *  @return the date in the long date/short time format of the selected locale, or
 *  <code>sDate</code> itself (unchanged) if it is <code>null</code> or cannot be parsed
 */
public static final String formatDate(String sDate, String sLanguage, String sCountry) {
    // Nothing to parse: hand the input back unchanged rather than failing inside the parser
    if (sDate==null) { return null; }

    // A new formatter is created per call because SimpleDateFormat is not thread safe
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss");
    Date date = null;
    try {
        date = sdf.parse(sDate);
    } catch (ParseException e) {
        // If the date cannot be parsed according to the given pattern, return the original string
        return sDate;
    }

    // Select the locale; an unknown language falls back to the JVM default
    // (new Locale(null) would throw a NullPointerException)
    Locale locale;
    if (sLanguage==null) {
        locale = Locale.getDefault();
    }
    else if (sCountry!=null) {
        locale = new Locale(sLanguage,sCountry);
    }
    else {
        locale = new Locale(sLanguage);
    }

    // Return using a default format for the selected locale
    return DateFormat.getDateTimeInstance(DateFormat.LONG, DateFormat.SHORT, locale).format(date);
}
public static final String int2roman(int number) {
assert number>0; // Only works for positive numbers!

View file

@ -20,7 +20,7 @@
*
* All Rights Reserved.
*
* Version 1.2 (2010-05-17)
* Version 1.2 (2010-10-30)
*
*/
@ -201,12 +201,7 @@ public class Converter extends ConverterBase {
// Set locale to document language
StyleWithProperties style = ofr.isSpreadsheet() ? ofr.getDefaultCellStyle() : ofr.getDefaultParStyle();
if (style!=null) {
String sLang = style.getProperty(XMLString.FO_LANGUAGE);
String sCountry = style.getProperty(XMLString.FO_COUNTRY);
if (sLang!=null) {
if (sCountry==null) { l10n.setLocale(sLang); }
else { l10n.setLocale(sLang+"-"+sCountry); }
}
l10n.setLocale(style.getProperty(XMLString.FO_LANGUAGE), style.getProperty(XMLString.FO_COUNTRY));
}
// Traverse the body
@ -463,16 +458,32 @@ public class Converter extends ConverterBase {
public void handleOfficeAnnotation(Node onode, Node hnode) {
if (config.xhtmlNotes()) {
// Extract the text from the paragraphs, seperate paragraphs with newline
// Extract the text from the paragraphs, separate paragraphs with newline
StringBuffer buf = new StringBuffer();
Element creator = null;
Element date = null;
Node child = onode.getFirstChild();
while (child!=null) {
if (Misc.isElement(child, XMLString.TEXT_P)) {
if (buf.length()>0) { buf.append('\n'); }
buf.append(getPlainInlineText(child));
}
else if (Misc.isElement(child, XMLString.DC_CREATOR)) {
creator = (Element) child;
}
else if (Misc.isElement(child, XMLString.DC_DATE)) {
date = (Element) child;
}
child = child.getNextSibling();
}
if (creator!=null) {
if (buf.length()>0) { buf.append('\n'); }
buf.append(getPlainInlineText(creator));
}
if (date!=null) {
if (buf.length()>0) { buf.append('\n'); }
buf.append(Misc.formatDate(ofr.getTextContent(date), l10n.getLocale().getLanguage(), l10n.getLocale().getCountry()));
}
Node commentNode = htmlDOM.createComment(buf.toString());
hnode.appendChild(commentNode);
}

View file

@ -26,6 +26,8 @@
package writer2latex.xhtml;
import java.util.Locale;
// This class handles localized strings (used for navigation)
public class L10n {
public final static int UP = 0;
@ -39,21 +41,36 @@ public class L10n {
public final static int DIRECTORY = 8;
public final static int DOCUMENT = 9;
private Locale locale = null;
private String sLocale="en-US";
/** Set the locale string used to select the localized strings.
 *
 *  @param sLocale the new locale string (e.g. "en-US"); a <code>null</code> value is
 *  ignored and leaves the current setting untouched
 */
public void setLocale(String sLocale) {
    // NOTE(review): only the string form is stored here; the Locale object returned by
    // getLocale() is not updated by this method - confirm whether this overload is still used
    if (sLocale==null) { return; }
    this.sLocale = sLocale;
}
/** Set the locale from separate language and country codes.
 *  Both the <code>Locale</code> object and the string form (language, optionally followed
 *  by "-" and the country) are updated; the string form is always derived from the
 *  <code>Locale</code> object.
 *
 *  @param sLanguage the language code; if <code>null</code>, the default locale of the
 *  JVM is used (and <code>sCountry</code> is ignored)
 *  @param sCountry the country code; if <code>null</code>, a language-only locale is created
 */
public void setLocale(String sLanguage, String sCountry) {
    // First determine the Locale object
    if (sLanguage==null) {
        locale = Locale.getDefault();
    }
    else if (sCountry!=null) {
        locale = new Locale(sLanguage,sCountry);
    }
    else {
        locale = new Locale(sLanguage);
    }

    // Then derive the string form from it in a single place, so that both
    // representations always agree
    if (locale.getCountry().length()>0) {
        sLocale = locale.getLanguage()+"-"+locale.getCountry();
    }
    else {
        sLocale = locale.getLanguage();
    }
}
/** Get the current locale as a <code>Locale</code> object.
 *
 *  @return the locale set by <code>setLocale(String,String)</code>; if that method has
 *  not been called, a locale derived from the current locale string (initially "en-US").
 *  Never <code>null</code>.
 */
public Locale getLocale() {
    if (locale!=null) { return locale; }
    // The Locale object is only assigned by setLocale(String,String); until then fall
    // back to the string form so that callers never have to deal with null
    int nSeparator = sLocale.indexOf('-');
    if (nSeparator<0) { return new Locale(sLocale); }
    return new Locale(sLocale.substring(0,nSeparator), sLocale.substring(nSeparator+1));
}
public String get(int nString) {
if (sLocale.startsWith("de")) { // german
if (sLocale.startsWith("de")) { // German
switch (nString) {
case UP: return "Nach oben";
case FIRST : return "Anfang";
@ -67,7 +84,7 @@ public class L10n {
case DOCUMENT: return "Dokument";
}
}
if (sLocale.startsWith("fr")) { // french
if (sLocale.startsWith("fr")) { // French
switch (nString) {
case UP: return "Haut";
case FIRST : return "D\u00e9but";
@ -81,7 +98,7 @@ public class L10n {
case DOCUMENT: return "Document";
}
}
if (sLocale.startsWith("es")) { // spanish
if (sLocale.startsWith("es")) { // Spanish
switch (nString) {
case UP: return "Arriba";
case FIRST : return "Primero";
@ -95,7 +112,7 @@ public class L10n {
case DOCUMENT: return "Documento";
}
}
if (sLocale.startsWith("it")) { // italian
if (sLocale.startsWith("it")) { // Italian
switch (nString) {
case UP: return "Su";
case FIRST : return "Inizio";
@ -109,7 +126,7 @@ public class L10n {
case DOCUMENT: return "Documento";
}
}
if (sLocale.startsWith("pt")) { // (brazilian) portuguese
if (sLocale.startsWith("pt")) { // (Brazilian) Portuguese
switch (nString) {
case UP: return "Acima";
case FIRST : return "Primeiro";
@ -123,7 +140,7 @@ public class L10n {
case DOCUMENT: return "Documento";
}
}
if (sLocale.startsWith("cs")) { // czech
if (sLocale.startsWith("cs")) { // Czech
switch (nString) {
case UP: return "Nahoru";
case FIRST : return "Prvn\u00ed";
@ -137,7 +154,7 @@ public class L10n {
case DOCUMENT: return "Dokument";
}
}
if (sLocale.startsWith("nl")) { // dutch
if (sLocale.startsWith("nl")) { // Dutch
switch (nString) {
case UP: return "Omhoog";
case FIRST : return "Eerste";
@ -151,7 +168,7 @@ public class L10n {
case DOCUMENT: return "Document";
}
}
if (sLocale.startsWith("da")) { // danish
if (sLocale.startsWith("da")) { // Danish
switch (nString) {
case UP: return "Op";
case FIRST : return "F\u00F8rste";
@ -165,7 +182,7 @@ public class L10n {
case DOCUMENT: return "Dokument";
}
}
if (sLocale.startsWith("nn")) { // nynorsk
if (sLocale.startsWith("nn")) { // Nynorsk
switch (nString) {
case UP: return "Opp";
case FIRST : return "F\u00f8rste";
@ -179,7 +196,7 @@ public class L10n {
case DOCUMENT: return "Dokument";
}
}
if (sLocale.startsWith("pl")) { // polish
if (sLocale.startsWith("pl")) { // Polish
switch (nString) {
case UP: return "W g\u00f3r\u0119";
case FIRST : return "Pierwsza";
@ -193,7 +210,7 @@ public class L10n {
case DOCUMENT: return "Dokument";
}
}
if (sLocale.startsWith("fi")) { // finnish
if (sLocale.startsWith("fi")) { // Finnish
switch (nString) {
case UP: return "Yl\u00f6s";
case FIRST : return "Ensimm\u00e4inen";
@ -207,7 +224,7 @@ public class L10n {
case DOCUMENT: return "Dokumentti";
}
}
if (sLocale.startsWith("ru")) { // russian
if (sLocale.startsWith("ru")) { // Russian
switch (nString) {
case UP: return "\u0412\u0432\u0435\u0440\u0445";
case FIRST : return "\u041f\u0435\u0440\u0432\u0430\u044f";
@ -221,7 +238,7 @@ public class L10n {
case DOCUMENT: return "\u0414\u043e\u043a\u0443\u043c\u0435\u043d\u0442";
}
}
if (sLocale.startsWith("uk")) { // ukrainian
if (sLocale.startsWith("uk")) { // Ukrainian
switch (nString) {
case UP: return "\u041d\u0430\u0433\u043e\u0440\u0443";
case FIRST : return "\u041f\u0435\u0440\u0448\u0430";
@ -235,7 +252,7 @@ public class L10n {
case DOCUMENT: return "\u0414\u043e\u043a\u0443\u043c\u0435\u043d\u0442";
}
}
if (sLocale.startsWith("tr")) { // turkish
if (sLocale.startsWith("tr")) { // Turkish
switch (nString) {
case UP: return "Yukar\u0131";
case FIRST : return "\u0130lk";
@ -249,7 +266,7 @@ public class L10n {
case DOCUMENT: return "D\u00f6k\u00fcman";
}
}
if (sLocale.startsWith("hr")) { // croatian
if (sLocale.startsWith("hr")) { // Croatian
switch (nString) {
case UP: return "Up";
case FIRST : return "Prvi";
@ -262,7 +279,7 @@ public class L10n {
case DOCUMENT: return "Document";
}
}
// english - default
// English - default
switch (nString) {
case UP: return "Up";
case FIRST : return "First";

View file

@ -20,7 +20,7 @@
*
* All Rights Reserved.
*
* Version 1.2 (2010-07-02)
* Version 1.2 (2010-10-30)
*
*/
@ -79,6 +79,10 @@ public class TextConverter extends ConverterHelper {
// Data used to handle splitting over several files
// TODO: Accessor methods for sections
// Some (Sony?) EPUB readers have a limit on the file size of individual files
// In any case very large files could be a performance problem, hence we do automatic splitting
// after this number of characters. TODO: Make configurable.
private static final int EPUB_CHARACTER_COUNT_TRESHOLD = 150000;
int nSplit = 0; // The outline level at which to split files (0=no split)
int nRepeatLevels = 5; // The number of levels to repeat when splitting (0=no repeat)
private int nLastSplitLevel = 1; // The outline level at which the last split occured
@ -86,6 +90,7 @@ public class TextConverter extends ConverterHelper {
boolean bAfterHeading=false; // last element was a top level heading
protected Stack<Node> sections = new Stack<Node>(); // stack of nested sections
Element[] currentHeading = new Element[7]; // Last headings (repeated when splitting)
private int nCharacterCount = 0; // The number of text characters in the current document
// Counters for generated numbers
private ListCounter outlineNumbering;
@ -352,6 +357,8 @@ public class TextConverter extends ConverterHelper {
getDrawCv().handleDrawElement((Element)child,(Element)hnode,null,nFloatMode);
}
else if (nodeName.equals(XMLString.TEXT_P)) {
hnode = maybeSplit(hnode);
nCharacterCount+=OfficeReader.getCharacterCount(child);
// is there a block element, we should use?
XhtmlStyleMap xpar = config.getXParStyleMap();
String sDisplayName = ofr.getParStyles().getDisplayName(Misc.getAttribute(child,XMLString.TEXT_STYLE_NAME));
@ -395,14 +402,16 @@ public class TextConverter extends ConverterHelper {
else if(nodeName.equals(XMLString.TEXT_H)) {
int nOutlineLevel = getOutlineLevel((Element)child);
Node rememberNode = hnode;
hnode = maybeSplit(hnode,nOutlineLevel,bAfterHeading);
hnode = maybeSplit(hnode,nOutlineLevel);
nCharacterCount+=OfficeReader.getCharacterCount(child);
handleHeading((Element)child,hnode,rememberNode!=hnode);
}
else if (nodeName.equals(XMLString.TEXT_LIST) || // oasis
nodeName.equals(XMLString.TEXT_UNORDERED_LIST) || // old
nodeName.equals(XMLString.TEXT_ORDERED_LIST)) // old
{
if (listIsOnlyHeadings(child)) {
hnode = maybeSplit(hnode);
if (listIsOnlyHeadings(child)) {
nDontSplitLevel--;
hnode = handleFakeList(child,nLevel+1,styleName,hnode);
nDontSplitLevel++;
@ -412,19 +421,21 @@ public class TextConverter extends ConverterHelper {
}
}
else if (nodeName.equals(XMLString.TABLE_TABLE)) {
hnode = maybeSplit(hnode);
getTableCv().handleTable(child,hnode);
}
else if (nodeName.equals(XMLString.TABLE_SUB_TABLE)) {
getTableCv().handleTable(child,hnode);
}
else if (nodeName.equals(XMLString.TEXT_SECTION)) {
hnode = maybeSplit(hnode);
nDontSplitLevel--;
hnode = handleSection(child,hnode);
nDontSplitLevel++;
}
else if (nodeName.equals(XMLString.TEXT_TABLE_OF_CONTENT)) {
if (!ofr.getTocReader((Element)child).isByChapter()) {
hnode = maybeSplit(hnode,1,bAfterHeading);
hnode = maybeSplit(hnode,1);
}
handleTOC(child,hnode);
}
@ -441,11 +452,11 @@ public class TextConverter extends ConverterHelper {
handleUserIndex(child,hnode);
}
else if (nodeName.equals(XMLString.TEXT_ALPHABETICAL_INDEX)) {
hnode = maybeSplit(hnode,1,bAfterHeading);
hnode = maybeSplit(hnode,1);
handleAlphabeticalIndex(child,hnode);
}
else if (nodeName.equals(XMLString.TEXT_BIBLIOGRAPHY)) {
hnode = maybeSplit(hnode,1,bAfterHeading);
hnode = maybeSplit(hnode,1);
handleBibliography(child,hnode);
}
else if (nodeName.equals(XMLString.OFFICE_ANNOTATION)) {
@ -466,7 +477,21 @@ public class TextConverter extends ConverterHelper {
return hnode;
}
private Node maybeSplit(Node node, int nLevel, boolean bAfterHeading) {
/** Request a file split based on the amount of text only: if the converter reports
 *  OPS output and the character count of the current document exceeds
 *  <code>EPUB_CHARACTER_COUNT_TRESHOLD</code>, a split at level 0 is attempted.
 *
 *  @param node the current output node
 *  @return the node returned by <code>doMaybeSplit</code> if a split was attempted,
 *  otherwise <code>node</code> itself
 */
private Node maybeSplit(Node node) {
    // Not OPS output, or still below the size limit: stay in the current file
    if (!converter.isOPS() || nCharacterCount<=EPUB_CHARACTER_COUNT_TRESHOLD) {
        return node;
    }
    return doMaybeSplit(node, 0);
}
/** Request a file split at a given outline level. If the converter reports OPS output
 *  and the character count of the current document exceeds
 *  <code>EPUB_CHARACTER_COUNT_TRESHOLD</code>, level 0 is used instead of the given level.
 *
 *  @param node the current output node
 *  @param nLevel the outline level of the element that may cause a split
 *  @return the node returned by <code>doMaybeSplit</code>
 */
private Node maybeSplit(Node node, int nLevel) {
    // An oversized OPS document overrides the requested level with level 0
    boolean bTooLarge = converter.isOPS() && nCharacterCount>EPUB_CHARACTER_COUNT_TRESHOLD;
    int nEffectiveLevel = bTooLarge ? 0 : nLevel;
    return doMaybeSplit(node, nEffectiveLevel);
}
private Node doMaybeSplit(Node node, int nLevel) {
if (nDontSplitLevel>1) { // we cannot split due to a nested structure
return node;
}
@ -478,6 +503,7 @@ public class TextConverter extends ConverterHelper {
}
if (nSplit>=nLevel && converter.outFileHasContent()) {
// No objections, this is a level that causes splitting
nCharacterCount = 0;
return converter.nextOutFile();
}
return node;
@ -636,7 +662,7 @@ public class TextConverter extends ConverterHelper {
boolean bIsEmpty = OfficeReader.isWhitespaceContent(onode);
if (config.ignoreEmptyParagraphs() && bIsEmpty) { return; }
String sStyleName = Misc.getAttribute(onode,XMLString.TEXT_STYLE_NAME);
Element par;
if (ofr.isSpreadsheet()) { // attach inline text directly to parent (always a table cell)
par = (Element) hnode;
@ -1044,7 +1070,7 @@ public class TextConverter extends ConverterHelper {
nDontSplitLevel++;
int nOutlineLevel = getOutlineLevel((Element)onode);
Node rememberNode = hnode;
hnode = maybeSplit(hnode,nOutlineLevel,bAfterHeading);
hnode = maybeSplit(hnode,nOutlineLevel);
handleHeading((Element)child, hnode, rememberNode!=hnode,
ofr.getListStyle(sStyleName), nLevel,
bUnNumbered, bRestart, nStartValue);

View file

@ -20,7 +20,7 @@
*
* All Rights Reserved.
*
* Version 1.2 (2010-06-19)
* Version 1.2 (2010-10-27)
*
*/
@ -598,11 +598,16 @@ public class XhtmlDocument extends DOMDocument {
else if (node.hasChildNodes()) {
int nNextLevel = (nLevel<0 || blockThis((Element)node)) ? -1 : nLevel+1;
// Print start tag
if (nLevel>=0) { writeSpaces(nLevel,osw); }
osw.write("<"+node.getNodeName());
writeAttributes(node,osw);
osw.write(">");
if (nNextLevel>=0) { osw.write("\n"); }
boolean bRedundantElement = !node.hasAttributes() &&
(node.getNodeName().equals("a") || node.getNodeName().equals("span"));
if (!bRedundantElement) {
// Writer2xhtml may produce <a> and <span> without attributes, these are removed here
if (nLevel>=0) { writeSpaces(nLevel,osw); }
osw.write("<"+node.getNodeName());
writeAttributes(node,osw);
osw.write(">");
if (nNextLevel>=0) { osw.write("\n"); }
}
// Print children
Node child = node.getFirstChild();
while (child!=null) {
@ -610,9 +615,11 @@ public class XhtmlDocument extends DOMDocument {
child = child.getNextSibling();
}
// Print end tag
if (nNextLevel>=0) { writeSpaces(nLevel,osw); }
osw.write("</"+node.getNodeName()+">");
if (nLevel>=0) { osw.write("\n"); }
if (!bRedundantElement) {
if (nNextLevel>=0) { writeSpaces(nLevel,osw); }
osw.write("</"+node.getNodeName()+">");
if (nLevel>=0) { osw.write("\n"); }
}
}
else { // empty element
if (nLevel>=0) { writeSpaces(nLevel,osw); }