Zotero integration + EPUB split + a few other fixes

git-svn-id: svn://svn.code.sf.net/p/writer2latex/code/trunk@76 f0f2a975-2e09-46c8-9428-3b39399b9f3c
2010-10-30 10:35:46 +00:00 · 2010-10-30 10:35:46 +00:00 · 2174e5cbf5
commit 2174e5cbf5
parent b415705e47
13 changed files with 2323 additions and 74 deletions
--- a/source/java/writer2latex/xhtml/Converter.java
+++ b/source/java/writer2latex/xhtml/Converter.java
@ -20,7 +20,7 @@
 *
 *  All Rights Reserved.
 * 
- *  Version 1.2 (2010-05-17)
+ *  Version 1.2 (2010-10-30)
 *
 */

@ -201,12 +201,7 @@ public class Converter extends ConverterBase {
        // Set locale to document language
        StyleWithProperties style = ofr.isSpreadsheet() ? ofr.getDefaultCellStyle() : ofr.getDefaultParStyle();
        if (style!=null) {
-            String sLang = style.getProperty(XMLString.FO_LANGUAGE);
-            String sCountry = style.getProperty(XMLString.FO_COUNTRY);
-            if (sLang!=null) {
-                if (sCountry==null) { l10n.setLocale(sLang); }
-                else { l10n.setLocale(sLang+"-"+sCountry); }
-            }
+            l10n.setLocale(style.getProperty(XMLString.FO_LANGUAGE), style.getProperty(XMLString.FO_COUNTRY));
        }

        // Traverse the body
@ -463,16 +458,32 @@ public class Converter extends ConverterBase {

    public void handleOfficeAnnotation(Node onode, Node hnode) {
        if (config.xhtmlNotes()) {
-            // Extract the text from the paragraphs, seperate paragraphs with newline
+            // Extract the text from the paragraphs, separate paragraphs with newline
        	StringBuffer buf = new StringBuffer();
+        	Element creator = null;
+        	Element date = null;
        	Node child = onode.getFirstChild();
        	while (child!=null) {
        		if (Misc.isElement(child, XMLString.TEXT_P)) {
        			if (buf.length()>0) { buf.append('\n'); }
        			buf.append(getPlainInlineText(child));
        		}
+        		else if (Misc.isElement(child, XMLString.DC_CREATOR)) {
+        			creator = (Element) child;
+        		}
+        		else if (Misc.isElement(child, XMLString.DC_DATE)) {
+        			date = (Element) child;
+        		}
        		child = child.getNextSibling();
        	}
+        	if (creator!=null) {
+    			if (buf.length()>0) { buf.append('\n'); }
+    			buf.append(getPlainInlineText(creator));        		
+        	}
+        	if (date!=null) {
+    			if (buf.length()>0) { buf.append('\n'); }
+    			buf.append(Misc.formatDate(ofr.getTextContent(date), l10n.getLocale().getLanguage(), l10n.getLocale().getCountry()));
+        	}
            Node commentNode = htmlDOM.createComment(buf.toString());
            hnode.appendChild(commentNode);
        }
--- a/source/java/writer2latex/xhtml/L10n.java
+++ b/source/java/writer2latex/xhtml/L10n.java
@ -26,6 +26,8 @@

 package writer2latex.xhtml;

+import java.util.Locale;
+
 // This class handles localized strings (used for navigation)
 public class L10n {
    public final static int UP = 0;
@ -39,21 +41,36 @@ public class L10n {
    public final static int DIRECTORY = 8;
    public final static int DOCUMENT = 9;

+    private Locale locale = new Locale("en","US"); // Never null: same default as sLocale ("en-US"), so getLocale() is safe before setLocale() is called
    private String sLocale="en-US";
 	
-    public void setLocale(String sLocale) {
-        if (sLocale!=null) { this.sLocale = sLocale;}
-    }
-	
    public void setLocale(String sLanguage, String sCountry) {
        if (sLanguage!=null) {
-            if (sCountry!=null) { sLocale = sLanguage + "-" + sCountry; }
-            else  { sLocale = sLanguage; }
+            if (sCountry!=null) {
+            	locale = new Locale(sLanguage,sCountry);
+            }
+            else  {
+            	locale = new Locale(sLanguage);
+            }
        }
+        else {
+        	locale = Locale.getDefault();
+        }
+
+        if (locale.getCountry().length()>0) {
+        	sLocale = locale.getLanguage()+"-"+locale.getCountry();
+        }
+        else {
+        	sLocale = locale.getLanguage();
+        }   
+    }
+    
+    public Locale getLocale() {
+    	return locale;
    }
 	
    public String get(int nString) {
-        if (sLocale.startsWith("de")) { // german
+        if (sLocale.startsWith("de")) { // German
            switch (nString) {
                case UP: return "Nach oben";
                case FIRST : return "Anfang";
@ -67,7 +84,7 @@ public class L10n {
                case DOCUMENT: return "Dokument";
            }
        }
-        if (sLocale.startsWith("fr")) { // french
+        if (sLocale.startsWith("fr")) { // French
            switch (nString) {
            	case UP: return "Haut";
            	case FIRST : return "D\u00e9but";
@ -81,7 +98,7 @@ public class L10n {
            	case DOCUMENT: return "Document";
            }
        }
-        if (sLocale.startsWith("es")) { // spanish
+        if (sLocale.startsWith("es")) { // Spanish
            switch (nString) {
                case UP: return "Arriba";
                case FIRST : return "Primero";
@ -95,7 +112,7 @@ public class L10n {
                case DOCUMENT: return "Documento";
            }
        }
-        if (sLocale.startsWith("it")) { // italian
+        if (sLocale.startsWith("it")) { // Italian
            switch (nString) {
            	case UP: return "Su";
            	case FIRST : return "Inizio";
@ -109,7 +126,7 @@ public class L10n {
            	case DOCUMENT: return "Documento";     
            }
        }
-        if (sLocale.startsWith("pt")) { // (brazilian) portuguese
+        if (sLocale.startsWith("pt")) { // (Brazilian) Portuguese
            switch (nString) {
            	case UP: return "Acima";
            	case FIRST : return "Primeiro";
@ -123,7 +140,7 @@ public class L10n {
            	case DOCUMENT: return "Documento";     
            }
        }
-        if (sLocale.startsWith("cs")) { // czech
+        if (sLocale.startsWith("cs")) { // Czech
            switch (nString) {
            	case UP: return "Nahoru";
            	case FIRST : return "Prvn\u00ed";
@ -137,7 +154,7 @@ public class L10n {
            	case DOCUMENT: return "Dokument";     
            }
        }
-        if (sLocale.startsWith("nl")) { // dutch
+        if (sLocale.startsWith("nl")) { // Dutch
            switch (nString) {
            case UP: return "Omhoog";
            case FIRST : return "Eerste";
@ -151,7 +168,7 @@ public class L10n {
            case DOCUMENT: return "Document";  
            }
        }
-        if (sLocale.startsWith("da")) { // danish
+        if (sLocale.startsWith("da")) { // Danish
            switch (nString) {
                case UP: return "Op";
                case FIRST : return "F\u00F8rste";
@ -165,7 +182,7 @@ public class L10n {
                case DOCUMENT: return "Dokument";
            }
        }
-        if (sLocale.startsWith("nn")) { // nynorsk
+        if (sLocale.startsWith("nn")) { // Nynorsk
            switch (nString) {
                case UP: return "Opp";
                case FIRST : return "F\u00f8rste";
@ -179,7 +196,7 @@ public class L10n {
                case DOCUMENT: return "Dokument";
            }
        }
-        if (sLocale.startsWith("pl")) { // polish
+        if (sLocale.startsWith("pl")) { // Polish
        	switch (nString) {
        		case UP: return "W g\u00f3r\u0119";
        		case FIRST : return "Pierwsza";
@ -193,7 +210,7 @@ public class L10n {
        		case DOCUMENT: return "Dokument";
        	}
        }
-        if (sLocale.startsWith("fi")) { // finnish
+        if (sLocale.startsWith("fi")) { // Finnish
        	switch (nString) {
        		case UP: return "Yl\u00f6s";
        		case FIRST : return "Ensimm\u00e4inen";
@ -207,7 +224,7 @@ public class L10n {
        		case DOCUMENT: return "Dokumentti";
        	}
        }
-        if (sLocale.startsWith("ru")) { // russian
+        if (sLocale.startsWith("ru")) { // Russian
            switch (nString) {
            	case UP: return "\u0412\u0432\u0435\u0440\u0445";
            	case FIRST : return "\u041f\u0435\u0440\u0432\u0430\u044f";
@ -221,7 +238,7 @@ public class L10n {
            	case DOCUMENT: return "\u0414\u043e\u043a\u0443\u043c\u0435\u043d\u0442";
            }
        }
-        if (sLocale.startsWith("uk")) { // ukrainian
+        if (sLocale.startsWith("uk")) { // Ukrainian
            switch (nString) {
            	case UP: return "\u041d\u0430\u0433\u043e\u0440\u0443";
            	case FIRST : return "\u041f\u0435\u0440\u0448\u0430";
@ -235,7 +252,7 @@ public class L10n {
            	case DOCUMENT: return "\u0414\u043e\u043a\u0443\u043c\u0435\u043d\u0442";
            }
        }
-        if (sLocale.startsWith("tr")) { // turkish
+        if (sLocale.startsWith("tr")) { // Turkish
            switch (nString) {
            	case UP: return "Yukar\u0131";
            	case FIRST : return "\u0130lk";
@ -249,7 +266,7 @@ public class L10n {
            	case DOCUMENT: return "D\u00f6k\u00fcman";
            }        	
        }
-        if (sLocale.startsWith("hr")) { // croatian
+        if (sLocale.startsWith("hr")) { // Croatian
            switch (nString) {
                case UP: return "Up";
                case FIRST : return "Prvi";
@ -262,7 +279,7 @@ public class L10n {
                case DOCUMENT: return "Document";
            }
        }
-        // english - default
+        // English - default
        switch (nString) {
            case UP: return "Up";
            case FIRST : return "First";
--- a/source/java/writer2latex/xhtml/TextConverter.java
+++ b/source/java/writer2latex/xhtml/TextConverter.java
@ -20,7 +20,7 @@
 *
 *  All Rights Reserved.
 * 
- *  Version 1.2 (2010-07-02)
+ *  Version 1.2 (2010-10-30)
 *
 */

@ -79,6 +79,10 @@ public class TextConverter extends ConverterHelper {

    // Data used to handle splitting over several files
    // TODO: Accessor methods for sections
+	// Some (Sony?) EPUB readers have a limit on the file size of individual files.
+	// In any case, very large files could be a performance problem; hence we split automatically
+	// once the character count exceeds this number. TODO: Make configurable.
+	private static final int EPUB_CHARACTER_COUNT_TRESHOLD = 150000;
    int nSplit = 0;  // The outline level at which to split files (0=no split)
    int nRepeatLevels = 5; // The number of levels to repeat when splitting (0=no repeat)
    private int nLastSplitLevel = 1; // The outline level at which the last split occured
@ -86,6 +90,7 @@ public class TextConverter extends ConverterHelper {
    boolean bAfterHeading=false; // last element was a top level heading
    protected Stack<Node> sections = new Stack<Node>(); // stack of nested sections
    Element[] currentHeading = new Element[7]; // Last headings (repeated when splitting)
+    private int nCharacterCount = 0; // The number of text characters in the current document

    // Counters for generated numbers
    private ListCounter outlineNumbering;
@ -352,6 +357,8 @@ public class TextConverter extends ConverterHelper {
                    getDrawCv().handleDrawElement((Element)child,(Element)hnode,null,nFloatMode);
                }
                else if (nodeName.equals(XMLString.TEXT_P)) {
+                	hnode = maybeSplit(hnode);
+                	nCharacterCount+=OfficeReader.getCharacterCount(child);
                    // is there a block element, we should use?
                    XhtmlStyleMap xpar = config.getXParStyleMap();
                    String sDisplayName = ofr.getParStyles().getDisplayName(Misc.getAttribute(child,XMLString.TEXT_STYLE_NAME));
@ -395,14 +402,16 @@ public class TextConverter extends ConverterHelper {
                else if(nodeName.equals(XMLString.TEXT_H)) {
                    int nOutlineLevel = getOutlineLevel((Element)child);
                    Node rememberNode = hnode;
-                    hnode = maybeSplit(hnode,nOutlineLevel,bAfterHeading);
+                    hnode = maybeSplit(hnode,nOutlineLevel);
+                	nCharacterCount+=OfficeReader.getCharacterCount(child);
                    handleHeading((Element)child,hnode,rememberNode!=hnode);
                }
                else if (nodeName.equals(XMLString.TEXT_LIST) || // oasis
                         nodeName.equals(XMLString.TEXT_UNORDERED_LIST) || // old
                         nodeName.equals(XMLString.TEXT_ORDERED_LIST)) // old
                    {
-                    if (listIsOnlyHeadings(child)) {
+                	hnode = maybeSplit(hnode);
+                	if (listIsOnlyHeadings(child)) {
                        nDontSplitLevel--;
                        hnode = handleFakeList(child,nLevel+1,styleName,hnode);
                        nDontSplitLevel++;
@ -412,19 +421,21 @@ public class TextConverter extends ConverterHelper {
                    }
                }
                else if (nodeName.equals(XMLString.TABLE_TABLE)) {
+                	hnode = maybeSplit(hnode);
                    getTableCv().handleTable(child,hnode);
                }
                else if (nodeName.equals(XMLString.TABLE_SUB_TABLE)) {
                    getTableCv().handleTable(child,hnode);
                }
                else if (nodeName.equals(XMLString.TEXT_SECTION)) {
+                	hnode = maybeSplit(hnode);
                    nDontSplitLevel--;
                    hnode = handleSection(child,hnode);
                    nDontSplitLevel++;
                }
                else if (nodeName.equals(XMLString.TEXT_TABLE_OF_CONTENT)) {
                    if (!ofr.getTocReader((Element)child).isByChapter()) {
-                        hnode = maybeSplit(hnode,1,bAfterHeading);
+                        hnode = maybeSplit(hnode,1);
                    }
                    handleTOC(child,hnode);
                }
@ -441,11 +452,11 @@ public class TextConverter extends ConverterHelper {
                    handleUserIndex(child,hnode);
                }
                else if (nodeName.equals(XMLString.TEXT_ALPHABETICAL_INDEX)) {
-                    hnode = maybeSplit(hnode,1,bAfterHeading);
+                    hnode = maybeSplit(hnode,1);
                    handleAlphabeticalIndex(child,hnode);
                }
                else if (nodeName.equals(XMLString.TEXT_BIBLIOGRAPHY)) {
-                    hnode = maybeSplit(hnode,1,bAfterHeading);
+                    hnode = maybeSplit(hnode,1);
                    handleBibliography(child,hnode);
                }
                else if (nodeName.equals(XMLString.OFFICE_ANNOTATION)) {
@ -466,7 +477,21 @@ public class TextConverter extends ConverterHelper {
        return hnode;
    }
    
-    private Node maybeSplit(Node node, int nLevel, boolean bAfterHeading) {
+    private Node maybeSplit(Node node) {
+    	if (converter.isOPS() && nCharacterCount>EPUB_CHARACTER_COUNT_TRESHOLD) {
+    		return doMaybeSplit(node, 0);
+    	}
+    	return node;
+    }
+    
+    private Node maybeSplit(Node node, int nLevel) {
+    	if (converter.isOPS() && nCharacterCount>EPUB_CHARACTER_COUNT_TRESHOLD) {
+    		return doMaybeSplit(node, 0);
+    	}
+    	return doMaybeSplit(node, nLevel);
+    }
+
+    private Node doMaybeSplit(Node node, int nLevel) {
        if (nDontSplitLevel>1) { // we cannot split due to a nested structure
            return node;
        }
@ -478,6 +503,7 @@ public class TextConverter extends ConverterHelper {
        }
        if (nSplit>=nLevel && converter.outFileHasContent()) {
            // No objections, this is a level that causes splitting
+        	nCharacterCount = 0;
            return converter.nextOutFile();
        }
        return node;
@ -636,7 +662,7 @@ public class TextConverter extends ConverterHelper {
        boolean bIsEmpty = OfficeReader.isWhitespaceContent(onode);
        if (config.ignoreEmptyParagraphs() && bIsEmpty) { return; }
        String sStyleName = Misc.getAttribute(onode,XMLString.TEXT_STYLE_NAME);
-
+        
        Element par;
        if (ofr.isSpreadsheet()) { // attach inline text directly to parent (always a table cell)
            par = (Element) hnode;
@ -1044,7 +1070,7 @@ public class TextConverter extends ConverterHelper {
                    nDontSplitLevel++;
                    int nOutlineLevel = getOutlineLevel((Element)onode);
                    Node rememberNode = hnode;
-                    hnode = maybeSplit(hnode,nOutlineLevel,bAfterHeading);
+                    hnode = maybeSplit(hnode,nOutlineLevel);
                    handleHeading((Element)child, hnode, rememberNode!=hnode,
                        ofr.getListStyle(sStyleName), nLevel,
                        bUnNumbered, bRestart, nStartValue);
--- a/source/java/writer2latex/xhtml/XhtmlDocument.java
+++ b/source/java/writer2latex/xhtml/XhtmlDocument.java
@ -20,7 +20,7 @@
 *
 *  All Rights Reserved.
 * 
- *  Version 1.2 (2010-06-19)
+ *  Version 1.2 (2010-10-27)
 *
 */
 
@ -598,11 +598,16 @@ public class XhtmlDocument extends DOMDocument {
                else if (node.hasChildNodes()) {
                    int nNextLevel = (nLevel<0 || blockThis((Element)node)) ? -1 : nLevel+1;
                    // Print start tag
-                    if (nLevel>=0) { writeSpaces(nLevel,osw); }
-                    osw.write("<"+node.getNodeName());
-                    writeAttributes(node,osw);
-                    osw.write(">");
-                    if (nNextLevel>=0) { osw.write("\n"); }
+                    boolean bRedundantElement = !node.hasAttributes() &&
+                    	(node.getNodeName().equals("a") || node.getNodeName().equals("span")); 
+                    if (!bRedundantElement) {
+                    	// Writer2xhtml may produce <a> and <span> without attributes; their tags are omitted here, but their children are still written
+                    	if (nLevel>=0) { writeSpaces(nLevel,osw); }
+                    	osw.write("<"+node.getNodeName());
+                    	writeAttributes(node,osw);
+                    	osw.write(">");
+                    	if (nNextLevel>=0) { osw.write("\n"); }
+                    }
                    // Print children
                    Node child = node.getFirstChild();
                    while (child!=null) {
@ -610,9 +615,11 @@ public class XhtmlDocument extends DOMDocument {
                        child = child.getNextSibling();
                    }
                    // Print end tag
-                    if (nNextLevel>=0) { writeSpaces(nLevel,osw); }
-                    osw.write("</"+node.getNodeName()+">");
-                    if (nLevel>=0) { osw.write("\n"); }
+                    if (!bRedundantElement) {
+                    	if (nNextLevel>=0) { writeSpaces(nLevel,osw); }
+                    	osw.write("</"+node.getNodeName()+">");
+                    	if (nLevel>=0) { osw.write("\n"); }
+                    }
                }
                else { // empty element
                    if (nLevel>=0) { writeSpaces(nLevel,osw); }