Annotation metadata

2020-04-22 15:34:20 +02:00 · 2020-04-22 15:34:20 +02:00 · 0a598de7ce
commit 0a598de7ce
parent ad8bfd03b2
5 changed files with 180 additions and 70 deletions
--- a/src/main/java/w2phtml/project.properties
+++ b/src/main/java/w2phtml/project.properties
@ -1,3 +1,3 @@
-#Sun Apr 12 12:38:00 CEST 2020
-releaseVersion=0.5.5
-releaseDate=12\:38\:00 12-04-2020
+#Wed Apr 22 15:32:34 CEST 2020
+releaseVersion=0.5.7
+releaseDate=15\:32\:34 22-04-2020
--- a/src/main/java/w2phtml/xhtml/Converter.java
+++ b/src/main/java/w2phtml/xhtml/Converter.java
@ -41,9 +41,12 @@ import java.io.InputStream;
 import java.io.IOException;

 import org.hamcrest.core.IsEqual;
+import org.json.JSONException;
+import org.json.JSONObject;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
 import org.w3c.dom.Text;

 import w2phtml.api.Config;
@ -103,7 +106,7 @@ public class Converter extends BasicConverter {
    
    // The xhtml output file(s)
    public int nType = XhtmlDocument.XHTML10; // the doctype
-    private boolean isOpenPublicationStructure = false; // Do we need to be OPS conforming?
+    private boolean isEPUB = false; // Do we need to be OPS conforming?
    public Vector<XhtmlDocument> outFiles;
    private int outFileIndex;
    private XhtmlDocument htmlDoc; // current outfile
@ -249,9 +252,9 @@ public class Converter extends BasicConverter {
 	
    public L10n getL10n() { return l10n; }
    
-    public void setOpenPubStructure(boolean b) { isOpenPublicationStructure = true; }
+    public void setOpenPubStructure(boolean b) { isEPUB = b; } // store the requested flag; it used to be forced to true whatever b was
    
-    public boolean isOPS() { return isOpenPublicationStructure; }
+    public boolean isOPS() { return isEPUB; }
    
    @Override public void convertInner() throws IOException {      
        sTargetFileName = Misc.trimDocumentName(sTargetFileName,XhtmlDocument.getExtension(nType));
@ -345,7 +348,7 @@ public class Converter extends BasicConverter {
        }

        // Add included style sheet, if any - and we are creating OPS content
-        if (isOpenPublicationStructure && styleSheet!=null) {
+        if (isEPUB && styleSheet!=null) {
        	converterResult.addDocument(styleSheet);
        	for (ResourceDocument doc : resources) {
        		converterResult.addDocument(doc);
@ -540,40 +543,10 @@ public class Converter extends BasicConverter {
    }


-    public void handleOfficeAnnotation(Node onode, Node hnode) {
-        if (config.xhtmlNotes()) {
-            // Extract the text from the paragraphs, separate paragraphs with newline
-        	StringBuilder buf = new StringBuilder();
-        	Element creator = null;
-        	Element date = null;
-        	Node child = onode.getFirstChild();
-        	while (child!=null) {
-        		if (Misc.isElement(child, XMLString.TEXT_P)) {
-        			if (buf.length()>0) { buf.append('\n'); }
-        			buf.append(getPlainInlineText(child));
-        		}
-        		else if (Misc.isElement(child, XMLString.DC_CREATOR)) {
-        			creator = (Element) child;
-        		}
-        		else if (Misc.isElement(child, XMLString.DC_DATE)) {
-        			date = (Element) child;
-        		}
-        		child = child.getNextSibling();
-        	}
-        	if (creator!=null) {
-    			if (buf.length()>0) { buf.append('\n'); }
-    			buf.append(getPlainInlineText(creator));        		
-        	}
-        	if (date!=null) {
-    			if (buf.length()>0) { buf.append('\n'); }
-    			buf.append(Misc.formatDate(OfficeReader.getTextContent(date), l10n.getLocale().getLanguage(), l10n.getLocale().getCountry()));
-        	}
-            Node commentNode = htmlDOM.createComment(buf.toString());
-            hnode.appendChild(commentNode);
-        }
-    }
+		
 	
-    /////////////////////////////////////////////////////////////////////////
+
+		/////////////////////////////////////////////////////////////////////////
    // UTILITY METHODS
 	
    // Create output file name (docname.html, docname1.html, docname2.html etc.)
@ -597,15 +570,16 @@ public class Converter extends BasicConverter {
    public Element getPanelNode() {
        return htmlDoc.getPanelNode();
    }
-	public String getTitle(){
-		String title = metadata.getTitle();
-    	if (title==null) { 
-    		// use filename as fallback
-    		title = htmlDoc.getFileName();
-    	}
-		return title;
-		
-	}
+  	public String getTitle(){
+  		String title = metadata.getTitle();
+      	if (title==null) { 
+      		// use filename as fallback
+      		title = htmlDoc.getFileName();
+      	}
+  		return title;
+  		
+  	}
+
    // Prepare next output file
    public Element nextOutFile() {
        htmlDoc = new XhtmlDocument(getOutFileName(++outFileIndex,false),nType);
@ -667,7 +641,7 @@ public class Converter extends BasicConverter {
 				addMetadata(head);

 				// Add link to custom stylesheet, if producing normal XHTML
-				if (!isOpenPublicationStructure && config.xhtmlCustomStylesheet().length()>0) {
+				if (!isEPUB && config.xhtmlCustomStylesheet().length()>0) {
 					Element htmlStyle = htmlDOM.createElement("link");
 					htmlStyle.setAttribute("rel","stylesheet");
 					htmlStyle.setAttribute("type","text/css");
@ -677,7 +651,7 @@ public class Converter extends BasicConverter {
 				}
 				
 				// Add link to generated stylesheet if producing normal XHTML and the user wants separate css
-				if (!isOpenPublicationStructure && config.separateStylesheet()) {
+				if (!isEPUB && config.separateStylesheet()) {
 					Element htmlStyle = htmlDOM.createElement("link");
 					htmlStyle.setAttribute("rel","stylesheet");
 					htmlStyle.setAttribute("type","text/css");
@ -687,7 +661,7 @@ public class Converter extends BasicConverter {
 				}

 				// Add link to included style sheet if producing OPS content
-				if (isOpenPublicationStructure && styleSheet!=null) {
+				if (isEPUB && styleSheet!=null) {
 					Element sty = htmlDOM.createElement("link");
 					sty.setAttribute("rel", "stylesheet");
 					sty.setAttribute("type", "text/css");
@ -708,17 +682,19 @@ public class Converter extends BasicConverter {
 		}

 		private void addMetadata(Element head) {
-			if (!isOpenPublicationStructure) {
-				if (config.xhtmlUseDublinCore()) {
-					addDublinCoreMetadata(head);
-				} else {
-					addStandardMetadata(head);
-					addCustomMetadata(head);
-					
+			if (!isEPUB) {
+				if (!config.useAnnotationMetadata()) {
+					if (config.xhtmlUseDublinCore()) {
+						addDublinCoreMetadata(head);
+					} else {
+						addStandardMetadata(head);
+						addCustomMetadata(head);
+					}
 				}
 			}
 		}

+
 		private void addCustomMetadata(Element head) {
 			Map<String, String> customMetadata = metadata.getCustomMetaData();
 			for (String name : customMetadata.keySet()) {
@ -732,6 +708,7 @@ public class Converter extends BasicConverter {
 			createMetadataEntry(head,"subject",metadata.getSubject());
 			createMetadataEntry(head,"keywords",metadata.getKeywords());
 			createMetadataEntry(head,"creator",metadata.getCreator());
+			createMetadataEntry(head,"title",metadata.getTitle());
 		}

 		private void addDublinCoreMetadata(Element head) {
@ -761,14 +738,14 @@ public class Converter extends BasicConverter {
    
    // Add epub namespace for the purpose of semantic inflection in EPUB 3
    public void addEpubNs(Element elm) {
-    	if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5) {
+    	if (isEPUB && nType==XhtmlDocument.HTML5) {
           	elm.setAttribute("xmlns:epub", "http://www.idpf.org/2007/ops");
    	}    	
    }
    
 	// Add a type from the structural semantics vocabulary of EPUB 3
    public void addEpubType(Element elm, String sType) {
-    	if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5 && sType!=null) {
+    	if (isEPUB && nType==XhtmlDocument.HTML5 && sType!=null) {
    		elm.setAttribute("epub:type", sType);
    	}
    }
@ -854,7 +831,12 @@ public class Converter extends BasicConverter {
        return anchor;
    }

-	
+    // Adds a name/value entry to the head of the current output document; does nothing when isEPUB is set
+    public void addHeadMeta(String name, String value) {
+    	if (isEPUB) { return; }
+    	createMetadataEntry(htmlDoc.getHeadNode(), name, value);
+    }
+    
    private void createMetadataEntry(Element head, String name, String value) {
        if (value==null || value.isEmpty()) { return; }
        if (name==null || name.isEmpty()) { return; }
--- a/src/main/java/w2phtml/xhtml/XhtmlConfig.java
+++ b/src/main/java/w2phtml/xhtml/XhtmlConfig.java
@ -40,7 +40,7 @@ import w2phtml.util.Misc;

 public class XhtmlConfig extends w2phtml.base.ConfigBase {
    // Implement configuration methods
-    protected int getOptionCount() { return 63; }
+    protected int getOptionCount() { return 64; }
    protected String getDefaultConfigPath() { return "/writer2latex/xhtml/config/"; }
 	
    // Override setOption: To be backwards compatible, we must accept options
@ -162,6 +162,7 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase {
    private static final int PAGINATION = 60;
    private static final int MIN_LETTER_SPACING = 61;
    private static final int PAGE_BREAK_STYLE = 62;
+    private static final int ANNOTATION_METADATA = 63;
    
    protected ComplexOption xheading = addComplexOption("heading-map");
    protected ComplexOption xpar = addComplexOption("paragraph-map");
@ -293,6 +294,8 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase {

        options[CSS_INLINE] = new BooleanOption("css_inline","true");
        options[ALIGN_SPLITS_TO_PAGES] = new BooleanOption("align_splits_to_pages","false");
+        options[ANNOTATION_METADATA] = new BooleanOption("annotation_metad","true");
+


    }
@ -473,5 +476,8 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase {
    	return map;

    }
+		public boolean useAnnotationMetadata() {
+			return ((BooleanOption) options[ANNOTATION_METADATA]).getValue();
+		}
 }

--- a/src/main/java/w2phtml/xhtml/content/Separator.java
+++ b/src/main/java/w2phtml/xhtml/content/Separator.java
@ -18,6 +18,8 @@ import w2phtml.xhtml.XhtmlConfig;
 //LinkedList<String>   stringList = new LinkedList<String>();
 public class Separator {

+	private static final String COMMENT_END = "</Description>";
+	public Node currentComment = null; 
 	private static final String NONE = "none";
 	private static final String SECTIONS = "sections";
 	private static final String DIV = "div";
@ -38,6 +40,7 @@ public class Separator {
 	private static Converter converter = null;
 	private Node prevPageContainer = null;
 	private PageContainer pageContainer = null;
+	private Node metadataComment = null;

 	public Separator(XhtmlConfig config, Converter converter) {
 		this.converter = converter;
@ -295,10 +298,20 @@ public class Separator {
 		replaceWithSpaces(lineBreaks);
 		replaceWithSpaces(textTabs);
 		deleteNotesIn(content);
+		deleteCommentsIn(content);
 		title = content.getTextContent().trim();
 		return title;
 	}

+	// Detaches every XMLString.OFFICE_ANNOTATION descendant of content; the NodeList is live, so item 0 is always the next one left
+	private static void deleteCommentsIn(Node content) {
+		NodeList annotations = ((Element) content).getElementsByTagName(XMLString.OFFICE_ANNOTATION);
+		while (annotations.getLength() > 0) {
+			Node annotation = annotations.item(0);
+			annotation.getParentNode().removeChild(annotation);
+		}
+	}
+	
 	private static void deleteNotesIn(Node content) {
 		NodeList notes = ((Element) content).getElementsByTagName(XMLString.TEXT_NOTE);
 		int j = 0;
@ -320,10 +333,11 @@ public class Separator {
 		}
 	}

-	private static void openCommentHeading(Node hnode, String title) {
+	private void openCommentHeading(Node hnode, String title) {
 		Document doc = hnode.getOwnerDocument();
 		Node openSection = doc.createComment(openHeadingCommentText(title));
 		// insert open section comment before header node
+		this.currentComment = openSection;
 		hnode.appendChild(openSection);
 	}

@ -398,12 +412,29 @@ public class Separator {
 	}

 	private static String openHeadingCommentText(String title) {
-		String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + title + "</Metadata>\n</Description>";
+		String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + title + "</Metadata>\n" + COMMENT_END;
 		return comment;
 	}
+	
+	// Inserts one <Metadata name="..."> entry just before the closing COMMENT_END of currentComment
+	public void appendMetadata(String name, String value) {
+		if (currentComment == null) {
+			System.err.println("Error. No current Comment");
+			return;
+		}
+		String content = currentComment.getTextContent();
+		if (!content.endsWith(COMMENT_END)) {
+			System.err.println("Error. Comment doesn't end with " + COMMENT_END);
+			return;
+		}
+		// Cut off the terminator, add the new entry, then put the terminator back at the end
+		String firstPart = content.substring(0, content.length() - COMMENT_END.length());
+		String metadata = "<Metadata name=\"" + name + "\">" + value + "</Metadata>\n";
+		currentComment.setTextContent(firstPart + metadata + COMMENT_END);
+	}

 	private static String openPageCommentText(Integer pageNum) {
-		String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + pageNum + "</Metadata>\n<Metadata name=\"Page\">" + pageNum + "</Metadata>\n</Description>";
+		String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + pageNum + "</Metadata>\n<Metadata name=\"Page\">" + pageNum + "</Metadata>\n"+COMMENT_END;
 		return comment;
 	}

--- a/src/main/java/w2phtml/xhtml/content/TextParser.java
+++ b/src/main/java/w2phtml/xhtml/content/TextParser.java
@ -28,6 +28,7 @@ package w2phtml.xhtml.content;
 import static w2phtml.office.XMLString.*;

 import java.util.Hashtable;
+import java.util.Iterator;
 import java.util.Stack;

 import org.w3c.dom.Node;
@ -42,6 +43,7 @@ import w2phtml.office.OfficeReader;
 import w2phtml.office.OfficeStyle;
 import w2phtml.office.PageLayout;
 import w2phtml.office.StyleWithProperties;
+import w2phtml.office.XMLString;
 import w2phtml.util.Misc;
 import w2phtml.xhtml.Converter;
 import w2phtml.xhtml.ODFPageSplitter;
@ -51,6 +53,10 @@ import w2phtml.xhtml.XhtmlConfig;
 import w2phtml.xhtml.XhtmlStyleMap;
 import w2phtml.xhtml.XhtmlStyleMapItem;

+import org.hamcrest.core.IsEqual;
+import org.json.JSONException;
+import org.json.JSONObject;
+import org.w3c.dom.Document;
 import org.w3c.dom.Element;


@ -117,6 +123,7 @@ public class TextParser extends Parser {
 	private String endnotesContext = null;
 	private String footnotesContext = null;
 	PageContainer pageContainer = null;
+	private boolean applyAnnotationMetadata;

    public TextParser(OfficeReader ofr, XhtmlConfig config, Converter converter) {
        super(ofr,config,converter);
@ -133,6 +140,7 @@ public class TextParser extends Parser {
        nFloatMode = ofr.isText() && config.xhtmlFloatObjects() ? 
        DrawParser.FLOATING : DrawParser.ABSOLUTE;
        displayHiddenText = config.displayHiddenText();
+        applyAnnotationMetadata = config.useAnnotationMetadata();
        pageContainer = converter.pageContainer;
        docSep = new Separator(config, converter);
    }
@ -402,7 +410,7 @@ public class TextParser extends Parser {
                		breakBeforeNextNode = true;
                }
                else if (nodeName.equals(OFFICE_ANNOTATION)) {
-                    converter.handleOfficeAnnotation(child,hnode);
+                    handleOfficeAnnotation(child,hnode);
                }
                else if (nodeName.equals(TEXT_SEQUENCE_DECLS)) {
                    //handleSeqeuenceDecls(child);
@ -823,7 +831,7 @@ public class TextParser extends Parser {
  					} else if (sName.equals(TEXT_BIBLIOGRAPHY_MARK)) {
  						handleBibliographyMark(child, hnode);
  					} else if (sName.equals(OFFICE_ANNOTATION)) {
-  						converter.handleOfficeAnnotation(child, hnode);
+  						handleOfficeAnnotation(child, hnode);
  					} else if (sName.startsWith("text:")) {
  						traverseInlineText(child, hnode);
  					}
@ -1433,7 +1441,7 @@ public class TextParser extends Parser {
 		if (Misc.isElement(node)
 				&& Misc.getAttribute(node, TEXT_OUTLINE_LEVEL) != null
 				&& !Misc.getAttribute(node, TEXT_OUTLINE_LEVEL).isEmpty()) {
-				String title = docSep.getTitle(node).trim();
+				String title = Separator.getTitle(node).trim();
 				if (title == null || title.isEmpty()) {
 					return false;
 				}
@ -1441,6 +1449,89 @@ public class TextParser extends Parser {
 		}
 		return false;
 	}
-	
+	/**
+	 * Handles an OFFICE_ANNOTATION node: optionally harvests metadata from it and, when notes are
+	 * enabled, appends its paragraph text, creator and formatted date to hnode as a comment node.
+	 */
+	public void handleOfficeAnnotation(Node onode, Node hnode) {
+		if (applyAnnotationMetadata) {
+			parseAnnotationMetadata(onode);
+		}
+		if (!config.xhtmlNotes()) {
+			return;
+		}
+		// Extract the text from the paragraphs, separate paragraphs with newline
+		StringBuilder buf = new StringBuilder();
+		Element creator = null;
+		Element date = null;
+		for (Node child = onode.getFirstChild(); child != null; child = child.getNextSibling()) {
+			if (Misc.isElement(child, XMLString.TEXT_P)) {
+				if (buf.length() > 0) { buf.append('\n'); }
+				buf.append(converter.getPlainInlineText(child));
+			} else if (Misc.isElement(child, XMLString.DC_CREATOR)) {
+				creator = (Element) child;
+			} else if (Misc.isElement(child, XMLString.DC_DATE)) {
+				date = (Element) child;
+			}
+		}
+		if (creator != null) {
+			if (buf.length() > 0) { buf.append('\n'); }
+			buf.append(converter.getPlainInlineText(creator));
+		}
+		if (date != null) {
+			if (buf.length() > 0) { buf.append('\n'); }
+			// Append the date's text formatted for the document locale, not the Element object itself
+			buf.append(Misc.formatDate(OfficeReader.getTextContent(date),
+					converter.getL10n().getLocale().getLanguage(),
+					converter.getL10n().getLocale().getCountry()));
+		}
+		Document doc = hnode.getOwnerDocument();
+		Node commentNode = doc.createComment(buf.toString());
+		hnode.appendChild(commentNode);
+	}
+  // Reads the annotation's last TEXT_P text and its DC_CREATOR text; the paragraph text is forwarded
+  // as JSON metadata only when the creator is exactly "Metadata Extension" and a paragraph was found.
+  private void parseAnnotationMetadata(Node onode) {
+  	String jsonMetadata = null;
+  	String author = null;
+  	for (Node node = onode.getFirstChild(); node != null; node = node.getNextSibling()) {
+  		if (Misc.isElement(node, XMLString.TEXT_P)) {
+  			jsonMetadata = converter.getPlainInlineText(node);
+  		} else if (Misc.isElement(node, XMLString.DC_CREATOR)) {
+  			author = converter.getPlainInlineText(node);
+  		}
+  	}
+  	// Without a paragraph there is nothing to parse; skip rather than hand null to the JSON parser
+  	if (jsonMetadata != null && "Metadata Extension".equals(author)) {
+  		addJSONMetadata(jsonMetadata);
+  	}
+  }
+  
+	/**
+	 * Parses data as a JSON object and writes each top-level entry as a metadata name/value pair.
+	 * Null input is ignored, malformed JSON is reported and ignored, JSON null values are skipped.
+	 */
+	public void addJSONMetadata(String data) {
+		if (data == null) { return; }
+		JSONObject json;
+		try {
+			json = new JSONObject(data);
+		} catch (JSONException e) {
+			e.printStackTrace();
+			return;
+		}
+		Iterator<String> names = json.keys();
+		while (names.hasNext()) {
+			String metaName = names.next();
+			// A plain (String) cast would throw ClassCastException for numbers, booleans or nested JSON
+			if (!json.isNull(metaName)) { writeMetadata(metaName, String.valueOf(json.get(metaName))); }
+		}
+	}
+
+	private void writeMetadata(String name, String value) {
+		converter.addHeadMeta(name, value);
+		docSep.appendMetadata(name, value);
+	}
+

 }