From 0a598de7cef75b58a739fb1a7ad164cf43c268b2 Mon Sep 17 00:00:00 2001 From: Georgy Litvinov Date: Wed, 22 Apr 2020 15:34:20 +0200 Subject: [PATCH] Anntotaion metadata --- src/main/java/w2phtml/project.properties | 6 +- src/main/java/w2phtml/xhtml/Converter.java | 100 +++++++----------- src/main/java/w2phtml/xhtml/XhtmlConfig.java | 8 +- .../java/w2phtml/xhtml/content/Separator.java | 37 ++++++- .../w2phtml/xhtml/content/TextParser.java | 99 ++++++++++++++++- 5 files changed, 180 insertions(+), 70 deletions(-) diff --git a/src/main/java/w2phtml/project.properties b/src/main/java/w2phtml/project.properties index 9dc4146..0ad234b 100644 --- a/src/main/java/w2phtml/project.properties +++ b/src/main/java/w2phtml/project.properties @@ -1,3 +1,3 @@ -#Sun Apr 12 12:38:00 CEST 2020 -releaseVersion=0.5.5 -releaseDate=12\:38\:00 12-04-2020 +#Wed Apr 22 15:32:34 CEST 2020 +releaseVersion=0.5.7 +releaseDate=15\:32\:34 22-04-2020 diff --git a/src/main/java/w2phtml/xhtml/Converter.java b/src/main/java/w2phtml/xhtml/Converter.java index d905200..98ed9ff 100644 --- a/src/main/java/w2phtml/xhtml/Converter.java +++ b/src/main/java/w2phtml/xhtml/Converter.java @@ -41,9 +41,12 @@ import java.io.InputStream; import java.io.IOException; import org.hamcrest.core.IsEqual; +import org.json.JSONException; +import org.json.JSONObject; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import org.w3c.dom.Text; import w2phtml.api.Config; @@ -103,7 +106,7 @@ public class Converter extends BasicConverter { // The xhtml output file(s) public int nType = XhtmlDocument.XHTML10; // the doctype - private boolean isOpenPublicationStructure = false; // Do we need to be OPS conforming? + private boolean isEPUB = false; // Do we need to be OPS conforming? public Vector outFiles; private int outFileIndex; private XhtmlDocument htmlDoc; // current outfile @@ -249,9 +252,9 @@ public class Converter extends BasicConverter { public L10n getL10n() { return l10n; } - public void setOpenPubStructure(boolean b) { isOpenPublicationStructure = true; } + public void setOpenPubStructure(boolean b) { isEPUB = true; } - public boolean isOPS() { return isOpenPublicationStructure; } + public boolean isOPS() { return isEPUB; } @Override public void convertInner() throws IOException { sTargetFileName = Misc.trimDocumentName(sTargetFileName,XhtmlDocument.getExtension(nType)); @@ -345,7 +348,7 @@ public class Converter extends BasicConverter { } // Add included style sheet, if any - and we are creating OPS content - if (isOpenPublicationStructure && styleSheet!=null) { + if (isEPUB && styleSheet!=null) { converterResult.addDocument(styleSheet); for (ResourceDocument doc : resources) { converterResult.addDocument(doc); @@ -540,40 +543,10 @@ public class Converter extends BasicConverter { } - public void handleOfficeAnnotation(Node onode, Node hnode) { - if (config.xhtmlNotes()) { - // Extract the text from the paragraphs, separate paragraphs with newline - StringBuilder buf = new StringBuilder(); - Element creator = null; - Element date = null; - Node child = onode.getFirstChild(); - while (child!=null) { - if (Misc.isElement(child, XMLString.TEXT_P)) { - if (buf.length()>0) { buf.append('\n'); } - buf.append(getPlainInlineText(child)); - } - else if (Misc.isElement(child, XMLString.DC_CREATOR)) { - creator = (Element) child; - } - else if (Misc.isElement(child, XMLString.DC_DATE)) { - date = (Element) child; - } - child = child.getNextSibling(); - } - if (creator!=null) { - if (buf.length()>0) { buf.append('\n'); } - buf.append(getPlainInlineText(creator)); - } - if (date!=null) { - if (buf.length()>0) { buf.append('\n'); } - buf.append(Misc.formatDate(OfficeReader.getTextContent(date), l10n.getLocale().getLanguage(), l10n.getLocale().getCountry())); - } - Node commentNode = htmlDOM.createComment(buf.toString()); - hnode.appendChild(commentNode); - } - } + - ///////////////////////////////////////////////////////////////////////// + + ///////////////////////////////////////////////////////////////////////// // UTILITY METHODS // Create output file name (docname.html, docname1.html, docname2.html etc.) @@ -597,15 +570,16 @@ public class Converter extends BasicConverter { public Element getPanelNode() { return htmlDoc.getPanelNode(); } - public String getTitle(){ - String title = metadata.getTitle(); - if (title==null) { - // use filename as fallback - title = htmlDoc.getFileName(); - } - return title; - - } + public String getTitle(){ + String title = metadata.getTitle(); + if (title==null) { + // use filename as fallback + title = htmlDoc.getFileName(); + } + return title; + + } + // Prepare next output file public Element nextOutFile() { htmlDoc = new XhtmlDocument(getOutFileName(++outFileIndex,false),nType); @@ -667,7 +641,7 @@ public class Converter extends BasicConverter { addMetadata(head); // Add link to custom stylesheet, if producing normal XHTML - if (!isOpenPublicationStructure && config.xhtmlCustomStylesheet().length()>0) { + if (!isEPUB && config.xhtmlCustomStylesheet().length()>0) { Element htmlStyle = htmlDOM.createElement("link"); htmlStyle.setAttribute("rel","stylesheet"); htmlStyle.setAttribute("type","text/css"); @@ -677,7 +651,7 @@ public class Converter extends BasicConverter { } // Add link to generated stylesheet if producing normal XHTML and the user wants separate css - if (!isOpenPublicationStructure && config.separateStylesheet()) { + if (!isEPUB && config.separateStylesheet()) { Element htmlStyle = htmlDOM.createElement("link"); htmlStyle.setAttribute("rel","stylesheet"); htmlStyle.setAttribute("type","text/css"); @@ -687,7 +661,7 @@ public class Converter extends BasicConverter { } // Add link to included style sheet if producing OPS content - if (isOpenPublicationStructure && styleSheet!=null) { + if (isEPUB && styleSheet!=null) { Element sty = htmlDOM.createElement("link"); sty.setAttribute("rel", "stylesheet"); sty.setAttribute("type", "text/css"); @@ -708,17 +682,19 @@ public class Converter extends BasicConverter { } private void addMetadata(Element head) { - if (!isOpenPublicationStructure) { - if (config.xhtmlUseDublinCore()) { - addDublinCoreMetadata(head); - } else { - addStandardMetadata(head); - addCustomMetadata(head); - + if (!isEPUB) { + if (!config.useAnnotationMetadata()) { + if (config.xhtmlUseDublinCore()) { + addDublinCoreMetadata(head); + } else { + addStandardMetadata(head); + addCustomMetadata(head); + } } } } + private void addCustomMetadata(Element head) { Map customMetadata = metadata.getCustomMetaData(); for (String name : customMetadata.keySet()) { @@ -732,6 +708,7 @@ public class Converter extends BasicConverter { createMetadataEntry(head,"subject",metadata.getSubject()); createMetadataEntry(head,"keywords",metadata.getKeywords()); createMetadataEntry(head,"creator",metadata.getCreator()); + createMetadataEntry(head,"title",metadata.getTitle()); } private void addDublinCoreMetadata(Element head) { @@ -761,14 +738,14 @@ public class Converter extends BasicConverter { // Add epub namespace for the purpose of semantic inflection in EPUB 3 public void addEpubNs(Element elm) { - if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5) { + if (isEPUB && nType==XhtmlDocument.HTML5) { elm.setAttribute("xmlns:epub", "http://www.idpf.org/2007/ops"); } } // Add a type from the structural semantics vocabulary of EPUB 3 public void addEpubType(Element elm, String sType) { - if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5 && sType!=null) { + if (isEPUB && nType==XhtmlDocument.HTML5 && sType!=null) { elm.setAttribute("epub:type", sType); } } @@ -854,7 +831,12 @@ public class Converter extends BasicConverter { return anchor; } - + public void addHeadMeta(String name, String value) { + if (!isEPUB) { + createMetadataEntry(htmlDoc.getHeadNode(), name, value); + } + } + private void createMetadataEntry(Element head, String name, String value) { if (value==null || value.isEmpty()) { return; } if (name==null || name.isEmpty()) { return; } diff --git a/src/main/java/w2phtml/xhtml/XhtmlConfig.java b/src/main/java/w2phtml/xhtml/XhtmlConfig.java index 9661f94..410666d 100644 --- a/src/main/java/w2phtml/xhtml/XhtmlConfig.java +++ b/src/main/java/w2phtml/xhtml/XhtmlConfig.java @@ -40,7 +40,7 @@ import w2phtml.util.Misc; public class XhtmlConfig extends w2phtml.base.ConfigBase { // Implement configuration methods - protected int getOptionCount() { return 63; } + protected int getOptionCount() { return 64; } protected String getDefaultConfigPath() { return "/writer2latex/xhtml/config/"; } // Override setOption: To be backwards compatible, we must accept options @@ -162,6 +162,7 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase { private static final int PAGINATION = 60; private static final int MIN_LETTER_SPACING = 61; private static final int PAGE_BREAK_STYLE = 62; + private static final int ANNOTATION_METADATA = 63; protected ComplexOption xheading = addComplexOption("heading-map"); protected ComplexOption xpar = addComplexOption("paragraph-map"); @@ -293,6 +294,8 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase { options[CSS_INLINE] = new BooleanOption("css_inline","true"); options[ALIGN_SPLITS_TO_PAGES] = new BooleanOption("align_splits_to_pages","false"); + options[ANNOTATION_METADATA] = new BooleanOption("annotation_metad","true"); + } @@ -473,5 +476,8 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase { return map; } + public boolean useAnnotationMetadata() { + return ((BooleanOption) options[ANNOTATION_METADATA]).getValue(); + } } diff --git a/src/main/java/w2phtml/xhtml/content/Separator.java b/src/main/java/w2phtml/xhtml/content/Separator.java index 1406b49..af7826a 100644 --- a/src/main/java/w2phtml/xhtml/content/Separator.java +++ b/src/main/java/w2phtml/xhtml/content/Separator.java @@ -18,6 +18,8 @@ import w2phtml.xhtml.XhtmlConfig; //LinkedList stringList = new LinkedList(); public class Separator { + private static final String COMMENT_END = ""; + public Node currentComment = null; private static final String NONE = "none"; private static final String SECTIONS = "sections"; private static final String DIV = "div"; @@ -38,6 +40,7 @@ public class Separator { private static Converter converter = null; private Node prevPageContainer = null; private PageContainer pageContainer = null; + private Node metadataComment = null; public Separator(XhtmlConfig config, Converter converter) { this.converter = converter; @@ -295,10 +298,20 @@ public class Separator { replaceWithSpaces(lineBreaks); replaceWithSpaces(textTabs); deleteNotesIn(content); + deleteCommentsIn(content); title = content.getTextContent().trim(); return title; } + private static void deleteCommentsIn(Node content) { + NodeList notes = ((Element) content).getElementsByTagName(XMLString.OFFICE_ANNOTATION); + int j = 0; + while (j < notes.getLength()) { + Node note = notes.item(j); + note.getParentNode().removeChild(note); + } + } + private static void deleteNotesIn(Node content) { NodeList notes = ((Element) content).getElementsByTagName(XMLString.TEXT_NOTE); int j = 0; @@ -320,10 +333,11 @@ public class Separator { } } - private static void openCommentHeading(Node hnode, String title) { + private void openCommentHeading(Node hnode, String title) { Document doc = hnode.getOwnerDocument(); Node openSection = doc.createComment(openHeadingCommentText(title)); // insert open section comment before header node + this.currentComment = openSection; hnode.appendChild(openSection); } @@ -398,12 +412,29 @@ public class Separator { } private static String openHeadingCommentText(String title) { - String comment = "
\n\n" + title + "\n"; + String comment = "
\n\n" + title + "\n" + COMMENT_END; return comment; } + + public void appendMetadata(String name, String value) { + if (currentComment == null) { + System.out.println("Error. No current Comment"); + return; + } + String content = currentComment.getTextContent(); + System.out.println(content); + if (content.endsWith(COMMENT_END)) { + String firstPart = content.substring(0, content.length()- COMMENT_END.length()); + String metadata = "" + value + "\n"; + currentComment.setTextContent(firstPart + metadata + COMMENT_END); + System.out.println("Comment updated."); + } else { + System.out.println("Error. Comment doesn't end with " + COMMENT_END); + } + } private static String openPageCommentText(Integer pageNum) { - String comment = "
\n\n" + pageNum + "\n" + pageNum + "\n"; + String comment = "
\n\n" + pageNum + "\n" + pageNum + "\n"+COMMENT_END; return comment; } diff --git a/src/main/java/w2phtml/xhtml/content/TextParser.java b/src/main/java/w2phtml/xhtml/content/TextParser.java index 2d2075b..56c23f1 100644 --- a/src/main/java/w2phtml/xhtml/content/TextParser.java +++ b/src/main/java/w2phtml/xhtml/content/TextParser.java @@ -28,6 +28,7 @@ package w2phtml.xhtml.content; import static w2phtml.office.XMLString.*; import java.util.Hashtable; +import java.util.Iterator; import java.util.Stack; import org.w3c.dom.Node; @@ -42,6 +43,7 @@ import w2phtml.office.OfficeReader; import w2phtml.office.OfficeStyle; import w2phtml.office.PageLayout; import w2phtml.office.StyleWithProperties; +import w2phtml.office.XMLString; import w2phtml.util.Misc; import w2phtml.xhtml.Converter; import w2phtml.xhtml.ODFPageSplitter; @@ -51,6 +53,10 @@ import w2phtml.xhtml.XhtmlConfig; import w2phtml.xhtml.XhtmlStyleMap; import w2phtml.xhtml.XhtmlStyleMapItem; +import org.hamcrest.core.IsEqual; +import org.json.JSONException; +import org.json.JSONObject; +import org.w3c.dom.Document; import org.w3c.dom.Element; @@ -117,6 +123,7 @@ public class TextParser extends Parser { private String endnotesContext = null; private String footnotesContext = null; PageContainer pageContainer = null; + private boolean applyAnnotationMetadata; public TextParser(OfficeReader ofr, XhtmlConfig config, Converter converter) { super(ofr,config,converter); @@ -133,6 +140,7 @@ public class TextParser extends Parser { nFloatMode = ofr.isText() && config.xhtmlFloatObjects() ? DrawParser.FLOATING : DrawParser.ABSOLUTE; displayHiddenText = config.displayHiddenText(); + applyAnnotationMetadata = config.useAnnotationMetadata(); pageContainer = converter.pageContainer; docSep = new Separator(config, converter); } @@ -402,7 +410,7 @@ public class TextParser extends Parser { breakBeforeNextNode = true; } else if (nodeName.equals(OFFICE_ANNOTATION)) { - converter.handleOfficeAnnotation(child,hnode); + handleOfficeAnnotation(child,hnode); } else if (nodeName.equals(TEXT_SEQUENCE_DECLS)) { //handleSeqeuenceDecls(child); @@ -823,7 +831,7 @@ public class TextParser extends Parser { } else if (sName.equals(TEXT_BIBLIOGRAPHY_MARK)) { handleBibliographyMark(child, hnode); } else if (sName.equals(OFFICE_ANNOTATION)) { - converter.handleOfficeAnnotation(child, hnode); + handleOfficeAnnotation(child, hnode); } else if (sName.startsWith("text:")) { traverseInlineText(child, hnode); } @@ -1433,7 +1441,7 @@ public class TextParser extends Parser { if (Misc.isElement(node) && Misc.getAttribute(node, TEXT_OUTLINE_LEVEL) != null && !Misc.getAttribute(node, TEXT_OUTLINE_LEVEL).isEmpty()) { - String title = docSep.getTitle(node).trim(); + String title = Separator.getTitle(node).trim(); if (title == null || title.isEmpty()) { return false; } @@ -1441,6 +1449,89 @@ public class TextParser extends Parser { } return false; } - + public void handleOfficeAnnotation(Node onode, Node hnode) { + if (applyAnnotationMetadata) { + parseAnnotationMetadata(onode); + } + if (config.xhtmlNotes()) { + // Extract the text from the paragraphs, separate paragraphs with newline + StringBuilder buf = new StringBuilder(); + Element creator = null; + Element date = null; + Node child = onode.getFirstChild(); + while (child != null) { + if (Misc.isElement(child, XMLString.TEXT_P)) { + if (buf.length() > 0) { + buf.append('\n'); + } + buf.append(converter.getPlainInlineText(child)); + } else if (Misc.isElement(child, XMLString.DC_CREATOR)) { + creator = (Element) child; + } else if (Misc.isElement(child, XMLString.DC_DATE)) { + date = (Element) child; + } + child = child.getNextSibling(); + } + if (creator != null) { + if (buf.length() > 0) { + buf.append('\n'); + } + buf.append(converter.getPlainInlineText(creator)); + } + if (date != null) { + if (buf.length() > 0) { + buf.append('\n'); + } + buf.append(date); + } + Document doc = hnode.getOwnerDocument(); + Node commentNode = doc.createComment(buf.toString()); + hnode.appendChild(commentNode); + } + } + private void parseAnnotationMetadata(Node onode) { + NodeList annotationNodes = onode.getChildNodes(); + String jsonMetadata = null; + String author = null; + for (int i = 0 ; i < annotationNodes.getLength(); i++) { + Node node = annotationNodes.item(i); + if (Misc.isElement(node, XMLString.TEXT_P)) { + jsonMetadata = converter.getPlainInlineText(node); + } else + if (Misc.isElement(node, XMLString.DC_CREATOR)){ + author = converter.getPlainInlineText(node); + } + } + if (author != null && author.equals("Metadata Extension")) { + addJSONMetadata(jsonMetadata); + } + } + + public void addJSONMetadata(String data) { + Iterator names = null; + JSONObject json = null; + try { + json = new JSONObject(data); + names = json.keys(); + } catch (JSONException e) { + e.printStackTrace(); + } catch (Exception e) { + e.printStackTrace(); + } + if (names == null || json == null) { + return; + } + while (names.hasNext()) { + String metaName = names.next(); + String metaValue = (String) json.get(metaName); + writeMetadata(metaName, metaValue); + } + } + + private void writeMetadata(String name, String value) { + converter.addHeadMeta(name, value); + docSep.appendMetadata(name, value); + } + }