Annotation metadata

This commit is contained in:
Georgy Litvinov 2020-04-22 15:34:20 +02:00
parent ad8bfd03b2
commit 0a598de7ce
5 changed files with 180 additions and 70 deletions

View file

@ -1,3 +1,3 @@
#Sun Apr 12 12:38:00 CEST 2020
releaseVersion=0.5.5
releaseDate=12\:38\:00 12-04-2020
#Wed Apr 22 15:32:34 CEST 2020
releaseVersion=0.5.7
releaseDate=15\:32\:34 22-04-2020

View file

@ -41,9 +41,12 @@ import java.io.InputStream;
import java.io.IOException;
import org.hamcrest.core.IsEqual;
import org.json.JSONException;
import org.json.JSONObject;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import w2phtml.api.Config;
@ -103,7 +106,7 @@ public class Converter extends BasicConverter {
// The xhtml output file(s)
public int nType = XhtmlDocument.XHTML10; // the doctype
private boolean isOpenPublicationStructure = false; // Do we need to be OPS conforming?
private boolean isEPUB = false; // Do we need to be OPS conforming?
public Vector<XhtmlDocument> outFiles;
private int outFileIndex;
private XhtmlDocument htmlDoc; // current outfile
@ -249,9 +252,9 @@ public class Converter extends BasicConverter {
// Returns this converter's l10n object.
public L10n getL10n() { return l10n; }
public void setOpenPubStructure(boolean b) { isOpenPublicationStructure = true; }
/**
 * Switches Open Publication Structure (EPUB) output on or off.
 *
 * @param b {@code true} to request OPS/EPUB conforming output, {@code false} otherwise
 */
public void setOpenPubStructure(boolean b) { isEPUB = b; }
public boolean isOPS() { return isOpenPublicationStructure; }
// Returns the isEPUB flag (the flag written by setOpenPubStructure).
public boolean isOPS() { return isEPUB; }
@Override public void convertInner() throws IOException {
sTargetFileName = Misc.trimDocumentName(sTargetFileName,XhtmlDocument.getExtension(nType));
@ -345,7 +348,7 @@ public class Converter extends BasicConverter {
}
// Add included style sheet, if any - and we are creating OPS content
if (isOpenPublicationStructure && styleSheet!=null) {
if (isEPUB && styleSheet!=null) {
converterResult.addDocument(styleSheet);
for (ResourceDocument doc : resources) {
converterResult.addDocument(doc);
@ -540,40 +543,10 @@ public class Converter extends BasicConverter {
}
/**
 * Writes an office annotation into the output as an HTML comment.
 * Does nothing unless {@code config.xhtmlNotes()} is enabled. The comment text is built
 * from the annotation's paragraphs, then its creator, then its formatted date, each
 * separated by a newline.
 *
 * @param onode the annotation node whose children are scanned
 * @param hnode the node that receives the generated comment as a new last child
 */
public void handleOfficeAnnotation(Node onode, Node hnode) {
    if (!config.xhtmlNotes()) {
        return;
    }
    StringBuilder text = new StringBuilder();
    Element creatorElm = null;
    Element dateElm = null;
    // Paragraph text is collected in document order; creator and date are only remembered here
    for (Node current = onode.getFirstChild(); current != null; current = current.getNextSibling()) {
        if (Misc.isElement(current, XMLString.TEXT_P)) {
            if (text.length() > 0) {
                text.append('\n');
            }
            text.append(getPlainInlineText(current));
        } else if (Misc.isElement(current, XMLString.DC_CREATOR)) {
            creatorElm = (Element) current;
        } else if (Misc.isElement(current, XMLString.DC_DATE)) {
            dateElm = (Element) current;
        }
    }
    if (creatorElm != null) {
        if (text.length() > 0) {
            text.append('\n');
        }
        text.append(getPlainInlineText(creatorElm));
    }
    if (dateElm != null) {
        if (text.length() > 0) {
            text.append('\n');
        }
        text.append(Misc.formatDate(OfficeReader.getTextContent(dateElm),
                l10n.getLocale().getLanguage(), l10n.getLocale().getCountry()));
    }
    hnode.appendChild(htmlDOM.createComment(text.toString()));
}
/////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////
// UTILITY METHODS
// Create output file name (docname.html, docname1.html, docname2.html etc.)
@ -597,15 +570,16 @@ public class Converter extends BasicConverter {
// Returns the panel node of htmlDoc by delegating to htmlDoc.getPanelNode().
public Element getPanelNode() {
return htmlDoc.getPanelNode();
}
public String getTitle(){
String title = metadata.getTitle();
if (title==null) {
// use filename as fallback
title = htmlDoc.getFileName();
}
return title;
}
/**
 * Returns the title to use for the document.
 *
 * @return the title from the metadata, or the file name of {@code htmlDoc} when the
 *         metadata carries no title
 */
public String getTitle(){
    String docTitle = metadata.getTitle();
    // use filename as fallback
    return docTitle != null ? docTitle : htmlDoc.getFileName();
}
// Prepare next output file
public Element nextOutFile() {
htmlDoc = new XhtmlDocument(getOutFileName(++outFileIndex,false),nType);
@ -667,7 +641,7 @@ public class Converter extends BasicConverter {
addMetadata(head);
// Add link to custom stylesheet, if producing normal XHTML
if (!isOpenPublicationStructure && config.xhtmlCustomStylesheet().length()>0) {
if (!isEPUB && config.xhtmlCustomStylesheet().length()>0) {
Element htmlStyle = htmlDOM.createElement("link");
htmlStyle.setAttribute("rel","stylesheet");
htmlStyle.setAttribute("type","text/css");
@ -677,7 +651,7 @@ public class Converter extends BasicConverter {
}
// Add link to generated stylesheet if producing normal XHTML and the user wants separate css
if (!isOpenPublicationStructure && config.separateStylesheet()) {
if (!isEPUB && config.separateStylesheet()) {
Element htmlStyle = htmlDOM.createElement("link");
htmlStyle.setAttribute("rel","stylesheet");
htmlStyle.setAttribute("type","text/css");
@ -687,7 +661,7 @@ public class Converter extends BasicConverter {
}
// Add link to included style sheet if producing OPS content
if (isOpenPublicationStructure && styleSheet!=null) {
if (isEPUB && styleSheet!=null) {
Element sty = htmlDOM.createElement("link");
sty.setAttribute("rel", "stylesheet");
sty.setAttribute("type", "text/css");
@ -708,17 +682,19 @@ public class Converter extends BasicConverter {
}
// Adds metadata entries to the given head element.
// Nothing is added when the EPUB/OPS flag is set; the isEPUB variant additionally adds
// nothing when config.useAnnotationMetadata() is true. Otherwise Dublin Core metadata is
// written if config.xhtmlUseDublinCore() is set, else the standard plus custom metadata.
private void addMetadata(Element head) {
if (!isOpenPublicationStructure) {
if (config.xhtmlUseDublinCore()) {
addDublinCoreMetadata(head);
} else {
addStandardMetadata(head);
addCustomMetadata(head);
if (!isEPUB) {
if (!config.useAnnotationMetadata()) {
if (config.xhtmlUseDublinCore()) {
addDublinCoreMetadata(head);
} else {
addStandardMetadata(head);
addCustomMetadata(head);
}
}
}
}
private void addCustomMetadata(Element head) {
Map<String, String> customMetadata = metadata.getCustomMetaData();
for (String name : customMetadata.keySet()) {
@ -732,6 +708,7 @@ public class Converter extends BasicConverter {
createMetadataEntry(head,"subject",metadata.getSubject());
createMetadataEntry(head,"keywords",metadata.getKeywords());
createMetadataEntry(head,"creator",metadata.getCreator());
createMetadataEntry(head,"title",metadata.getTitle());
}
private void addDublinCoreMetadata(Element head) {
@ -761,14 +738,14 @@ public class Converter extends BasicConverter {
// Add epub namespace for the purpose of semantic inflection in EPUB 3.
// The xmlns:epub attribute is only set on elm when the EPUB/OPS flag is on and the
// document type (nType) is XhtmlDocument.HTML5; otherwise elm is left untouched.
public void addEpubNs(Element elm) {
if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5) {
if (isEPUB && nType==XhtmlDocument.HTML5) {
elm.setAttribute("xmlns:epub", "http://www.idpf.org/2007/ops");
}
}
// Add a type from the structural semantics vocabulary of EPUB 3.
// The epub:type attribute is set to sType only when the EPUB/OPS flag is on, the document
// type (nType) is XhtmlDocument.HTML5 and sType is not null.
public void addEpubType(Element elm, String sType) {
if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5 && sType!=null) {
if (isEPUB && nType==XhtmlDocument.HTML5 && sType!=null) {
elm.setAttribute("epub:type", sType);
}
}
@ -854,7 +831,12 @@ public class Converter extends BasicConverter {
return anchor;
}
/**
 * Adds a metadata entry to the head node of the current output document.
 * Nothing is written while EPUB output ({@code isEPUB}) is active.
 *
 * @param name  the metadata name, passed on to {@code createMetadataEntry}
 * @param value the metadata value, passed on to {@code createMetadataEntry}
 */
public void addHeadMeta(String name, String value) {
    if (isEPUB) {
        return;
    }
    createMetadataEntry(htmlDoc.getHeadNode(), name, value);
}
private void createMetadataEntry(Element head, String name, String value) {
if (value==null || value.isEmpty()) { return; }
if (name==null || name.isEmpty()) { return; }

View file

@ -40,7 +40,7 @@ import w2phtml.util.Misc;
public class XhtmlConfig extends w2phtml.base.ConfigBase {
// Implement configuration methods
protected int getOptionCount() { return 63; }
protected int getOptionCount() { return 64; }
protected String getDefaultConfigPath() { return "/writer2latex/xhtml/config/"; }
// Override setOption: To be backwards compatible, we must accept options
@ -162,6 +162,7 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase {
private static final int PAGINATION = 60;
private static final int MIN_LETTER_SPACING = 61;
private static final int PAGE_BREAK_STYLE = 62;
private static final int ANNOTATION_METADATA = 63;
protected ComplexOption xheading = addComplexOption("heading-map");
protected ComplexOption xpar = addComplexOption("paragraph-map");
@ -293,6 +294,8 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase {
options[CSS_INLINE] = new BooleanOption("css_inline","true");
options[ALIGN_SPLITS_TO_PAGES] = new BooleanOption("align_splits_to_pages","false");
options[ANNOTATION_METADATA] = new BooleanOption("annotation_metad","true");
}
@ -473,5 +476,8 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase {
return map;
}
// Returns the current value of the boolean option stored at index ANNOTATION_METADATA.
public boolean useAnnotationMetadata() {
return ((BooleanOption) options[ANNOTATION_METADATA]).getValue();
}
}

View file

@ -18,6 +18,8 @@ import w2phtml.xhtml.XhtmlConfig;
//LinkedList<String> stringList = new LinkedList<String>();
public class Separator {
private static final String COMMENT_END = "</Description>";
public Node currentComment = null;
private static final String NONE = "none";
private static final String SECTIONS = "sections";
private static final String DIV = "div";
@ -38,6 +40,7 @@ public class Separator {
private static Converter converter = null;
private Node prevPageContainer = null;
private PageContainer pageContainer = null;
private Node metadataComment = null;
public Separator(XhtmlConfig config, Converter converter) {
this.converter = converter;
@ -295,10 +298,20 @@ public class Separator {
replaceWithSpaces(lineBreaks);
replaceWithSpaces(textTabs);
deleteNotesIn(content);
deleteCommentsIn(content);
title = content.getTextContent().trim();
return title;
}
/**
 * Removes every {@code XMLString.OFFICE_ANNOTATION} element found below {@code content}.
 *
 * @param content the node to clean; it is cast to {@link Element}
 */
private static void deleteCommentsIn(Node content) {
    NodeList annotations = ((Element) content).getElementsByTagName(XMLString.OFFICE_ANNOTATION);
    // The list shrinks as nodes are detached (getElementsByTagName lists are live per the
    // DOM specification), so the head of the list is removed until nothing is left.
    for (Node head = annotations.item(0); head != null; head = annotations.item(0)) {
        head.getParentNode().removeChild(head);
    }
}
private static void deleteNotesIn(Node content) {
NodeList notes = ((Element) content).getElementsByTagName(XMLString.TEXT_NOTE);
int j = 0;
@ -320,10 +333,11 @@ public class Separator {
}
}
// Creates a comment node containing the opening section markup for the given title
// (see openHeadingCommentText) and appends it to hnode.
private static void openCommentHeading(Node hnode, String title) {
private void openCommentHeading(Node hnode, String title) {
Document doc = hnode.getOwnerDocument();
Node openSection = doc.createComment(openHeadingCommentText(title));
// remember the comment in currentComment; appendMetadata reads and rewrites that node.
// Note that the comment is appended as the last child of hnode, not inserted before it.
this.currentComment = openSection;
hnode.appendChild(openSection);
}
@ -398,12 +412,29 @@ public class Separator {
}
// Builds the text of the comment that opens a heading section: a <Section> start tag
// followed by a <Description> element holding one "Title" metadata entry.
// The title is concatenated as-is; no escaping is applied here.
private static String openHeadingCommentText(String title) {
String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + title + "</Metadata>\n</Description>";
String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + title + "</Metadata>\n" + COMMENT_END;
return comment;
}
/**
 * Adds one more {@code <Metadata>} entry to the comment referenced by
 * {@code currentComment}. The entry is inserted directly in front of the trailing
 * {@code COMMENT_END} marker, so the comment keeps ending with that marker.
 * <p>
 * Nothing is written when {@code name} or {@code value} is null or empty (the same rule
 * {@code Converter.createMetadataEntry} applies to head metadata), when there is no
 * current comment, or when the comment text does not end with {@code COMMENT_END}.
 * The latter two cases are reported on {@code System.err}.
 * <p>
 * NOTE(review): name and value are concatenated without escaping; confirm that
 * consumers of the comment tolerate characters such as {@code <}, {@code &} or {@code --}.
 *
 * @param name  the metadata name
 * @param value the metadata value
 */
public void appendMetadata(String name, String value) {
    if (name == null || name.isEmpty() || value == null || value.isEmpty()) {
        return;
    }
    if (currentComment == null) {
        System.err.println("Error. No current Comment");
        return;
    }
    String content = currentComment.getTextContent();
    if (content == null || !content.endsWith(COMMENT_END)) {
        System.err.println("Error. Comment doesn't end with " + COMMENT_END);
        return;
    }
    // Cut off the closing marker, add the new entry, then put the marker back
    String firstPart = content.substring(0, content.length() - COMMENT_END.length());
    String metadata = "<Metadata name=\"" + name + "\">" + value + "</Metadata>\n";
    currentComment.setTextContent(firstPart + metadata + COMMENT_END);
}
// Builds the text of the comment that opens a page section: a <Section> start tag followed
// by a <Description> element with two metadata entries, "Title" and "Page", both set to
// the given page number.
private static String openPageCommentText(Integer pageNum) {
String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + pageNum + "</Metadata>\n<Metadata name=\"Page\">" + pageNum + "</Metadata>\n</Description>";
String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + pageNum + "</Metadata>\n<Metadata name=\"Page\">" + pageNum + "</Metadata>\n"+COMMENT_END;
return comment;
}

View file

@ -28,6 +28,7 @@ package w2phtml.xhtml.content;
import static w2phtml.office.XMLString.*;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.Stack;
import org.w3c.dom.Node;
@ -42,6 +43,7 @@ import w2phtml.office.OfficeReader;
import w2phtml.office.OfficeStyle;
import w2phtml.office.PageLayout;
import w2phtml.office.StyleWithProperties;
import w2phtml.office.XMLString;
import w2phtml.util.Misc;
import w2phtml.xhtml.Converter;
import w2phtml.xhtml.ODFPageSplitter;
@ -51,6 +53,10 @@ import w2phtml.xhtml.XhtmlConfig;
import w2phtml.xhtml.XhtmlStyleMap;
import w2phtml.xhtml.XhtmlStyleMapItem;
import org.hamcrest.core.IsEqual;
import org.json.JSONException;
import org.json.JSONObject;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
@ -117,6 +123,7 @@ public class TextParser extends Parser {
private String endnotesContext = null;
private String footnotesContext = null;
PageContainer pageContainer = null;
private boolean applyAnnotationMetadata;
public TextParser(OfficeReader ofr, XhtmlConfig config, Converter converter) {
super(ofr,config,converter);
@ -133,6 +140,7 @@ public class TextParser extends Parser {
nFloatMode = ofr.isText() && config.xhtmlFloatObjects() ?
DrawParser.FLOATING : DrawParser.ABSOLUTE;
displayHiddenText = config.displayHiddenText();
applyAnnotationMetadata = config.useAnnotationMetadata();
pageContainer = converter.pageContainer;
docSep = new Separator(config, converter);
}
@ -402,7 +410,7 @@ public class TextParser extends Parser {
breakBeforeNextNode = true;
}
else if (nodeName.equals(OFFICE_ANNOTATION)) {
converter.handleOfficeAnnotation(child,hnode);
handleOfficeAnnotation(child,hnode);
}
else if (nodeName.equals(TEXT_SEQUENCE_DECLS)) {
//handleSeqeuenceDecls(child);
@ -823,7 +831,7 @@ public class TextParser extends Parser {
} else if (sName.equals(TEXT_BIBLIOGRAPHY_MARK)) {
handleBibliographyMark(child, hnode);
} else if (sName.equals(OFFICE_ANNOTATION)) {
converter.handleOfficeAnnotation(child, hnode);
handleOfficeAnnotation(child, hnode);
} else if (sName.startsWith("text:")) {
traverseInlineText(child, hnode);
}
@ -1433,7 +1441,7 @@ public class TextParser extends Parser {
if (Misc.isElement(node)
&& Misc.getAttribute(node, TEXT_OUTLINE_LEVEL) != null
&& !Misc.getAttribute(node, TEXT_OUTLINE_LEVEL).isEmpty()) {
String title = docSep.getTitle(node).trim();
String title = Separator.getTitle(node).trim();
if (title == null || title.isEmpty()) {
return false;
}
@ -1441,6 +1449,89 @@ public class TextParser extends Parser {
}
return false;
}
/**
 * Handles an office annotation found in the text flow.
 * <p>
 * When annotation metadata is enabled ({@code applyAnnotationMetadata}) the annotation is
 * first passed to {@code parseAnnotationMetadata}. Independently of that, when
 * {@code config.xhtmlNotes()} is enabled the annotation is written as a comment appended
 * to {@code hnode}: the paragraph texts, then the creator, then the date, each separated
 * by a newline.
 *
 * @param onode the annotation node
 * @param hnode the node that receives the generated comment as a new last child
 */
public void handleOfficeAnnotation(Node onode, Node hnode) {
    if (applyAnnotationMetadata) {
        parseAnnotationMetadata(onode);
    }
    if (!config.xhtmlNotes()) {
        return;
    }
    // Extract the text from the paragraphs, separate paragraphs with newline
    StringBuilder buf = new StringBuilder();
    Element creator = null;
    Element date = null;
    for (Node child = onode.getFirstChild(); child != null; child = child.getNextSibling()) {
        if (Misc.isElement(child, XMLString.TEXT_P)) {
            if (buf.length() > 0) {
                buf.append('\n');
            }
            buf.append(converter.getPlainInlineText(child));
        } else if (Misc.isElement(child, XMLString.DC_CREATOR)) {
            creator = (Element) child;
        } else if (Misc.isElement(child, XMLString.DC_DATE)) {
            date = (Element) child;
        }
    }
    if (creator != null) {
        if (buf.length() > 0) {
            buf.append('\n');
        }
        buf.append(converter.getPlainInlineText(creator));
    }
    if (date != null) {
        if (buf.length() > 0) {
            buf.append('\n');
        }
        // Append the text of the date element, formatted for the converter's locale.
        // Appending the Element object itself would only write its toString() form.
        buf.append(Misc.formatDate(OfficeReader.getTextContent(date),
                converter.getL10n().getLocale().getLanguage(),
                converter.getL10n().getLocale().getCountry()));
    }
    Document doc = hnode.getOwnerDocument();
    hnode.appendChild(doc.createComment(buf.toString()));
}
/**
 * Looks for metadata carried by an annotation. The plain text of the annotation's
 * paragraph (the last one, if there are several) is handed to {@code addJSONMetadata},
 * but only when the annotation's creator is exactly "Metadata Extension".
 *
 * @param onode the annotation node whose direct children are inspected
 */
private void parseAnnotationMetadata(Node onode) {
    String payload = null;
    String creatorName = null;
    for (Node current = onode.getFirstChild(); current != null; current = current.getNextSibling()) {
        if (Misc.isElement(current, XMLString.TEXT_P)) {
            payload = converter.getPlainInlineText(current);
        } else if (Misc.isElement(current, XMLString.DC_CREATOR)) {
            creatorName = converter.getPlainInlineText(current);
        }
    }
    if ("Metadata Extension".equals(creatorName)) {
        addJSONMetadata(payload);
    }
}
/**
 * Parses {@code data} as a JSON object and writes every member as a metadata entry
 * via {@code writeMetadata}.
 * <p>
 * Null or blank input and text that is not a valid JSON object are ignored; the latter
 * is reported on {@code System.err}. Member values of any JSON type are accepted and
 * written in their string form; JSON {@code null} values are skipped.
 *
 * @param data the JSON text, may be null
 */
public void addJSONMetadata(String data) {
    if (data == null || data.trim().isEmpty()) {
        return;
    }
    JSONObject json;
    try {
        json = new JSONObject(data);
    } catch (JSONException e) {
        System.err.println("Ignoring annotation metadata, not a valid JSON object: " + e.getMessage());
        return;
    }
    Iterator<String> names = json.keys();
    while (names.hasNext()) {
        String metaName = names.next();
        // opt() never throws; a plain (String) cast would fail for numbers, booleans, etc.
        Object metaValue = json.opt(metaName);
        if (metaValue == null || JSONObject.NULL.equals(metaValue)) {
            continue;
        }
        writeMetadata(metaName, metaValue.toString());
    }
}
// Writes one metadata entry to both targets: the head of the output document
// (converter.addHeadMeta) and the current section comment (docSep.appendMetadata).
private void writeMetadata(String name, String value) {
converter.addHeadMeta(name, value);
docSep.appendMetadata(name, value);
}
}