Annotation metadata
This commit is contained in:
parent
ad8bfd03b2
commit
0a598de7ce
5 changed files with 180 additions and 70 deletions
src/main/java/w2phtml
|
@ -1,3 +1,3 @@
|
|||
#Sun Apr 12 12:38:00 CEST 2020
|
||||
releaseVersion=0.5.5
|
||||
releaseDate=12\:38\:00 12-04-2020
|
||||
#Wed Apr 22 15:32:34 CEST 2020
|
||||
releaseVersion=0.5.7
|
||||
releaseDate=15\:32\:34 22-04-2020
|
||||
|
|
|
@ -41,9 +41,12 @@ import java.io.InputStream;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.hamcrest.core.IsEqual;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
import w2phtml.api.Config;
|
||||
|
@ -103,7 +106,7 @@ public class Converter extends BasicConverter {
|
|||
|
||||
// The xhtml output file(s)
|
||||
public int nType = XhtmlDocument.XHTML10; // the doctype
|
||||
private boolean isOpenPublicationStructure = false; // Do we need to be OPS conforming?
|
||||
private boolean isEPUB = false; // Do we need to be OPS conforming?
|
||||
public Vector<XhtmlDocument> outFiles;
|
||||
private int outFileIndex;
|
||||
private XhtmlDocument htmlDoc; // current outfile
|
||||
|
@ -249,9 +252,9 @@ public class Converter extends BasicConverter {
|
|||
|
||||
public L10n getL10n() { return l10n; }
|
||||
|
||||
public void setOpenPubStructure(boolean b) { isOpenPublicationStructure = true; }
|
||||
/**
 * Switches EPUB output on or off.
 *
 * @param b the new value of the EPUB flag; previously this argument was
 *          ignored and the flag was unconditionally set to {@code true}
 */
public void setOpenPubStructure(boolean b) { isEPUB = b; }
|
||||
|
||||
public boolean isOPS() { return isOpenPublicationStructure; }
|
||||
public boolean isOPS() { return isEPUB; }
|
||||
|
||||
@Override public void convertInner() throws IOException {
|
||||
sTargetFileName = Misc.trimDocumentName(sTargetFileName,XhtmlDocument.getExtension(nType));
|
||||
|
@ -345,7 +348,7 @@ public class Converter extends BasicConverter {
|
|||
}
|
||||
|
||||
// Add included style sheet, if any - and we are creating OPS content
|
||||
if (isOpenPublicationStructure && styleSheet!=null) {
|
||||
if (isEPUB && styleSheet!=null) {
|
||||
converterResult.addDocument(styleSheet);
|
||||
for (ResourceDocument doc : resources) {
|
||||
converterResult.addDocument(doc);
|
||||
|
@ -540,40 +543,10 @@ public class Converter extends BasicConverter {
|
|||
}
|
||||
|
||||
|
||||
public void handleOfficeAnnotation(Node onode, Node hnode) {
|
||||
if (config.xhtmlNotes()) {
|
||||
// Extract the text from the paragraphs, separate paragraphs with newline
|
||||
StringBuilder buf = new StringBuilder();
|
||||
Element creator = null;
|
||||
Element date = null;
|
||||
Node child = onode.getFirstChild();
|
||||
while (child!=null) {
|
||||
if (Misc.isElement(child, XMLString.TEXT_P)) {
|
||||
if (buf.length()>0) { buf.append('\n'); }
|
||||
buf.append(getPlainInlineText(child));
|
||||
}
|
||||
else if (Misc.isElement(child, XMLString.DC_CREATOR)) {
|
||||
creator = (Element) child;
|
||||
}
|
||||
else if (Misc.isElement(child, XMLString.DC_DATE)) {
|
||||
date = (Element) child;
|
||||
}
|
||||
child = child.getNextSibling();
|
||||
}
|
||||
if (creator!=null) {
|
||||
if (buf.length()>0) { buf.append('\n'); }
|
||||
buf.append(getPlainInlineText(creator));
|
||||
}
|
||||
if (date!=null) {
|
||||
if (buf.length()>0) { buf.append('\n'); }
|
||||
buf.append(Misc.formatDate(OfficeReader.getTextContent(date), l10n.getLocale().getLanguage(), l10n.getLocale().getCountry()));
|
||||
}
|
||||
Node commentNode = htmlDOM.createComment(buf.toString());
|
||||
hnode.appendChild(commentNode);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// UTILITY METHODS
|
||||
|
||||
// Create output file name (docname.html, docname1.html, docname2.html etc.)
|
||||
|
@ -597,15 +570,16 @@ public class Converter extends BasicConverter {
|
|||
public Element getPanelNode() {
|
||||
return htmlDoc.getPanelNode();
|
||||
}
|
||||
public String getTitle(){
|
||||
String title = metadata.getTitle();
|
||||
if (title==null) {
|
||||
// use filename as fallback
|
||||
title = htmlDoc.getFileName();
|
||||
}
|
||||
return title;
|
||||
|
||||
}
|
||||
public String getTitle(){
|
||||
String title = metadata.getTitle();
|
||||
if (title==null) {
|
||||
// use filename as fallback
|
||||
title = htmlDoc.getFileName();
|
||||
}
|
||||
return title;
|
||||
|
||||
}
|
||||
|
||||
// Prepare next output file
|
||||
public Element nextOutFile() {
|
||||
htmlDoc = new XhtmlDocument(getOutFileName(++outFileIndex,false),nType);
|
||||
|
@ -667,7 +641,7 @@ public class Converter extends BasicConverter {
|
|||
addMetadata(head);
|
||||
|
||||
// Add link to custom stylesheet, if producing normal XHTML
|
||||
if (!isOpenPublicationStructure && config.xhtmlCustomStylesheet().length()>0) {
|
||||
if (!isEPUB && config.xhtmlCustomStylesheet().length()>0) {
|
||||
Element htmlStyle = htmlDOM.createElement("link");
|
||||
htmlStyle.setAttribute("rel","stylesheet");
|
||||
htmlStyle.setAttribute("type","text/css");
|
||||
|
@ -677,7 +651,7 @@ public class Converter extends BasicConverter {
|
|||
}
|
||||
|
||||
// Add link to generated stylesheet if producing normal XHTML and the user wants separate css
|
||||
if (!isOpenPublicationStructure && config.separateStylesheet()) {
|
||||
if (!isEPUB && config.separateStylesheet()) {
|
||||
Element htmlStyle = htmlDOM.createElement("link");
|
||||
htmlStyle.setAttribute("rel","stylesheet");
|
||||
htmlStyle.setAttribute("type","text/css");
|
||||
|
@ -687,7 +661,7 @@ public class Converter extends BasicConverter {
|
|||
}
|
||||
|
||||
// Add link to included style sheet if producing OPS content
|
||||
if (isOpenPublicationStructure && styleSheet!=null) {
|
||||
if (isEPUB && styleSheet!=null) {
|
||||
Element sty = htmlDOM.createElement("link");
|
||||
sty.setAttribute("rel", "stylesheet");
|
||||
sty.setAttribute("type", "text/css");
|
||||
|
@ -708,17 +682,19 @@ public class Converter extends BasicConverter {
|
|||
}
|
||||
|
||||
private void addMetadata(Element head) {
|
||||
if (!isOpenPublicationStructure) {
|
||||
if (config.xhtmlUseDublinCore()) {
|
||||
addDublinCoreMetadata(head);
|
||||
} else {
|
||||
addStandardMetadata(head);
|
||||
addCustomMetadata(head);
|
||||
|
||||
if (!isEPUB) {
|
||||
if (!config.useAnnotationMetadata()) {
|
||||
if (config.xhtmlUseDublinCore()) {
|
||||
addDublinCoreMetadata(head);
|
||||
} else {
|
||||
addStandardMetadata(head);
|
||||
addCustomMetadata(head);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void addCustomMetadata(Element head) {
|
||||
Map<String, String> customMetadata = metadata.getCustomMetaData();
|
||||
for (String name : customMetadata.keySet()) {
|
||||
|
@ -732,6 +708,7 @@ public class Converter extends BasicConverter {
|
|||
createMetadataEntry(head,"subject",metadata.getSubject());
|
||||
createMetadataEntry(head,"keywords",metadata.getKeywords());
|
||||
createMetadataEntry(head,"creator",metadata.getCreator());
|
||||
createMetadataEntry(head,"title",metadata.getTitle());
|
||||
}
|
||||
|
||||
private void addDublinCoreMetadata(Element head) {
|
||||
|
@ -761,14 +738,14 @@ public class Converter extends BasicConverter {
|
|||
|
||||
// Add epub namespace for the purpose of semantic inflection in EPUB 3
|
||||
public void addEpubNs(Element elm) {
|
||||
if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5) {
|
||||
if (isEPUB && nType==XhtmlDocument.HTML5) {
|
||||
elm.setAttribute("xmlns:epub", "http://www.idpf.org/2007/ops");
|
||||
}
|
||||
}
|
||||
|
||||
// Add a type from the structural semantics vocabulary of EPUB 3
|
||||
public void addEpubType(Element elm, String sType) {
|
||||
if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5 && sType!=null) {
|
||||
if (isEPUB && nType==XhtmlDocument.HTML5 && sType!=null) {
|
||||
elm.setAttribute("epub:type", sType);
|
||||
}
|
||||
}
|
||||
|
@ -854,7 +831,12 @@ public class Converter extends BasicConverter {
|
|||
return anchor;
|
||||
}
|
||||
|
||||
|
||||
public void addHeadMeta(String name, String value) {
|
||||
if (!isEPUB) {
|
||||
createMetadataEntry(htmlDoc.getHeadNode(), name, value);
|
||||
}
|
||||
}
|
||||
|
||||
private void createMetadataEntry(Element head, String name, String value) {
|
||||
if (value==null || value.isEmpty()) { return; }
|
||||
if (name==null || name.isEmpty()) { return; }
|
||||
|
|
|
@ -40,7 +40,7 @@ import w2phtml.util.Misc;
|
|||
|
||||
public class XhtmlConfig extends w2phtml.base.ConfigBase {
|
||||
// Implement configuration methods
|
||||
protected int getOptionCount() { return 63; }
|
||||
protected int getOptionCount() { return 64; }
|
||||
protected String getDefaultConfigPath() { return "/writer2latex/xhtml/config/"; }
|
||||
|
||||
// Override setOption: To be backwards compatible, we must accept options
|
||||
|
@ -162,6 +162,7 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase {
|
|||
private static final int PAGINATION = 60;
|
||||
private static final int MIN_LETTER_SPACING = 61;
|
||||
private static final int PAGE_BREAK_STYLE = 62;
|
||||
private static final int ANNOTATION_METADATA = 63;
|
||||
|
||||
protected ComplexOption xheading = addComplexOption("heading-map");
|
||||
protected ComplexOption xpar = addComplexOption("paragraph-map");
|
||||
|
@ -293,6 +294,8 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase {
|
|||
|
||||
options[CSS_INLINE] = new BooleanOption("css_inline","true");
|
||||
options[ALIGN_SPLITS_TO_PAGES] = new BooleanOption("align_splits_to_pages","false");
|
||||
options[ANNOTATION_METADATA] = new BooleanOption("annotation_metad","true");
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
@ -473,5 +476,8 @@ public class XhtmlConfig extends w2phtml.base.ConfigBase {
|
|||
return map;
|
||||
|
||||
}
|
||||
public boolean useAnnotationMetadata() {
|
||||
return ((BooleanOption) options[ANNOTATION_METADATA]).getValue();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -18,6 +18,8 @@ import w2phtml.xhtml.XhtmlConfig;
|
|||
//LinkedList<String> stringList = new LinkedList<String>();
|
||||
public class Separator {
|
||||
|
||||
private static final String COMMENT_END = "</Description>";
|
||||
public Node currentComment = null;
|
||||
private static final String NONE = "none";
|
||||
private static final String SECTIONS = "sections";
|
||||
private static final String DIV = "div";
|
||||
|
@ -38,6 +40,7 @@ public class Separator {
|
|||
private static Converter converter = null;
|
||||
private Node prevPageContainer = null;
|
||||
private PageContainer pageContainer = null;
|
||||
private Node metadataComment = null;
|
||||
|
||||
public Separator(XhtmlConfig config, Converter converter) {
|
||||
this.converter = converter;
|
||||
|
@ -295,10 +298,20 @@ public class Separator {
|
|||
replaceWithSpaces(lineBreaks);
|
||||
replaceWithSpaces(textTabs);
|
||||
deleteNotesIn(content);
|
||||
deleteCommentsIn(content);
|
||||
title = content.getTextContent().trim();
|
||||
return title;
|
||||
}
|
||||
|
||||
private static void deleteCommentsIn(Node content) {
|
||||
NodeList notes = ((Element) content).getElementsByTagName(XMLString.OFFICE_ANNOTATION);
|
||||
int j = 0;
|
||||
while (j < notes.getLength()) {
|
||||
Node note = notes.item(j);
|
||||
note.getParentNode().removeChild(note);
|
||||
}
|
||||
}
|
||||
|
||||
private static void deleteNotesIn(Node content) {
|
||||
NodeList notes = ((Element) content).getElementsByTagName(XMLString.TEXT_NOTE);
|
||||
int j = 0;
|
||||
|
@ -320,10 +333,11 @@ public class Separator {
|
|||
}
|
||||
}
|
||||
|
||||
private static void openCommentHeading(Node hnode, String title) {
|
||||
private void openCommentHeading(Node hnode, String title) {
|
||||
Document doc = hnode.getOwnerDocument();
|
||||
Node openSection = doc.createComment(openHeadingCommentText(title));
|
||||
// insert open section comment before header node
|
||||
this.currentComment = openSection;
|
||||
hnode.appendChild(openSection);
|
||||
}
|
||||
|
||||
|
@ -398,12 +412,29 @@ public class Separator {
|
|||
}
|
||||
|
||||
private static String openHeadingCommentText(String title) {
|
||||
String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + title + "</Metadata>\n</Description>";
|
||||
String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + title + "</Metadata>\n" + COMMENT_END;
|
||||
return comment;
|
||||
}
|
||||
|
||||
public void appendMetadata(String name, String value) {
|
||||
if (currentComment == null) {
|
||||
System.out.println("Error. No current Comment");
|
||||
return;
|
||||
}
|
||||
String content = currentComment.getTextContent();
|
||||
System.out.println(content);
|
||||
if (content.endsWith(COMMENT_END)) {
|
||||
String firstPart = content.substring(0, content.length()- COMMENT_END.length());
|
||||
String metadata = "<Metadata name=\"" + name + "\">" + value + "</Metadata>\n";
|
||||
currentComment.setTextContent(firstPart + metadata + COMMENT_END);
|
||||
System.out.println("Comment updated.");
|
||||
} else {
|
||||
System.out.println("Error. Comment doesn't end with " + COMMENT_END);
|
||||
}
|
||||
}
|
||||
|
||||
private static String openPageCommentText(Integer pageNum) {
|
||||
String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + pageNum + "</Metadata>\n<Metadata name=\"Page\">" + pageNum + "</Metadata>\n</Description>";
|
||||
String comment = "<Section>\n<Description>\n<Metadata name=\"Title\">" + pageNum + "</Metadata>\n<Metadata name=\"Page\">" + pageNum + "</Metadata>\n"+COMMENT_END;
|
||||
return comment;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ package w2phtml.xhtml.content;
|
|||
import static w2phtml.office.XMLString.*;
|
||||
|
||||
import java.util.Hashtable;
|
||||
import java.util.Iterator;
|
||||
import java.util.Stack;
|
||||
|
||||
import org.w3c.dom.Node;
|
||||
|
@ -42,6 +43,7 @@ import w2phtml.office.OfficeReader;
|
|||
import w2phtml.office.OfficeStyle;
|
||||
import w2phtml.office.PageLayout;
|
||||
import w2phtml.office.StyleWithProperties;
|
||||
import w2phtml.office.XMLString;
|
||||
import w2phtml.util.Misc;
|
||||
import w2phtml.xhtml.Converter;
|
||||
import w2phtml.xhtml.ODFPageSplitter;
|
||||
|
@ -51,6 +53,10 @@ import w2phtml.xhtml.XhtmlConfig;
|
|||
import w2phtml.xhtml.XhtmlStyleMap;
|
||||
import w2phtml.xhtml.XhtmlStyleMapItem;
|
||||
|
||||
import org.hamcrest.core.IsEqual;
|
||||
import org.json.JSONException;
|
||||
import org.json.JSONObject;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
|
||||
|
@ -117,6 +123,7 @@ public class TextParser extends Parser {
|
|||
private String endnotesContext = null;
|
||||
private String footnotesContext = null;
|
||||
PageContainer pageContainer = null;
|
||||
private boolean applyAnnotationMetadata;
|
||||
|
||||
public TextParser(OfficeReader ofr, XhtmlConfig config, Converter converter) {
|
||||
super(ofr,config,converter);
|
||||
|
@ -133,6 +140,7 @@ public class TextParser extends Parser {
|
|||
nFloatMode = ofr.isText() && config.xhtmlFloatObjects() ?
|
||||
DrawParser.FLOATING : DrawParser.ABSOLUTE;
|
||||
displayHiddenText = config.displayHiddenText();
|
||||
applyAnnotationMetadata = config.useAnnotationMetadata();
|
||||
pageContainer = converter.pageContainer;
|
||||
docSep = new Separator(config, converter);
|
||||
}
|
||||
|
@ -402,7 +410,7 @@ public class TextParser extends Parser {
|
|||
breakBeforeNextNode = true;
|
||||
}
|
||||
else if (nodeName.equals(OFFICE_ANNOTATION)) {
|
||||
converter.handleOfficeAnnotation(child,hnode);
|
||||
handleOfficeAnnotation(child,hnode);
|
||||
}
|
||||
else if (nodeName.equals(TEXT_SEQUENCE_DECLS)) {
|
||||
//handleSeqeuenceDecls(child);
|
||||
|
@ -823,7 +831,7 @@ public class TextParser extends Parser {
|
|||
} else if (sName.equals(TEXT_BIBLIOGRAPHY_MARK)) {
|
||||
handleBibliographyMark(child, hnode);
|
||||
} else if (sName.equals(OFFICE_ANNOTATION)) {
|
||||
converter.handleOfficeAnnotation(child, hnode);
|
||||
handleOfficeAnnotation(child, hnode);
|
||||
} else if (sName.startsWith("text:")) {
|
||||
traverseInlineText(child, hnode);
|
||||
}
|
||||
|
@ -1433,7 +1441,7 @@ public class TextParser extends Parser {
|
|||
if (Misc.isElement(node)
|
||||
&& Misc.getAttribute(node, TEXT_OUTLINE_LEVEL) != null
|
||||
&& !Misc.getAttribute(node, TEXT_OUTLINE_LEVEL).isEmpty()) {
|
||||
String title = docSep.getTitle(node).trim();
|
||||
String title = Separator.getTitle(node).trim();
|
||||
if (title == null || title.isEmpty()) {
|
||||
return false;
|
||||
}
|
||||
|
@ -1441,6 +1449,89 @@ public class TextParser extends Parser {
|
|||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
public void handleOfficeAnnotation(Node onode, Node hnode) {
|
||||
if (applyAnnotationMetadata) {
|
||||
parseAnnotationMetadata(onode);
|
||||
}
|
||||
if (config.xhtmlNotes()) {
|
||||
// Extract the text from the paragraphs, separate paragraphs with newline
|
||||
StringBuilder buf = new StringBuilder();
|
||||
Element creator = null;
|
||||
Element date = null;
|
||||
Node child = onode.getFirstChild();
|
||||
while (child != null) {
|
||||
if (Misc.isElement(child, XMLString.TEXT_P)) {
|
||||
if (buf.length() > 0) {
|
||||
buf.append('\n');
|
||||
}
|
||||
buf.append(converter.getPlainInlineText(child));
|
||||
} else if (Misc.isElement(child, XMLString.DC_CREATOR)) {
|
||||
creator = (Element) child;
|
||||
} else if (Misc.isElement(child, XMLString.DC_DATE)) {
|
||||
date = (Element) child;
|
||||
}
|
||||
child = child.getNextSibling();
|
||||
}
|
||||
if (creator != null) {
|
||||
if (buf.length() > 0) {
|
||||
buf.append('\n');
|
||||
}
|
||||
buf.append(converter.getPlainInlineText(creator));
|
||||
}
|
||||
if (date != null) {
|
||||
if (buf.length() > 0) {
|
||||
buf.append('\n');
|
||||
}
|
||||
buf.append(date);
|
||||
}
|
||||
Document doc = hnode.getOwnerDocument();
|
||||
Node commentNode = doc.createComment(buf.toString());
|
||||
hnode.appendChild(commentNode);
|
||||
}
|
||||
}
|
||||
private void parseAnnotationMetadata(Node onode) {
|
||||
NodeList annotationNodes = onode.getChildNodes();
|
||||
String jsonMetadata = null;
|
||||
String author = null;
|
||||
for (int i = 0 ; i < annotationNodes.getLength(); i++) {
|
||||
Node node = annotationNodes.item(i);
|
||||
if (Misc.isElement(node, XMLString.TEXT_P)) {
|
||||
jsonMetadata = converter.getPlainInlineText(node);
|
||||
} else
|
||||
if (Misc.isElement(node, XMLString.DC_CREATOR)){
|
||||
author = converter.getPlainInlineText(node);
|
||||
}
|
||||
}
|
||||
if (author != null && author.equals("Metadata Extension")) {
|
||||
addJSONMetadata(jsonMetadata);
|
||||
}
|
||||
}
|
||||
|
||||
public void addJSONMetadata(String data) {
|
||||
Iterator<String> names = null;
|
||||
JSONObject json = null;
|
||||
try {
|
||||
json = new JSONObject(data);
|
||||
names = json.keys();
|
||||
} catch (JSONException e) {
|
||||
e.printStackTrace();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
if (names == null || json == null) {
|
||||
return;
|
||||
}
|
||||
while (names.hasNext()) {
|
||||
String metaName = names.next();
|
||||
String metaValue = (String) json.get(metaName);
|
||||
writeMetadata(metaName, metaValue);
|
||||
}
|
||||
}
|
||||
|
||||
private void writeMetadata(String name, String value) {
|
||||
converter.addHeadMeta(name, value);
|
||||
docSep.appendMetadata(name, value);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue