Metadata export

This commit is contained in:
Georgy Litvinov 2020-04-12 12:06:03 +02:00
parent aed436fb2d
commit ac4dc83b2b
6 changed files with 83 additions and 60 deletions

View file

@ -77,5 +77,5 @@ public interface MetaData {
* *
* @return the user-defined meta data as a name-value map * @return the user-defined meta data as a name-value map
*/ */
public Map<String,String> getUserDefinedMetaData(); public Map<String,String> getCustomMetaData();
} }

View file

@ -56,7 +56,7 @@ public abstract class BasicConverter implements Converter {
// The source document // The source document
protected OfficeDocument odDoc; protected OfficeDocument odDoc;
protected OfficeReader ofr; protected OfficeReader ofr;
protected MetaData metaData; protected MetaData metadata;
protected ImageConverter imageConverter; protected ImageConverter imageConverter;
// The output file(s) // The output file(s)
@ -112,7 +112,7 @@ public abstract class BasicConverter implements Converter {
private ConverterResult convert(String sTargetFileName, boolean bDestructive) throws IOException { private ConverterResult convert(String sTargetFileName, boolean bDestructive) throws IOException {
ofr = new OfficeReader(odDoc,false); ofr = new OfficeReader(odDoc,false);
metaData = new MetaData(odDoc); metadata = new MetaData(odDoc);
imageConverter = new ImageConverter(ofr,bDestructive,true); imageConverter = new ImageConverter(ofr,bDestructive,true);
imageConverter.setGraphicConverter(graphicConverter); imageConverter.setGraphicConverter(graphicConverter);
@ -120,9 +120,9 @@ public abstract class BasicConverter implements Converter {
this.sTargetFileName = sTargetFileName; this.sTargetFileName = sTargetFileName;
converterResult.reset(); converterResult.reset();
converterResult.setMetaData(metaData); converterResult.setMetaData(metadata);
if (metaData.getLanguage()==null || metaData.getLanguage().length()==0) { if (metadata.getLanguage()==null || metadata.getLanguage().length()==0) {
metaData.setLanguage(ofr.getMajorityLanguage()); metadata.setLanguage(ofr.getMajorityLanguage());
} }
convertInner(); convertInner();
@ -133,7 +133,7 @@ public abstract class BasicConverter implements Converter {
// The subclass must provide the implementation // The subclass must provide the implementation
public abstract void convertInner() throws IOException; public abstract void convertInner() throws IOException;
/**
 * Returns the meta data object held by this converter.
 *
 * @return the meta data of the source document
 */
public MetaData getMetaData() {
    return metadata;
}
/**
 * Returns the image converter held by this converter.
 *
 * @return the image converter
 */
public ImageConverter getImageCv() {
    return imageConverter;
}

View file

@ -127,7 +127,7 @@ public class OPFWriter extends DOMDocument {
boolean bHasCreator = false; boolean bHasCreator = false;
boolean bHasDate = false; boolean bHasDate = false;
// First rearrange the user-defined meta data // First rearrange the user-defined meta data
Map<String,String> userDefinedMetaData = cr.getMetaData().getUserDefinedMetaData(); Map<String,String> userDefinedMetaData = cr.getMetaData().getCustomMetaData();
Map<String,String[]> dc = new HashMap<String,String[]>(); Map<String,String[]> dc = new HashMap<String,String[]>();
for (String sKey : userDefinedMetaData.keySet()) { for (String sKey : userDefinedMetaData.keySet()) {
if (sKey.length()>0) { if (sKey.length()>0) {

View file

@ -169,7 +169,7 @@ public class MetaData implements w2phtml.api.MetaData {
* *
* @return the user-defined meta data as a name-value map * @return the user-defined meta data as a name-value map
*/ */
public Map<String,String> getUserDefinedMetaData() { return userdefined; } public Map<String,String> getCustomMetaData() { return userdefined; }
private String getContent(Node node) { private String getContent(Node node) {
StringBuilder buf = new StringBuilder(); StringBuilder buf = new StringBuilder();

View file

@ -1,3 +1,3 @@
#Thu Mar 19 13:31:16 CET 2020 #Sun Mar 29 16:01:24 CEST 2020
releaseVersion=0.5.5 releaseVersion=0.5.5
releaseDate=13\:31\:16 19-03-2020 releaseDate=16\:01\:24 29-03-2020

View file

@ -29,6 +29,7 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.util.HashSet; import java.util.HashSet;
import java.util.ListIterator; import java.util.ListIterator;
import java.util.Map;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.Set; import java.util.Set;
import java.util.Stack; import java.util.Stack;
@ -39,6 +40,7 @@ import java.util.Iterator;
import java.io.InputStream; import java.io.InputStream;
import java.io.IOException; import java.io.IOException;
import org.hamcrest.core.IsEqual;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.Node; import org.w3c.dom.Node;
@ -101,7 +103,7 @@ public class Converter extends BasicConverter {
// The xhtml output file(s) // The xhtml output file(s)
public int nType = XhtmlDocument.XHTML10; // the doctype public int nType = XhtmlDocument.XHTML10; // the doctype
private boolean bOPS = false; // Do we need to be OPS conforming? private boolean isOpenPublicationStructure = false; // Do we need to be OPS conforming?
public Vector<XhtmlDocument> outFiles; public Vector<XhtmlDocument> outFiles;
private int outFileIndex; private int outFileIndex;
private XhtmlDocument htmlDoc; // current outfile private XhtmlDocument htmlDoc; // current outfile
@ -247,9 +249,9 @@ public class Converter extends BasicConverter {
/**
 * Returns the {@code L10n} object held by this converter.
 *
 * @return the {@code L10n} instance
 */
public L10n getL10n() {
    return l10n;
}
/**
 * Switches OPS (Open Publication Structure) conforming output on or off.
 *
 * <p>The flag is set to the value passed in; previously the argument was
 * ignored and the flag was unconditionally set to {@code true}.
 *
 * @param b {@code true} if the output must be OPS conforming,
 *          {@code false} otherwise
 */
public void setOpenPubStructure(boolean b) {
    isOpenPublicationStructure = b;
}
/**
 * Tells whether the output needs to be OPS conforming.
 *
 * @return the current value of the OPS flag
 */
public boolean isOPS() {
    return isOpenPublicationStructure;
}
@Override public void convertInner() throws IOException { @Override public void convertInner() throws IOException {
sTargetFileName = Misc.trimDocumentName(sTargetFileName,XhtmlDocument.getExtension(nType)); sTargetFileName = Misc.trimDocumentName(sTargetFileName,XhtmlDocument.getExtension(nType));
@ -343,7 +345,7 @@ public class Converter extends BasicConverter {
} }
// Add included style sheet, if any - and we are creating OPS content // Add included style sheet, if any - and we are creating OPS content
if (bOPS && styleSheet!=null) { if (isOpenPublicationStructure && styleSheet!=null) {
converterResult.addDocument(styleSheet); converterResult.addDocument(styleSheet);
for (ResourceDocument doc : resources) { for (ResourceDocument doc : resources) {
converterResult.addDocument(doc); converterResult.addDocument(doc);
@ -596,7 +598,7 @@ public class Converter extends BasicConverter {
return htmlDoc.getPanelNode(); return htmlDoc.getPanelNode();
} }
public String getTitle(){ public String getTitle(){
String title = metaData.getTitle(); String title = metadata.getTitle();
if (title==null) { if (title==null) {
// use filename as fallback // use filename as fallback
title = htmlDoc.getFileName(); title = htmlDoc.getFileName();
@ -622,7 +624,7 @@ public class Converter extends BasicConverter {
private void addTitle() { private void addTitle() {
Element title = htmlDoc.getTitleNode(); Element title = htmlDoc.getTitleNode();
if (title!=null) { if (title!=null) {
String sTitle = metaData.getTitle(); String sTitle = metadata.getTitle();
if (sTitle==null) { // use filename as fallback if (sTitle==null) { // use filename as fallback
sTitle = htmlDoc.getFileName(); sTitle = htmlDoc.getFileName();
} }
@ -662,43 +664,10 @@ public class Converter extends BasicConverter {
} }
// Add meta data (for EPUB the meta data belongs to the .opf file) // Add meta data (for EPUB the meta data belongs to the .opf file)
if (!bOPS) { addMetadata(head);
// "Traditional" meta data
//createMeta("generator","Writer2LaTeX "+Misc.VERSION);
createMeta(head,"description",metaData.getDescription());
createMeta(head,"keywords",metaData.getKeywords());
// Dublin core meta data (optional)
// Format as recommended on dublincore.org (http://dublincore.org/documents/dc-html/)
// Declare meta data profile
if (config.xhtmlUseDublinCore()) {
head.setAttribute("profile","http://dublincore.org/documents/2008/08/04/dc-html/");
// Add link to declare namespace
Element dclink = htmlDOM.createElement("link");
dclink.setAttribute("rel","schema.DC");
dclink.setAttribute("href","http://purl.org/dc/elements/1.1/");
head.appendChild(dclink);
// Insert the actual meta data
createMeta(head,"DC.title",metaData.getTitle());
// DC.subject actually contains subject+keywords, so we merge them
String sDCSubject = "";
if (metaData.getSubject()!=null && metaData.getSubject().length()>0) {
sDCSubject = metaData.getSubject();
}
if (metaData.getKeywords()!=null && metaData.getKeywords().length()>0) {
if (sDCSubject.length()>0) { sDCSubject+=", "; }
sDCSubject += metaData.getKeywords();
}
createMeta(head,"DC.subject",sDCSubject);
createMeta(head,"DC.description",metaData.getDescription());
createMeta(head,"DC.creator",metaData.getCreator());
createMeta(head,"DC.date",metaData.getDate());
createMeta(head,"DC.language",metaData.getLanguage());
}
}
// Add link to custom stylesheet, if producing normal XHTML // Add link to custom stylesheet, if producing normal XHTML
if (!bOPS && config.xhtmlCustomStylesheet().length()>0) { if (!isOpenPublicationStructure && config.xhtmlCustomStylesheet().length()>0) {
Element htmlStyle = htmlDOM.createElement("link"); Element htmlStyle = htmlDOM.createElement("link");
htmlStyle.setAttribute("rel","stylesheet"); htmlStyle.setAttribute("rel","stylesheet");
htmlStyle.setAttribute("type","text/css"); htmlStyle.setAttribute("type","text/css");
@ -708,7 +677,7 @@ public class Converter extends BasicConverter {
} }
// Add link to generated stylesheet if producing normal XHTML and the user wants separate css // Add link to generated stylesheet if producing normal XHTML and the user wants separate css
if (!bOPS && config.separateStylesheet()) { if (!isOpenPublicationStructure && config.separateStylesheet()) {
Element htmlStyle = htmlDOM.createElement("link"); Element htmlStyle = htmlDOM.createElement("link");
htmlStyle.setAttribute("rel","stylesheet"); htmlStyle.setAttribute("rel","stylesheet");
htmlStyle.setAttribute("type","text/css"); htmlStyle.setAttribute("type","text/css");
@ -718,7 +687,7 @@ public class Converter extends BasicConverter {
} }
// Add link to included style sheet if producing OPS content // Add link to included style sheet if producing OPS content
if (bOPS && styleSheet!=null) { if (isOpenPublicationStructure && styleSheet!=null) {
Element sty = htmlDOM.createElement("link"); Element sty = htmlDOM.createElement("link");
sty.setAttribute("rel", "stylesheet"); sty.setAttribute("rel", "stylesheet");
sty.setAttribute("type", "text/css"); sty.setAttribute("type", "text/css");
@ -737,17 +706,70 @@ public class Converter extends BasicConverter {
} }
} }
/**
 * Adds meta data to the given head element. Nothing is added when OPS
 * content is produced (for EPUB the meta data belongs to the .opf file).
 * Otherwise either the Dublin Core entries or the standard plus custom
 * entries are written, depending on the configuration.
 *
 * NOTE(review): the inline code this method replaced wrote the
 * "description" and "keywords" entries even when Dublin Core was enabled;
 * here the two variants are mutually exclusive - confirm this is intended.
 *
 * @param head the head element receiving the meta data
 */
private void addMetadata(Element head) {
    if (isOpenPublicationStructure) {
        return;
    }
    if (config.xhtmlUseDublinCore()) {
        addDublinCoreMetadata(head);
        return;
    }
    addStandardMetadata(head);
    addCustomMetadata(head);
}
/**
 * Writes every custom (user-defined) meta data item as a meta entry
 * of the given head element.
 *
 * @param head the head element receiving the meta entries
 */
private void addCustomMetadata(Element head) {
    Map<String, String> customMetadata = metadata.getCustomMetaData();
    // Iterate over the entries directly rather than looking up each key again
    for (Map.Entry<String, String> entry : customMetadata.entrySet()) {
        createMetadataEntry(head, entry.getKey(), entry.getValue());
    }
}
/**
 * Writes the standard meta entries (description, subject, keywords,
 * date and creator) taken from the document meta data.
 *
 * @param head the head element receiving the meta entries
 */
private void addStandardMetadata(Element head) {
    String description = metadata.getDescription();
    createMetadataEntry(head, "description", description);

    String subject = metadata.getSubject();
    createMetadataEntry(head, "subject", subject);

    String keywords = metadata.getKeywords();
    createMetadataEntry(head, "keywords", keywords);

    String date = metadata.getDate();
    createMetadataEntry(head, "date", date);

    String creator = metadata.getCreator();
    createMetadataEntry(head, "creator", creator);
}
/**
 * Writes the meta data as Dublin Core entries: sets the profile attribute
 * on the head element, appends the link declaring the DC schema and then
 * adds the individual DC.* meta entries.
 *
 * @param head the head element receiving the profile, link and meta entries
 */
private void addDublinCoreMetadata(Element head) {
    // Declare the meta data profile
    head.setAttribute("profile","http://dublincore.org/documents/2008/08/04/dc-html/");

    // Link element declaring the DC namespace
    Element schemaLink = htmlDOM.createElement("link");
    schemaLink.setAttribute("rel","schema.DC");
    schemaLink.setAttribute("href","http://purl.org/dc/elements/1.1/");
    head.appendChild(schemaLink);

    // The actual meta data
    createMetadataEntry(head,"DC.title",metadata.getTitle());

    // DC.subject carries subject and keywords merged, separated by a comma
    StringBuilder dcSubject = new StringBuilder();
    String subject = metadata.getSubject();
    if (subject != null && !subject.isEmpty()) {
        dcSubject.append(subject);
    }
    String keywords = metadata.getKeywords();
    if (keywords != null && !keywords.isEmpty()) {
        if (dcSubject.length() > 0) {
            dcSubject.append(", ");
        }
        dcSubject.append(keywords);
    }
    createMetadataEntry(head,"DC.subject",dcSubject.toString());

    createMetadataEntry(head,"DC.description",metadata.getDescription());
    createMetadataEntry(head,"DC.creator",metadata.getCreator());
    createMetadataEntry(head,"DC.date",metadata.getDate());
    createMetadataEntry(head,"DC.language",metadata.getLanguage());
}
// Add epub namespace for the purpose of semantic inflection in EPUB 3 // Add epub namespace for the purpose of semantic inflection in EPUB 3
public void addEpubNs(Element elm) { public void addEpubNs(Element elm) {
if (bOPS && nType==XhtmlDocument.HTML5) { if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5) {
elm.setAttribute("xmlns:epub", "http://www.idpf.org/2007/ops"); elm.setAttribute("xmlns:epub", "http://www.idpf.org/2007/ops");
} }
} }
// Add a type from the structural semantics vocabulary of EPUB 3 // Add a type from the structural semantics vocabulary of EPUB 3
public void addEpubType(Element elm, String sType) { public void addEpubType(Element elm, String sType) {
if (bOPS && nType==XhtmlDocument.HTML5 && sType!=null) { if (isOpenPublicationStructure && nType==XhtmlDocument.HTML5 && sType!=null) {
elm.setAttribute("epub:type", sType); elm.setAttribute("epub:type", sType);
} }
} }
@ -834,11 +856,12 @@ public class Converter extends BasicConverter {
} }
private void createMeta(Element head, String sName, String sValue) { private void createMetadataEntry(Element head, String name, String value) {
if (sValue==null) { return; } if (value==null || value.isEmpty()) { return; }
if (name==null || name.isEmpty()) { return; }
Element meta = htmlDOM.createElement("meta"); Element meta = htmlDOM.createElement("meta");
meta.setAttribute("name",sName); meta.setAttribute("name",name);
meta.setAttribute("content",sValue); meta.setAttribute("content",value);
head.appendChild(meta); head.appendChild(meta);
} }