Progress on applying CSV metadata to RDF

This commit is contained in:
Georgy Litvinov 2020-02-28 18:09:37 +01:00
parent d7330799f2
commit 3da557d82c
5 changed files with 37 additions and 12 deletions

View file

@ -22,6 +22,7 @@ dependencies{
bundledLibs 'org.libreoffice:unoil:5.3.2'
compile 'junit:junit:4.12'
bundledLibs group: 'org.apache.jena', name: 'jena-core', version: '3.14.0'
bundledLibs group: 'com.opencsv', name: 'opencsv', version: '5.1'
bundledLibs files('idl')
configurations.compile.extendsFrom(configurations.bundledLibs)
}

View file

@ -28,6 +28,7 @@ public class DocumentPart {
private String body;
private String parentPath;
private String name;
private String order;
public DocumentPart(XhtmlDocument document) {
this.excerptDoc = document;
@ -63,6 +64,12 @@ public class DocumentPart {
/**
 * Returns the body of this document part.
 *
 * @return the current value of the {@code body} field
 */
public String getBody() {
return body;
}
/**
 * Sets the order of this document part.
 *
 * <p>{@code DocumentStructure} passes the zero-based index of the source
 * document, formatted as a decimal string.
 *
 * @param order the order value to store; kept as given, no validation
 */
public void setOrder(String order) {
this.order = order;
}
/**
 * Returns the order of this document part.
 *
 * @return the value last passed to {@link #setOrder(String)}, or
 *         {@code null} if it was never set
 */
public String getOrder() {
return order;
}
/**
 * Returns the parent path of this document part.
 *
 * @return the current value of the {@code parentPath} field
 */
public String getParentPath() {
return parentPath;
}

View file

@ -1,18 +1,15 @@
package writer2latex.rdf;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;
import org.apache.jena.ontology.OntClass;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.vocabulary.*;
@ -20,7 +17,6 @@ import org.apache.jena.vocabulary.*;
import writer2latex.xhtml.XhtmlDocument;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFWriter;
public class DocumentStructure {
@ -56,12 +52,17 @@ public class DocumentStructure {
/**
 * Builds the document structure from a list of converted XHTML documents.
 *
 * <p>Every document is wrapped in a {@link DocumentPart} whose order is the
 * document's zero-based position in {@code files}, stored as a decimal
 * string, and then registered through {@code addPart}. Once all parts are
 * registered, {@code addEmptyParts()} is invoked.
 *
 * @param files    the converted documents to build parts from
 * @param fileName the value stored as this structure's document ID
 */
public DocumentStructure(Vector<XhtmlDocument> files, String fileName) {
    this();
    this.documentID = fileName;
    // Index-based loop: the position of each document is recorded on its part.
    for (int i = 0; i < files.size(); i++) {
        DocumentPart part = new DocumentPart(files.get(i));
        part.setOrder(Integer.toString(i));
        addPart(part);
    }
    addEmptyParts();
}
@ -205,5 +206,16 @@ public class DocumentStructure {
}
}
}
/**
 * Applies CSV metadata to the parts of this document structure.
 *
 * <p>Work in progress: for each part only the CSV lookup key is derived so
 * far; {@code metadata} is not yet consulted and no part is modified.
 *
 * @param metadata the metadata read from the CSV file (currently unused)
 */
public void applyMetadata(Metadata metadata) {
    for (DocumentPart part : inputParts.values()) {
        String partOrder = part.getOrder();
        // The main document (order "0") has a blank number in the CSV.
        // Constant-first equals avoids a NullPointerException for parts
        // whose order was never set.
        if ("0".equals(partOrder)) {
            partOrder = "";
        }
        // TODO: look up partOrder in metadata and apply the result to part.
    }
}
}

View file

@ -1,12 +1,9 @@
package writer2latex.rdf;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.Vector;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import writer2latex.api.ConverterResult;
import writer2latex.api.OutputFile;
@ -28,6 +25,9 @@ public class RDFDocumentResult implements OutputFile {
this.sFileName = Misc.removeExtension(fileName);
this.config = config;
rdfStructure = new DocumentStructure(outFiles,fileName);
Metadata metadata = new Metadata();
metadata.read(config.getCSVMetadataFile());
rdfStructure.applyMetadata(metadata);
rdfStructure.createTree();
}

View file

@ -40,7 +40,7 @@ import writer2latex.util.Misc;
public class XhtmlConfig extends writer2latex.base.ConfigBase {
// Implement configuration methods
// Number of option slots. Must be greater than the highest option index
// (CSV_METADATA = 64), otherwise options[CSV_METADATA] has no slot.
// NOTE(review): presumably ConfigBase sizes the options array from this value — confirm.
protected int getOptionCount() { return 65; }
protected String getDefaultConfigPath() { return "/writer2latex/xhtml/config/"; }
// Override setOption: To be backwards compatible, we must accept options
@ -163,6 +163,7 @@ public class XhtmlConfig extends writer2latex.base.ConfigBase {
private static final int MIN_LETTER_SPACING = 61;
private static final int PAGE_BREAK_STYLE = 62;
private static final int ALIGN_SPLITS_TO_PAGES = 63;
private static final int CSV_METADATA = 64;
protected ComplexOption xheading = addComplexOption("heading-map");
protected ComplexOption xpar = addComplexOption("paragraph-map");
@ -292,6 +293,8 @@ public class XhtmlConfig extends writer2latex.base.ConfigBase {
options[PAGINATION] = new BooleanOption("pagination","true");
options[MIN_LETTER_SPACING] = new Option("min_letter_spacing","0.15");
options[PAGE_BREAK_STYLE] = new Option("page_break_style","");
options[CSV_METADATA] = new Option("csv_metadata","");
options[CSS_INLINE] = new BooleanOption("css_inline","true");
options[ALIGN_SPLITS_TO_PAGES] = new BooleanOption("align_splits_to_pages","false");
@ -439,6 +442,8 @@ public class XhtmlConfig extends writer2latex.base.ConfigBase {
// Returns the string value of the UPLINK option.
public String getXhtmlUplink() { return options[UPLINK].getString(); }
// Returns the string value of the DIRECTORY_ICON option.
public String getXhtmlDirectoryIcon() { return options[DIRECTORY_ICON].getString(); }
// Returns the string value of the DOCUMENT_ICON option.
public String getXhtmlDocumentIcon() { return options[DOCUMENT_ICON].getString(); }
// Returns the string value of the csv_metadata option (initialised to "").
// RDFDocumentResult hands this value to Metadata.read().
public String getCSVMetadataFile() { return options[CSV_METADATA].getString(); }
public boolean getGreenstoneSeparation() {
if ( ((IntegerOption) options[SPLIT_LEVEL]).getValue() != 0) {
return false;