Improvements for RDF converter

This commit is contained in:
Georgy Litvinov 2020-02-25 23:42:59 +01:00
parent 6c90059fb0
commit 2010bd3daf
3 changed files with 68 additions and 59 deletions

View file

@ -56,6 +56,9 @@ public class DocumentPart {
return parentPath;
}
/**
 * Returns the name of this document part.
 *
 * @return the stored name, or the placeholder {@code "NONAME"} when the
 *         name is {@code null} or empty
 */
public String getName() {
    // Treat a missing name like an empty one instead of failing with a NullPointerException.
    if (name == null || name.isEmpty()) {
        return "NONAME";
    }
    return name;
}

View file

@ -1,17 +1,25 @@
package writer2latex.rdf;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Set;
import java.util.Vector;
import org.apache.jena.ontology.OntClass;
import org.apache.jena.ontology.OntModel;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.vocabulary.*;
import writer2latex.xhtml.XhtmlDocument;
import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFWriter;
public class DocumentStructure {
@ -25,12 +33,23 @@ public class DocumentStructure {
// Local name used to build the URI of the class that types the document-level element.
private final String ELENPHARTICLE = "elenphArticle";
// Identifier inserted into every generated resource URI; defaults to "DOC_ID".
private String documentID = "DOC_ID";
// Ontology model holding every generated statement. Declared once: the former
// package-private "Model m" declaration duplicated this field name.
private OntModel m;
// Ontology classes used to type the generated resources (created in the private constructor).
private OntClass excerptClass;
private OntClass elementClass;
private OntClass itemClass;
private OntClass elenphClass;
/**
 * Initialises the lookup maps and the ontology model, and registers the
 * ontology classes that the generated resources are typed with.
 */
private DocumentStructure() {
    this.elements = new HashMap<String, Resource>();
    this.inputParts = new HashMap<String, DocumentPart>();
    // Only the ontology model is created; the earlier createDefaultModel()
    // assignment was overwritten immediately and has been dropped.
    this.m = ModelFactory.createOntologyModel();
    this.excerptClass = m.createClass(TS + EXCERPT);
    this.elementClass = m.createClass(TS + TOC_ELEMENT);
    this.itemClass = m.createClass(TS + TOCITEM);
    this.elenphClass = m.createClass(TS + ELENPHARTICLE);
}
public DocumentStructure(Vector<XhtmlDocument> files) {
this();
@ -60,6 +79,7 @@ public class DocumentStructure {
}
}
}
private String createEmptyPath(String[] levels, int i) {
StringBuilder emptyPath = new StringBuilder();
for (int j = 0; j<= i;j++) {
@ -74,41 +94,72 @@ public class DocumentStructure {
m.write(System.out, "RDF/XML-ABBREV");
}
/**
 * Writes the model in the "RDF/XML-ABBREV" syntax to the file
 * {@code fileName + ".rdf"}.
 *
 * <p>I/O failures are reported on standard output together with a stack
 * trace; they are not propagated to the caller.
 *
 * @param fileName target file name without the {@code .rdf} extension
 */
public void printModel(String fileName) {
    File outFile = new File(fileName + ".rdf");
    // try-with-resources closes the stream on every path; the previous
    // finally block dereferenced a null writer when the file could not be opened.
    // A byte stream is used instead of a FileWriter so the serialiser, not the
    // platform default charset, decides the encoding of the XML output.
    try (FileOutputStream out = new FileOutputStream(outFile)) {
        m.write(out, "RDF/XML-ABBREV");
    } catch (IOException e) {
        System.out.println("File couldn't be created");
        e.printStackTrace();
    }
}
/**
 * Registers a document part in {@code inputParts}, keyed by the part's path.
 *
 * @param docExcerpt the part to register
 */
private void addPart(DocumentPart docExcerpt) {
    final String partPath = docExcerpt.getPath();
    inputParts.put(partPath, docExcerpt);
}
/**
 * Creates the excerpt individual for a document part and attaches the
 * part's body to it through the {@code htmlExcerpt} property.
 *
 * @param docExcerpt the part whose body becomes the excerpt content
 * @return the newly created excerpt resource, typed with {@code excerptClass}
 */
private Resource createExcerpt(DocumentPart docExcerpt) {
    String name = TS + EXCERPT + "/" + PARSERNAME + "_" + documentID + docExcerpt.getSafePath();
    // Single declaration: the resource is created directly as a typed individual.
    Resource excerpt = m.createIndividual(name, excerptClass);
    Property htmlExcerpt = m.createProperty(TS + "htmlExcerpt");
    // The body is attached once; the additional addProperty call with the same value was dropped.
    excerpt.addLiteral(htmlExcerpt, docExcerpt.getBody());
    return excerpt;
}
/**
 * Creates the element individual for a document part, labels it with the
 * part's name, records it in {@code elements} under the part's path and
 * attaches the part's excerpt.
 *
 * @param docPart the part to represent as an element
 */
private void createElement(DocumentPart docPart) {
    String elementName = TS + TOC_ELEMENT + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath();
    // Single declaration: the untyped createResource variant duplicated this local.
    Resource element = m.createIndividual(elementName, elementClass);
    element.addProperty(RDFS.label, docPart.getName());
    elements.put(docPart.getPath(), element);
    attachExcerpt(docPart, element);
}
/**
 * Creates the document-level resource, labels it with the part's name,
 * records it in {@code elements} under the part's path and attaches the
 * part's excerpt. The URI carries no path suffix, only the document id.
 *
 * @param docPart the part that represents the whole document
 */
private void createDocumentElement(DocumentPart docPart) {
    String elementName = TS + ELENPHARTICLE + "/" + PARSERNAME + "_" + documentID;
    // Single declaration: the resource is created once, typed with elenphClass.
    Resource element = m.createResource(elementName, elenphClass);
    element.addProperty(RDFS.label, docPart.getName());
    elements.put(docPart.getPath(), element);
    attachExcerpt(docPart, element);
}
private void createTOCItem(DocumentPart docPart) {
String tocItemName = TS + TOCITEM + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath();
Resource tocItem = m.createResource(tocItemName);
Resource tocItem = m.createIndividual(tocItemName,itemClass);
tocItem.addProperty( RDFS.label, docPart.getName());
Property pointsTo = m.createProperty(TS + "pointsTo");
Property itemNumber = m.createProperty(TS + "itemNumber");
Property hasTOCItem = m.createProperty(TS + "hasTOCItem");
tocItem.addLiteral(itemNumber, docPart.getNumber());
tocItem.addProperty(pointsTo, elements.get(docPart.getPath()));
m.add(tocItem, pointsTo, elements.get(docPart.getPath()));
if (!docPart.getPath().isEmpty()) {
Resource parent = elements.get(docPart.getParentPath());
m.add(parent, hasTOCItem, tocItem);
@ -121,6 +172,8 @@ public class DocumentStructure {
return;
}
Resource excerpt = createExcerpt(docPart);
excerpt.addProperty( RDFS.label, docPart.getName());
Property hasText = m.createProperty(TS + "hasText");
element.addProperty(hasText, excerpt);
}

View file

@ -51,8 +51,7 @@ public final class RDFConverter extends Converter {
/**
 * Converts the input to XHTML via the superclass, writes the RDF
 * description of the resulting document structure, and packages the result.
 *
 * @param is the input stream handed to the superclass conversion
 * @param sTargetFileName target file name used for the RDF output and the package
 * @return the result produced by {@code createPackage(xhtmlResult, sTargetFileName)}
 * @throws IOException if the underlying conversion fails
 */
@Override
public ConverterResult convert(InputStream is, String sTargetFileName) throws IOException {
    setOpenPubStructure(false);
    ConverterResult xhtmlResult = super.convert(is, "chapter");
    // The RDF is generated exactly once here; the extra no-argument
    // createPackage() call only repeated that work and its result was discarded.
    createRDF(sTargetFileName);
    return createPackage(xhtmlResult, sTargetFileName);
}
@ -62,61 +61,15 @@ public final class RDFConverter extends Converter {
return createPackage(xhtmlResult,sTargetFileName);
}
/**
 * Generates the RDF output through {@code createRDF()} and then hands back
 * the converter result.
 *
 * @return the value of {@code converterResult} after the RDF has been generated
 */
private ConverterResult createPackage() {
    this.createRDF();
    return this.converterResult;
}
private void createRDF() {
/*
* Resource root = m.createResource(ts + "elenphArticle");
*
* Property itemNumber = m.createProperty(ts + "itemNumber"); Property hasItem =
* m.createProperty(ts + "hasItem"); Property hasText = m.createProperty(ts +
* "hasText");
*
* elements.put("root", root);
*/
private void createRDF(String sTargetFileName) {
DocumentStructure structure = new DocumentStructure(this.outFiles);
structure.createTree();
structure.printModel();
/*
* if (elements.containsKey(path)) { element = elements.get(path); } else {
* element = m.createResource(ts + "Element/" + path); }
* element.addProperty(hasText, body.toString());
*
* tocItem = m.createResource(ts + "TOCItem/" + path);
* tocItem.addProperty(itemNumber, order); String parentPath =
* calculateParentPath(path); System.out.println("parentPath " + parentPath);
* System.out.println("exPath " + path); Resource parent; if
* (elements.containsKey(parentPath)) { parent = elements.get(parentPath); }
* else { parent = m.createResource(ts + "Element/" + parentPath); }
* m.add(parent, hasItem, tocItem)
*/;
/* Resource root = m.createResource(ts + "Element");
Property P = m.createProperty(ts + "TOCItem");
Property Q = m.createProperty(nsB + "Q");
Resource y = m.createResource(ts + "Excerpt");
Resource z = m.createResource(ts + "z");*/
/* m.add(root, P, y);
m.add(y, Q, z);
m.setNsPrefix("nsA", ts);
m.write(System.out, "RDF/XML-ABBREV");*/
structure.printModel(sTargetFileName);
}
private ConverterResult createPackage(ConverterResult xhtmlResult, String sTargetFileName) {
ConverterResultImpl rdfResult = new ConverterResultImpl();
RDFWriter rdfWriter = new RDFWriter(xhtmlResult,sTargetFileName,3,getXhtmlConfig());