144 lines
4.2 KiB
Java
144 lines
4.2 KiB
Java
|
package writer2latex.rdf;
|
||
|
|
||
|
import java.util.HashMap;
|
||
|
import java.util.Iterator;
|
||
|
import java.util.Set;
|
||
|
import java.util.Vector;
|
||
|
|
||
|
import org.apache.jena.rdf.model.Model;
|
||
|
import org.apache.jena.rdf.model.ModelFactory;
|
||
|
import org.apache.jena.rdf.model.Resource;
|
||
|
|
||
|
import writer2latex.xhtml.XhtmlDocument;
|
||
|
|
||
|
import org.apache.jena.rdf.model.Property;
|
||
|
|
||
|
|
||
|
public class DocumentStructure {
|
||
|
private static final String TOCITEM = "TOCItem";
|
||
|
private HashMap<String, Resource> elements;
|
||
|
private HashMap<String, DocumentPart> inputParts;
|
||
|
private final String TS = "https://iph.ras.ru/text_structures#";
|
||
|
private final String PARSERNAME = "w2phtml";
|
||
|
private final String EXCERPT = "Excerpt";
|
||
|
private final String TOC_ELEMENT = "TOCElement";
|
||
|
private String documentID = "DOC_ID";
|
||
|
|
||
|
Model m;
|
||
|
|
||
|
private DocumentStructure() {
|
||
|
this.elements = new HashMap<String, Resource>();
|
||
|
this.inputParts = new HashMap<String, DocumentPart>();
|
||
|
this.m = ModelFactory.createDefaultModel();
|
||
|
}
|
||
|
public DocumentStructure(Vector<XhtmlDocument> files) {
|
||
|
this();
|
||
|
System.out.println("DocStructure");
|
||
|
Iterator<XhtmlDocument> filesIterator = files.iterator();
|
||
|
while (filesIterator.hasNext()) {
|
||
|
XhtmlDocument inputDoc = filesIterator.next();
|
||
|
DocumentPart part = new DocumentPart(inputDoc);
|
||
|
addPart(part);
|
||
|
}
|
||
|
addEmptyParts();
|
||
|
}
|
||
|
|
||
|
private void addEmptyParts() {
|
||
|
Set<String> paths = inputParts.keySet();
|
||
|
String[] array = new String[paths.size()];
|
||
|
paths.toArray(array);
|
||
|
for (int k = 0; k < array.length;k++) {
|
||
|
String[] levels = array[k].split(" ");
|
||
|
for (int i = 0; i < levels.length; i++) {
|
||
|
if (levels[i].equals("0")) {
|
||
|
String emptyPath = createEmptyPath(levels, i);
|
||
|
if (!inputParts.containsKey(emptyPath)) {
|
||
|
System.out.println("empty path added " + emptyPath);
|
||
|
DocumentPart emptyPart = new DocumentPart(emptyPath);
|
||
|
addPart(emptyPart);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
|
||
|
}
|
||
|
private String createEmptyPath(String[] levels, int i) {
|
||
|
StringBuilder emptyPath = new StringBuilder();
|
||
|
for (int j = 0; j<= i;j++) {
|
||
|
if (j != 0) {
|
||
|
emptyPath.append(" ");
|
||
|
}
|
||
|
emptyPath.append(levels[j]);
|
||
|
}
|
||
|
return emptyPath.toString();
|
||
|
}
|
||
|
public void printModel() {
|
||
|
m.write(System.out, "RDF/XML-ABBREV");
|
||
|
}
|
||
|
|
||
|
private void addPart(DocumentPart docExcerpt) {
|
||
|
inputParts.put(docExcerpt.getPath(), docExcerpt);
|
||
|
}
|
||
|
|
||
|
private Resource createExcerpt(DocumentPart docExcerpt) {
|
||
|
Resource excerpt = m.createResource(TS + EXCERPT + "/" + PARSERNAME + "_" + documentID + docExcerpt.getSafePath());
|
||
|
Property htmlExcerpt = m.createProperty(TS + "htmlExcerpt");
|
||
|
excerpt.addProperty(htmlExcerpt, docExcerpt.getBody());
|
||
|
|
||
|
return excerpt;
|
||
|
}
|
||
|
|
||
|
private void createElement(DocumentPart docPart) {
|
||
|
String elementName = TS + TOC_ELEMENT + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath();
|
||
|
Resource element = m.createResource(elementName);
|
||
|
elements.put(docPart.getPath(), element);
|
||
|
attachExcerpt(docPart, element);
|
||
|
}
|
||
|
private void createTOCItem(DocumentPart docPart) {
|
||
|
String tocItemName = TS + TOCITEM + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath();
|
||
|
Resource tocItem = m.createResource(tocItemName);
|
||
|
Property pointsTo = m.createProperty(TS + "pointsTo");
|
||
|
Property itemNumber = m.createProperty(TS + "itemNumber");
|
||
|
Property hasTOCItem = m.createProperty(TS + "hasTOCItem");
|
||
|
tocItem.addLiteral(itemNumber, docPart.getNumber());
|
||
|
tocItem.addProperty(pointsTo, elements.get(docPart.getPath()));
|
||
|
|
||
|
|
||
|
Resource parent = elements.get(docPart.getParentPath());
|
||
|
parent.addProperty(hasTOCItem, tocItem);
|
||
|
|
||
|
}
|
||
|
|
||
|
private void attachExcerpt(DocumentPart docPart, Resource element) {
|
||
|
if (docPart.getBody().isEmpty()) {
|
||
|
return;
|
||
|
}
|
||
|
Resource excerpt = createExcerpt(docPart);
|
||
|
Property hasText = m.createProperty(TS + "hasText");
|
||
|
element.addProperty(hasText, excerpt);
|
||
|
}
|
||
|
|
||
|
public void createTree() {
|
||
|
createElements();
|
||
|
createTOCItems();
|
||
|
}
|
||
|
|
||
|
private void createTOCItems() {
|
||
|
Set<String> paths = inputParts.keySet();
|
||
|
for (String path : paths) {
|
||
|
DocumentPart part = inputParts.get(path);
|
||
|
if (!part.getNumber().equals("")) {
|
||
|
createTOCItem(part);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
private void createElements() {
|
||
|
Set<String> paths = inputParts.keySet();
|
||
|
for (String path : paths) {
|
||
|
createElement(inputParts.get(path));
|
||
|
}
|
||
|
}
|
||
|
}
|