package writer2latex.rdf; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStream; import java.util.HashMap; import java.util.Set; import java.util.Vector; import org.apache.jena.ontology.OntClass; import org.apache.jena.ontology.OntModel; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Resource; import org.apache.jena.vocabulary.*; import writer2latex.xhtml.XhtmlDocument; import org.apache.jena.rdf.model.Property; public class DocumentStructure { private static final String TOCITEM = "TOCItem"; private HashMap elements; private HashMap inputParts; private final String TS = "https://iph.ras.ru/text_structures#"; private final String PARSERNAME = "w2phtml"; private final String EXCERPT = "textExcerpt"; private final String TOC_ELEMENT = "TOCElement"; private final String ELENPHARTICLE = "elenphArticle"; private String documentID = "DOC_ID"; private OntModel m; private OntClass excerptClass; private OntClass elementClass; private OntClass itemClass; private OntClass elenphClass; private DocumentStructure() { this.elements = new HashMap(); this.inputParts = new HashMap(); this.m = ModelFactory.createOntologyModel(); this.excerptClass = m.createClass(TS + EXCERPT); this.elementClass = m.createClass(TS + TOC_ELEMENT); this.itemClass = m.createClass(TS + TOCITEM); this.elenphClass = m.createClass(TS + ELENPHARTICLE); } public DocumentStructure(Vector files,String fileName) { this(); this.documentID = fileName; //Iterator filesIterator = files.iterator(); //while (filesIterator.hasNext()) { //XhtmlDocument inputDoc = filesIterator.next(); for(int i = 0 ; i< files.size();i++) { XhtmlDocument inputDoc = files.get(i); DocumentPart part = new DocumentPart(inputDoc); part.setOrder(Integer.toString(i)); addPart(part); } //} addEmptyParts(); } private void addEmptyParts() { Set paths = inputParts.keySet(); String[] array = new String[paths.size()]; paths.toArray(array); for (int k = 0; k < array.length;k++) { String[] levels = array[k].split(" "); for (int i = 0; i < levels.length; i++) { if (levels[i].equals("0")) { String emptyPath = createEmptyPath(levels, i); if (!inputParts.containsKey(emptyPath)) { DocumentPart emptyPart = new DocumentPart(emptyPath); addPart(emptyPart); } } } } } private String createEmptyPath(String[] levels, int i) { StringBuilder emptyPath = new StringBuilder(); for (int j = 0; j<= i;j++) { if (j != 0) { emptyPath.append(" "); } emptyPath.append(levels[j]); } return emptyPath.toString(); } public void printModel(String fileName) { File outFile = new File(fileName + ".rdf"); FileWriter fw = null; try { outFile.createNewFile(); fw = new FileWriter(outFile); m.write(fw,"RDF/XML-ABBREV"); } catch (IOException e) { System.out.println("File couldn't be created"); e.printStackTrace(); } finally { try { fw.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } public void printModel(OutputStream os) { m.write(os,"RDF/XML-ABBREV"); } private void addPart(DocumentPart docExcerpt) { inputParts.put(docExcerpt.getPath(), docExcerpt); } private Resource createExcerpt(DocumentPart docExcerpt) { String name = TS + EXCERPT + "/" + PARSERNAME + "_" + documentID + docExcerpt.getSafePath(); Resource excerpt = m.createIndividual(name, excerptClass); Property htmlExcerpt = m.createProperty(TS + "htmlExcerpt"); excerpt.addLiteral(htmlExcerpt, docExcerpt.getBody()); return excerpt; } private void createElement(DocumentPart docPart) { String elementName = TS + TOC_ELEMENT + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath(); Resource element = m.createIndividual(elementName,elementClass); element.addProperty( RDFS.label, docPart.getName()); elements.put(docPart.getPath(), element); attachExcerpt(docPart, element); } private void createDocumentElement(DocumentPart docPart) { String elementName = TS + ELENPHARTICLE + "/" + PARSERNAME + "_" + documentID ; Resource element = m.createResource(elementName,elenphClass); element.addProperty( RDFS.label, docPart.getName()); elements.put(docPart.getPath(), element); attachExcerpt(docPart, element); } private void createTOCItem(DocumentPart docPart) { String tocItemName = TS + TOCITEM + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath(); Resource tocItem = m.createIndividual(tocItemName,itemClass); tocItem.addProperty( RDFS.label, docPart.getName()); Property pointsTo = m.createProperty(TS + "pointsTo"); Property itemNumber = m.createProperty(TS + "itemNumber"); Property hasTOCItem = m.createProperty(TS + "hasTOCItem"); tocItem.addLiteral(itemNumber, docPart.getNumber()); m.add(tocItem, pointsTo, elements.get(docPart.getPath())); if (!docPart.getPath().isEmpty()) { Resource parent = elements.get(docPart.getParentPath()); m.add(parent, hasTOCItem, tocItem); } } private void attachExcerpt(DocumentPart docPart, Resource element) { if (docPart.getBody().isEmpty()) { return; } Resource excerpt = createExcerpt(docPart); excerpt.addProperty( RDFS.label, docPart.getName()); Property hasText = m.createProperty(TS + "hasText"); element.addProperty(hasText, excerpt); } public void createTree() { createElements(); createTOCItems(); } private void createTOCItems() { Set paths = inputParts.keySet(); for (String path : paths) { DocumentPart part = inputParts.get(path); if (!part.getNumber().equals("")) { createTOCItem(part); } } } private void createElements() { Set paths = inputParts.keySet(); for (String path : paths) { DocumentPart part = inputParts.get(path); if (part.getPath().isEmpty()) { createDocumentElement(part); } else { createElement(part); } } } public void applyMetadata(Metadata metadata) { for (DocumentPart part: inputParts.values()) { String partOrder = part.getOrder(); //Set blank main doc number as it is in CSV if (partOrder.equals("0")) { partOrder = ""; } } } }