package writer2latex.rdf; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStream; import java.util.HashMap; import java.util.Set; import java.util.Vector; import org.apache.jena.ontology.OntClass; import org.apache.jena.ontology.OntModel; import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.Resource; import org.apache.jena.vocabulary.*; import writer2latex.xhtml.XhtmlDocument; import org.apache.jena.rdf.model.Property; public class DocumentStructure { private static final String TOCITEM = "TOCItem"; private HashMap elements; private HashMap inputParts; private final String TS = "https://iph.ras.ru/text_structures#"; private final String PARSERNAME = "w2phtml"; private final String EXCERPT = "elenphExcerpt"; private final String TOC_ELEMENT = "TOCElement"; private final String ELENPHARTICLE = "elenphArticle"; private String documentID = "DOC_ID"; private OntModel m; private OntClass excerptClass; private OntClass elementClass; private OntClass itemClass; private OntClass elenphClass; private DocumentStructure() { this.elements = new HashMap(); this.inputParts = new HashMap(); this.m = ModelFactory.createOntologyModel(); this.excerptClass = m.createClass(TS + EXCERPT); this.elementClass = m.createClass(TS + TOC_ELEMENT); this.itemClass = m.createClass(TS + TOCITEM); this.elenphClass = m.createClass(TS + ELENPHARTICLE); } public DocumentStructure(Vector files,String fileName) { this(); this.documentID = fileName; for(int i = 0 ; i< files.size();i++) { XhtmlDocument inputDoc = files.get(i); DocumentPart part = new DocumentPart(inputDoc); part.setOrder(Integer.toString(i)); addPart(part); } addEmptyParts(); } private void addEmptyParts() { Set paths = inputParts.keySet(); String[] array = new String[paths.size()]; paths.toArray(array); for (int k = 0; k < array.length;k++) { String[] levels = array[k].split(" "); for (int i = 0; i < levels.length; i++) { if (levels[i].equals("0")) { String emptyPath = createEmptyPath(levels, i); if (!inputParts.containsKey(emptyPath)) { DocumentPart emptyPart = new DocumentPart(emptyPath); addPart(emptyPart); } } } } } private String createEmptyPath(String[] levels, int i) { StringBuilder emptyPath = new StringBuilder(); for (int j = 0; j<= i;j++) { if (j != 0) { emptyPath.append(" "); } emptyPath.append(levels[j]); } return emptyPath.toString(); } public void printModel(String fileName) { File outFile = new File(fileName + ".rdf"); FileWriter fw = null; try { outFile.createNewFile(); fw = new FileWriter(outFile); m.write(fw,"RDF/XML-ABBREV"); } catch (IOException e) { System.out.println("File couldn't be created"); e.printStackTrace(); } finally { try { fw.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } public void printModel(OutputStream os) { m.write(os,"RDF/XML-ABBREV"); } private void addPart(DocumentPart docExcerpt) { inputParts.put(docExcerpt.getPath(), docExcerpt); } private Resource createExcerpt(DocumentPart docExcerpt) { String name = TS + EXCERPT + "/" + PARSERNAME + "_" + documentID + docExcerpt.getSafePath(); Resource excerpt = m.createIndividual(name, excerptClass); if (!docExcerpt.getBody().isEmpty()) { Property htmlExcerpt = m.createProperty(TS + "htmlExcerpt"); excerpt.addLiteral(htmlExcerpt, docExcerpt.getBody()); } return excerpt; } private void createElement(DocumentPart docPart) { String elementName = TS + TOC_ELEMENT + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath(); Resource element = m.createIndividual(elementName,elementClass); element.addProperty( RDFS.label, docPart.getName()); elements.put(docPart.getPath(), element); attachExcerpt(docPart, element); } private void createDocumentElement(DocumentPart docPart) { String elementName = TS + ELENPHARTICLE + "/" + PARSERNAME + "_" + documentID ; Resource element = m.createResource(elementName,elenphClass); element.addProperty( RDFS.label, docPart.getName()); addMetadataProperties(element,docPart.getMetadata()); elements.put(docPart.getPath(), element); attachExcerpt(docPart, element); } private void addMetadataProperties(Resource resource, HashMap> metadata) { Set names = metadata.keySet(); for (String name : names) { Set values = metadata.get(name); if (values != null) { for (String value : values) { addMetadata(resource,name,value); } } } } private void addMetadata(Resource resource, String name, String value) { if (isNotBlacklisted(name)) { name = convertName(name); if (isDefinedInOntology(resource,name)) { Property property = m.createProperty(TS + name); resource.addProperty( property, value); } } } private boolean isDefinedInOntology(Resource resource, String name) { String nameSpace = resource.getNameSpace(); if (nameSpace.contains(TS + EXCERPT)) { if (name.equals("author") || name.equals("bibliography") || name.equals("keywords") || name.equals("works") || name.equals("affiliation") ) { return true; } } else if (nameSpace.contains(TS + ELENPHARTICLE)) { if (name.equals("doi") || name.equals("firstPublication") || //name.equals("yearAndMonth") || name.equals("year") || name.equals("issue") ) { return true; } } System.out.println(resource.getNameSpace() + " " + name); System.out.println("rightNamespace" + TS + EXCERPT ); return false; } private String convertName(String name) { if (name.equals("Affiliation")){ return "affiliation"; } else if (name.equals("DOI")){ return "doi"; } else if (name.equals("1st-edition")){ return "firstPublication"; } else if (name.equals("year")){ return "yearAndMonth"; } else if (name.equals("year.short")){ return "year"; }else return name; } private boolean isNotBlacklisted(String name) { if (name.equals("Filename") || name.equals("Section") || name.equals("dc.Title") || name.equals("subtitle") || name.equals("dc.Identifier") ) { return false; } return true; } private void createTOCItem(DocumentPart docPart) { String tocItemName = TS + TOCITEM + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath(); Resource tocItem = m.createIndividual(tocItemName,itemClass); tocItem.addProperty( RDFS.label, docPart.getName()); Property pointsTo = m.createProperty(TS + "pointsTo"); Property itemNumber = m.createProperty(TS + "itemNumber"); Property hasTOCItem = m.createProperty(TS + "hasTOCItem"); tocItem.addLiteral(itemNumber, docPart.getNumber()); m.add(tocItem, pointsTo, elements.get(docPart.getPath())); if (!docPart.getPath().isEmpty()) { Resource parent = elements.get(docPart.getParentPath()); m.add(parent, hasTOCItem, tocItem); } } private void attachExcerpt(DocumentPart docPart, Resource element) { if (docPart.isEmpty()) { return; } Resource excerpt = createExcerpt(docPart); excerpt.addProperty( RDFS.label, docPart.getName()); Property hasText = m.createProperty(TS + "hasText"); element.addProperty(hasText, excerpt); if (!docPart.isMasterPart()) { addMetadataProperties(excerpt,docPart.getMetadata()); } } public void createTree() { createElements(); createTOCItems(); } private void createTOCItems() { Set paths = inputParts.keySet(); for (String path : paths) { DocumentPart part = inputParts.get(path); if (!part.getNumber().equals("")) { createTOCItem(part); } } } private void createElements() { Set paths = inputParts.keySet(); for (String path : paths) { DocumentPart part = inputParts.get(path); if (part.getPath().isEmpty()) { createDocumentElement(part); } else { createElement(part); } } } public void applyMetadata(Metadata metadata) { for (DocumentPart part: inputParts.values()) { metadata.apply(part); } } }