diff --git a/src/main/java/writer2latex/rdf/DocumentPart.java b/src/main/java/writer2latex/rdf/DocumentPart.java new file mode 100644 index 0000000..ecd8537 --- /dev/null +++ b/src/main/java/writer2latex/rdf/DocumentPart.java @@ -0,0 +1,91 @@ +package writer2latex.rdf; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSSerializer; + +import writer2latex.xhtml.XhtmlDocument; + +public class DocumentPart { + + private XhtmlDocument excerptDoc; + private String path; + private String itemNumber; + private String body; + private String parentPath; + private String name; + + public DocumentPart(XhtmlDocument document) { + this.excerptDoc = document; + extractPath(); + extractName(); + extractNumber(); + extractBody(); + calculateParentPath(); + } + public DocumentPart(String path) { + this.path = path; + this.name = ""; + extractNumber(); + this.body = ""; + calculateParentPath(); + } + private void extractName() { + Element excerptContentNode = excerptDoc.getContentNode(); + this.name = excerptContentNode.getAttribute("name"); + } + private void extractNumber() { + itemNumber = path.replaceAll("([0-9]+ )+", ""); + } + public String getPath() { + return path; + } + public String getSafePath(){ + return path.replaceAll(" ","_"); + } + public String getNumber() { + return itemNumber; + } + public String getBody() { + return body; + } + public String getParentPath() { + return parentPath; + } + public String getName() { + return name; + } + + private void extractPath() { + Element excerptContentNode = excerptDoc.getContentNode(); + this.path = excerptContentNode.getAttribute("path"); + } + + private void calculateParentPath() { + if(path.length() == 1) { + parentPath = ""; + } + parentPath = path.replaceAll(" [0-9]+$", ""); + } + private void extractBody() { + Element excerptContentNode = excerptDoc.getContentNode(); + StringBuilder bodyBuilder; + Document document = excerptContentNode.getOwnerDocument(); + DOMImplementationLS domImplLS = (DOMImplementationLS) document.getImplementation(); + LSSerializer serializer = domImplLS.createLSSerializer(); + serializer.getDomConfig().setParameter("xml-declaration", false); + bodyBuilder = new StringBuilder(); + NodeList excerptContentNodes = excerptContentNode.getChildNodes(); + int i = 0; + while (excerptContentNodes.getLength() > i) { + Node child = excerptContentNodes.item(i); + bodyBuilder.append(serializer.writeToString(child)); + i++; + + } + this.body = bodyBuilder.toString(); + } +} diff --git a/src/main/java/writer2latex/rdf/DocumentStructure.java b/src/main/java/writer2latex/rdf/DocumentStructure.java new file mode 100644 index 0000000..5fef7bc --- /dev/null +++ b/src/main/java/writer2latex/rdf/DocumentStructure.java @@ -0,0 +1,143 @@ +package writer2latex.rdf; + +import java.util.HashMap; +import java.util.Iterator; +import java.util.Set; +import java.util.Vector; + +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdf.model.Resource; + +import writer2latex.xhtml.XhtmlDocument; + +import org.apache.jena.rdf.model.Property; + + +public class DocumentStructure { + private static final String TOCITEM = "TOCItem"; + private HashMap elements; + private HashMap inputParts; + private final String TS = "https://iph.ras.ru/text_structures#"; + private final String PARSERNAME = "w2phtml"; + private final String EXCERPT = "Excerpt"; + private final String TOC_ELEMENT = "TOCElement"; + private String documentID = "DOC_ID"; + + Model m; + + private DocumentStructure() { + this.elements = new HashMap(); + this.inputParts = new HashMap(); + this.m = ModelFactory.createDefaultModel(); + } + public DocumentStructure(Vector files) { + this(); + System.out.println("DocStructure"); + Iterator filesIterator = files.iterator(); + while (filesIterator.hasNext()) { + XhtmlDocument inputDoc = filesIterator.next(); + DocumentPart part = new DocumentPart(inputDoc); + addPart(part); + } + addEmptyParts(); + } + + private void addEmptyParts() { + Set paths = inputParts.keySet(); + String[] array = new String[paths.size()]; + paths.toArray(array); + for (int k = 0; k < array.length;k++) { + String[] levels = array[k].split(" "); + for (int i = 0; i < levels.length; i++) { + if (levels[i].equals("0")) { + String emptyPath = createEmptyPath(levels, i); + if (!inputParts.containsKey(emptyPath)) { + System.out.println("empty path added " + emptyPath); + DocumentPart emptyPart = new DocumentPart(emptyPath); + addPart(emptyPart); + } + } + } + } + + + } + private String createEmptyPath(String[] levels, int i) { + StringBuilder emptyPath = new StringBuilder(); + for (int j = 0; j<= i;j++) { + if (j != 0) { + emptyPath.append(" "); + } + emptyPath.append(levels[j]); + } + return emptyPath.toString(); + } + public void printModel() { + m.write(System.out, "RDF/XML-ABBREV"); + } + + private void addPart(DocumentPart docExcerpt) { + inputParts.put(docExcerpt.getPath(), docExcerpt); + } + + private Resource createExcerpt(DocumentPart docExcerpt) { + Resource excerpt = m.createResource(TS + EXCERPT + "/" + PARSERNAME + "_" + documentID + docExcerpt.getSafePath()); + Property htmlExcerpt = m.createProperty(TS + "htmlExcerpt"); + excerpt.addProperty(htmlExcerpt, docExcerpt.getBody()); + + return excerpt; + } + + private void createElement(DocumentPart docPart) { + String elementName = TS + TOC_ELEMENT + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath(); + Resource element = m.createResource(elementName); + elements.put(docPart.getPath(), element); + attachExcerpt(docPart, element); + } + private void createTOCItem(DocumentPart docPart) { + String tocItemName = TS + TOCITEM + "/" + PARSERNAME + "_" + documentID + docPart.getSafePath(); + Resource tocItem = m.createResource(tocItemName); + Property pointsTo = m.createProperty(TS + "pointsTo"); + Property itemNumber = m.createProperty(TS + "itemNumber"); + Property hasTOCItem = m.createProperty(TS + "hasTOCItem"); + tocItem.addLiteral(itemNumber, docPart.getNumber()); + tocItem.addProperty(pointsTo, elements.get(docPart.getPath())); + + + Resource parent = elements.get(docPart.getParentPath()); + parent.addProperty(hasTOCItem, tocItem); + + } + + private void attachExcerpt(DocumentPart docPart, Resource element) { + if (docPart.getBody().isEmpty()) { + return; + } + Resource excerpt = createExcerpt(docPart); + Property hasText = m.createProperty(TS + "hasText"); + element.addProperty(hasText, excerpt); + } + + public void createTree() { + createElements(); + createTOCItems(); + } + + private void createTOCItems() { + Set paths = inputParts.keySet(); + for (String path : paths) { + DocumentPart part = inputParts.get(path); + if (!part.getNumber().equals("")) { + createTOCItem(part); + } + } + } + + private void createElements() { + Set paths = inputParts.keySet(); + for (String path : paths) { + createElement(inputParts.get(path)); + } + } +} diff --git a/src/main/java/writer2latex/rdf/RDFConverter.java b/src/main/java/writer2latex/rdf/RDFConverter.java index 6408a2f..0dfb25a 100644 --- a/src/main/java/writer2latex/rdf/RDFConverter.java +++ b/src/main/java/writer2latex/rdf/RDFConverter.java @@ -2,57 +2,122 @@ package writer2latex.rdf; import java.io.IOException; import java.io.InputStream; +import java.io.StringWriter; +import java.util.HashMap; import java.util.Iterator; -import org.w3c.dom.Element; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.stream.events.Namespace; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.TransformerFactoryConfigurationError; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.ModelFactory; +import org.apache.jena.rdf.model.Property; +import org.apache.jena.rdf.model.Resource; +import org.w3c.dom.Document; +import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.w3c.dom.ls.DOMImplementationLS; +import org.w3c.dom.ls.LSSerializer; + +import com.sun.org.apache.xml.internal.utils.NameSpace; + +import pro.litvinovg.xml.Debug; import writer2latex.api.ConverterResult; import writer2latex.api.OutputFile; import writer2latex.base.ConverterResultImpl; -import writer2latex.epub.EPUBWriter; -import writer2latex.xhtml.Html5Converter; -import writer2latex.xhtml.Xhtml11Converter; +import writer2latex.xhtml.Converter; import writer2latex.xhtml.XhtmlDocument; -public final class RDFConverter extends Xhtml11Converter { +public final class RDFConverter extends Converter { // Constructor public RDFConverter() { - super(); + super(XhtmlDocument.HTML5); this.isRDF = true; } @Override public ConverterResult convert(InputStream is, String sTargetFileName) throws IOException { setOpenPubStructure(false); ConverterResult xhtmlResult = super.convert(is, "chapter"); - Iterator excerptIterator = this.outFiles.iterator(); - while (excerptIterator.hasNext()) { - XhtmlDocument excerptDoc = excerptIterator.next(); - Element excerptContentNode = excerptDoc.getContentNode(); - String exPath = excerptContentNode.getAttribute("path"); - System.out.println(exPath); - } - /* - * Iterator iterator = xhtmlResult.iterator(); - * while(iterator.hasNext()) { OutputFile of = iterator.next(); - * System.out.println(of.getFileName()); } - */ + createPackage(); + + return createPackage(xhtmlResult,sTargetFileName); } @Override public ConverterResult convert(org.w3c.dom.Document dom, String sTargetFileName, boolean bDestructive) throws IOException { ConverterResult xhtmlResult = super.convert(dom, "chapter", bDestructive); - //System.out.println(this.outFiles.size()); - /* - * Iterator iterator = xhtmlResult.iterator(); - * while(iterator.hasNext()) { OutputFile outfile = iterator.next(); - * System.out.println(outfile.isMasterDocument() + outfile.getFileName()); } - */ return createPackage(xhtmlResult,sTargetFileName); } - private ConverterResult createPackage(ConverterResult xhtmlResult, String sTargetFileName) { + private ConverterResult createPackage() { + createRDF(); + return converterResult; + + } + + private void createRDF() { + + + /* + * Resource root = m.createResource(ts + "elenphArticle"); + * + * Property itemNumber = m.createProperty(ts + "itemNumber"); Property hasItem = + * m.createProperty(ts + "hasItem"); Property hasText = m.createProperty(ts + + * "hasText"); + * + * elements.put("root", root); + */ + + DocumentStructure structure = new DocumentStructure(this.outFiles); + + structure.createTree(); + structure.printModel(); + /* + * if (elements.containsKey(path)) { element = elements.get(path); } else { + * element = m.createResource(ts + "Element/" + path); } + * element.addProperty(hasText, body.toString()); + * + * tocItem = m.createResource(ts + "TOCItem/" + path); + * tocItem.addProperty(itemNumber, order); String parentPath = + * calculateParentPath(path); System.out.println("parentPath " + parentPath); + * System.out.println("exPath " + path); Resource parent; if + * (elements.containsKey(parentPath)) { parent = elements.get(parentPath); } + * else { parent = m.createResource(ts + "Element/" + parentPath); } + * m.add(parent, hasItem, tocItem) + */; + + /* Resource root = m.createResource(ts + "Element"); + Property P = m.createProperty(ts + "TOCItem"); + Property Q = m.createProperty(nsB + "Q"); + Resource y = m.createResource(ts + "Excerpt"); + Resource z = m.createResource(ts + "z");*/ + +/* m.add(root, P, y); + m.add(y, Q, z); + m.setNsPrefix("nsA", ts); + m.write(System.out, "RDF/XML-ABBREV");*/ + + } + + + + + + + private ConverterResult createPackage(ConverterResult xhtmlResult, String sTargetFileName) { ConverterResultImpl rdfResult = new ConverterResultImpl(); RDFWriter rdfWriter = new RDFWriter(xhtmlResult,sTargetFileName,3,getXhtmlConfig()); rdfResult.addDocument(rdfWriter);