diff --git a/src/main/java/pro/litvinovg/w2phtml/gui/ConversionExecutor.java b/src/main/java/pro/litvinovg/w2phtml/gui/ConversionExecutor.java index a46ce5f..a1b4f2d 100644 --- a/src/main/java/pro/litvinovg/w2phtml/gui/ConversionExecutor.java +++ b/src/main/java/pro/litvinovg/w2phtml/gui/ConversionExecutor.java @@ -25,7 +25,7 @@ import org.slf4j.LoggerFactory; import com.sun.star.uno.XComponentContext; import w2phtml.Application; -import w2phtml.rdf.Metadata; +import w2phtml.rdf.MetadataContainer; public class ConversionExecutor { diff --git a/src/main/java/w2phtml/rdf/DocumentPart.java b/src/main/java/w2phtml/rdf/DocumentPart.java index e9fc3de..3ad4eeb 100644 --- a/src/main/java/w2phtml/rdf/DocumentPart.java +++ b/src/main/java/w2phtml/rdf/DocumentPart.java @@ -37,9 +37,9 @@ public class DocumentPart { private String parentPath; private String name = null; private String order = null; - private Metadata metadata = null; + private MetadataContainer metadata = null; - public DocumentPart(XhtmlDocument document,Metadata metadata) { + public DocumentPart(XhtmlDocument document,MetadataContainer metadata) { this.excerptDoc = document; this.metadata = metadata; extractPath(); @@ -50,7 +50,7 @@ public class DocumentPart { extractAnnotationMetadata(); } - public DocumentPart(String path, Metadata metadata) { + public DocumentPart(String path, MetadataContainer metadata) { this.path = path; this.metadata = metadata; this.name = ""; diff --git a/src/main/java/w2phtml/rdf/Metadata.java b/src/main/java/w2phtml/rdf/MetadataContainer.java similarity index 94% rename from src/main/java/w2phtml/rdf/Metadata.java rename to src/main/java/w2phtml/rdf/MetadataContainer.java index 8febdfa..3bce9ff 100644 --- a/src/main/java/w2phtml/rdf/Metadata.java +++ b/src/main/java/w2phtml/rdf/MetadataContainer.java @@ -19,28 +19,28 @@ import org.slf4j.LoggerFactory; import com.opencsv.CSVReaderHeaderAware; import com.opencsv.exceptions.CsvValidationException; -public class Metadata { +public class MetadataContainer { private static final String SUBTITLE = "subtitle"; private static final String FILENAME = "Filename"; private static final String SECTION = "Section"; - private static final Logger logger = LoggerFactory.getLogger(Metadata.class); + private static final Logger logger = LoggerFactory.getLogger(MetadataContainer.class); private HashMap>> sectionsMetadata; - public Metadata() { + public MetadataContainer() { sectionsMetadata = new HashMap>>(); } - public void read(String metadataFilePath) { - if (metadataFilePath == null) { + public void read(String filePath) { + if (filePath == null) { return; } - File file = new File(metadataFilePath); + File file = new File(filePath); if (!file.exists() || !file.canRead()) { return; } - readCSVFile(metadataFilePath); + readCSVFile(filePath); } private void readCSVFile(String filePath) { diff --git a/src/main/java/w2phtml/rdf/DocumentStructure.java b/src/main/java/w2phtml/rdf/RDFDocument.java similarity index 84% rename from src/main/java/w2phtml/rdf/DocumentStructure.java rename to src/main/java/w2phtml/rdf/RDFDocument.java index 61ed2cd..da17130 100644 --- a/src/main/java/w2phtml/rdf/DocumentStructure.java +++ b/src/main/java/w2phtml/rdf/RDFDocument.java @@ -20,6 +20,7 @@ import org.apache.jena.rdf.model.Resource; import org.apache.jena.vocabulary.*; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static w2phtml.util.Transliteration.*; import w2phtml.xhtml.XhtmlConfig; import w2phtml.xhtml.XhtmlDocument; @@ -27,36 +28,32 @@ import w2phtml.xhtml.XhtmlDocument; import org.apache.jena.rdf.model.Property; -public class DocumentStructure { +public class RDFDocument { + private static final String FORMAT_RDF = "RDF/XML-ABBREV"; private static final String AFFILIATED_ORGANIZATION_POSTAL_CODE = "affiliated organization postal code"; - private static final String AFFILIATED_ORGANIZATION_ADDRESS = "affiliated organization address"; - private static final String AFFILIATED_ORGANIZATION_OFFICIAL_NAME = "affiliated organization official name"; - private static final String AFFILIATED_ORGANIZATION_NAME = "affiliated organization name"; - private static final String AUTHOR_INITIALS = "author initials"; - private static final String AUTHOR_EMAIL = "author email"; - private static final String AUTHOR_FAMILY = "author family"; - - private static final Logger logger = LoggerFactory.getLogger(DocumentStructure.class); - + private static final Logger logger = LoggerFactory.getLogger(RDFDocument.class); private static final String AUTHOR_GIVEN_NAME = "author given name"; private static final String MODIFICATION_TIME = "modificationTime"; private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt"; private static final String TOCITEM = "TOCItem"; private static final String TS = "https://litvinovg.pro/text_structures#"; + private static final String POINTS_TO = TS + "pointsTo"; + private static final String ITEM_NUMBER = TS + "itemNumber"; + private static final String HAS_TOC_ITEM = TS + "hasTOCItem"; + private static final String HAS_TEXT = TS + "hasText"; //private static final String PARSERNAME = "w2phtml"; private static final String TEXT_EXCERPT = "textExcerpt"; private static final String TOC_LEVEL = "TOCLevel"; private static final String TABLE_OF_CONTENTS = "TOC"; private static final String PARTICIPANT = "publicationParticipant"; private static final String ORGANIZATION = "organization"; - private static final String PUBLICATION = "publication"; private static final String BOOK = "book"; private static final String JOURNAL = "journal"; @@ -81,10 +78,10 @@ public class DocumentStructure { private HashMap tocLevels; private HashMap inputParts; - private Metadata metadata; + private MetadataContainer metadata; private Date currentTime; - public DocumentStructure(Vector files,String fileName, XhtmlConfig config,Metadata metadata) { + public RDFDocument(Vector files,String fileName, XhtmlConfig config, MetadataContainer metadata) { this.tocLevels = new HashMap(); this.inputParts = new HashMap(); this.m = ModelFactory.createOntologyModel(); @@ -95,10 +92,9 @@ public class DocumentStructure { this.organizationClass = textOntology.createClass(TS + ORGANIZATION); this.participantClass = textOntology.createClass(TS + PARTICIPANT); this.itemClass = textOntology.createClass(TS + TOCITEM); - setDocID(fileName); + formatDocID(fileName); this.currentTime = Calendar.getInstance().getTime(); - String publicationType = config.getRDFType(); - setPublicationType(publicationType); + setPublicationType(config.getRDFType()); this.documentClass = textOntology.createClass(TS + documentType); this.excerptClass = textOntology.createClass(TS + excerptType); for(int i = 0 ; i< files.size();i++) { @@ -108,6 +104,12 @@ public class DocumentStructure { addPart(part); } addEmptyParts(); + createElements(); + createTOCItems(); + } + + public void printModel(OutputStream os) { + m.write(os,FORMAT_RDF); } private void setPublicationType(String publicationType) { @@ -155,30 +157,6 @@ public class DocumentStructure { } return emptyPath.toString(); } - - public void printModel(String fileName) { - File outFile = new File(fileName + ".rdf"); - FileWriter fw = null; - try { - outFile.createNewFile(); - fw = new FileWriter(outFile); - m.write(fw,"RDF/XML-ABBREV"); - - } catch (IOException e) { - System.out.println("File couldn't be created"); - e.printStackTrace(); - } finally { - try { - fw.close(); - } catch (IOException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - } - } - public void printModel(OutputStream os) { - m.write(os,"RDF/XML-ABBREV"); - } private void addPart(DocumentPart docExcerpt) { inputParts.put(docExcerpt.getPath(), docExcerpt); @@ -273,13 +251,13 @@ public class DocumentStructure { Property initialsProperty = m.createProperty(TS + "participantInitials"); participant.addProperty( initialsProperty, authorInitials.trim()); } - attachOrganization(participant, map, order, postfix); + addOrganization(participant, map, order, postfix); } } } } - private void attachOrganization(Resource participant, Map map, String order, String postfix) { + private void addOrganization(Resource participant, Map map, String order, String postfix) { String orgName = map.get(AFFILIATED_ORGANIZATION_NAME + postfix); if (orgName == null) { System.out.println(orgName + " not found"); @@ -402,19 +380,18 @@ public class DocumentStructure { return true; } private void createTOCItem(DocumentPart docPart) { - String tocItemName = TS + TOCITEM + "/" + docID + docPart.getSafePath(); - Resource tocItem = m.createIndividual(tocItemName,itemClass); + String tocItemUri = TS + TOCITEM + "/" + docID + docPart.getSafePath(); + Resource tocItem = m.createIndividual(tocItemUri,itemClass); tocItem.addProperty( RDFS.label, docPart.getName()); - Property pointsTo = m.createProperty(TS + "pointsTo"); - Property itemNumber = m.createProperty(TS + "itemNumber"); - Property hasTOCItem = m.createProperty(TS + "hasTOCItem"); + Property pointsTo = m.createProperty(POINTS_TO); + Property itemNumber = m.createProperty(ITEM_NUMBER); + Property hasTOCItem = m.createProperty(HAS_TOC_ITEM); tocItem.addLiteral(itemNumber, docPart.getNumber()); m.add(tocItem, pointsTo, tocLevels.get(docPart.getPath())); if (!docPart.getPath().isEmpty()) { Resource parent = tocLevels.get(docPart.getParentPath()); m.add(parent, hasTOCItem, tocItem); } - } private void attachExcerpt(DocumentPart docPart, Resource element) { @@ -426,7 +403,7 @@ public class DocumentStructure { } Resource excerpt = createExcerpt(docPart); excerpt.addProperty( RDFS.label, docPart.getName()); - Property hasText = m.createProperty(TS + "hasText"); + Property hasText = m.createProperty(HAS_TEXT); element.addProperty(hasText, excerpt); if (!docPart.isMasterPart()) { addMetadataProperties(excerpt, docPart); @@ -435,11 +412,6 @@ public class DocumentStructure { } } - public void createTree() { - createElements(); - createTOCItems(); - } - private void createTOCItems() { Set paths = inputParts.keySet(); for (String path : paths) { @@ -462,48 +434,10 @@ public class DocumentStructure { } } - private void setDocID(String fileName) { + private void formatDocID(String fileName) { String identifier = fileName.replaceAll("\\s+", "_"); identifier = transliterateToEn(identifier); identifier = identifier.replaceAll("[^a-zA-Z0-9_.-]", ""); this.docID = identifier; } - - private String transliterateToEn(String fileName) { - return fileName - .replaceAll("[аА]", "a") - .replaceAll("[бБ]", "b") - .replaceAll("[вВ]", "v") - .replaceAll("[гГ]", "g") - .replaceAll("[дД]", "d") - .replaceAll("[еЕ]", "e") - .replaceAll("[ёЁ]", "e") - .replaceAll("[жЖ]", "zh") - .replaceAll("[зЗ]", "z") - .replaceAll("[иИ]", "i") - .replaceAll("[йЙ]", "y") - .replaceAll("[кК]", "k") - .replaceAll("[лЛ]", "l") - .replaceAll("[мМ]", "m") - .replaceAll("[нН]", "n") - .replaceAll("[оО]", "o") - .replaceAll("[пП]", "p") - .replaceAll("[рР]", "r") - .replaceAll("[сС]", "s") - .replaceAll("[тТ]", "t") - .replaceAll("[уУ]", "u") - .replaceAll("[фФ]", "f") - .replaceAll("[хХ]", "kh") - .replaceAll("[цЦ]", "ts") - .replaceAll("[чЧ]", "ch") - .replaceAll("[шШ]", "sh") - .replaceAll("[щЩ]", "sch") - .replaceAll("[ъЪ]", "") - .replaceAll("[ыЫ]", "y") - .replaceAll("[ьЬ]", "") - .replaceAll("[эЭ]", "e") - .replaceAll("[юЮ]", "yu") - .replaceAll("[яЯ]", "ya"); - } - } diff --git a/src/main/java/w2phtml/rdf/RDFDocumentResult.java b/src/main/java/w2phtml/rdf/RDFDocumentResult.java index 09421ef..7dd8c34 100644 --- a/src/main/java/w2phtml/rdf/RDFDocumentResult.java +++ b/src/main/java/w2phtml/rdf/RDFDocumentResult.java @@ -17,17 +17,15 @@ public class RDFDocumentResult implements OutputFile { private ConverterResult xhtmlResult; private String sFileName; private XhtmlConfig config; - private DocumentStructure rdfStructure = null; + private RDFDocument rdfDocument = null; public RDFDocumentResult(Vector outFiles, String fileName, XhtmlConfig config) { this.sFileName = Misc.removeExtension(fileName); this.config = config; - Metadata metadata = new Metadata(); + MetadataContainer metadata = new MetadataContainer(); metadata.read(config.getCSVMetadataFile()); - rdfStructure = new DocumentStructure(outFiles,sFileName,config,metadata); - rdfStructure.createTree(); - + rdfDocument = new RDFDocument(outFiles,sFileName,config,metadata); } @@ -48,7 +46,7 @@ public class RDFDocumentResult implements OutputFile { } @Override public void write(OutputStream os) throws IOException { - rdfStructure.printModel(os); + rdfDocument.printModel(os); } } diff --git a/src/main/java/w2phtml/util/Package.html b/src/main/java/w2phtml/util/Package.html deleted file mode 100644 index 3536243..0000000 --- a/src/main/java/w2phtml/util/Package.html +++ /dev/null @@ -1,11 +0,0 @@ - - - - - The package writer2latex.util - - - -

Some general utility classes.

- - diff --git a/src/main/java/w2phtml/util/Transliteration.java b/src/main/java/w2phtml/util/Transliteration.java new file mode 100644 index 0000000..8137401 --- /dev/null +++ b/src/main/java/w2phtml/util/Transliteration.java @@ -0,0 +1,41 @@ +package w2phtml.util; + +public class Transliteration { + + public static String transliterateToEn(String fileName) { + return fileName + .replaceAll("[аА]", "a") + .replaceAll("[бБ]", "b") + .replaceAll("[вВ]", "v") + .replaceAll("[гГ]", "g") + .replaceAll("[дД]", "d") + .replaceAll("[еЕ]", "e") + .replaceAll("[ёЁ]", "e") + .replaceAll("[жЖ]", "zh") + .replaceAll("[зЗ]", "z") + .replaceAll("[иИ]", "i") + .replaceAll("[йЙ]", "y") + .replaceAll("[кК]", "k") + .replaceAll("[лЛ]", "l") + .replaceAll("[мМ]", "m") + .replaceAll("[нН]", "n") + .replaceAll("[оО]", "o") + .replaceAll("[пП]", "p") + .replaceAll("[рР]", "r") + .replaceAll("[сС]", "s") + .replaceAll("[тТ]", "t") + .replaceAll("[уУ]", "u") + .replaceAll("[фФ]", "f") + .replaceAll("[хХ]", "kh") + .replaceAll("[цЦ]", "ts") + .replaceAll("[чЧ]", "ch") + .replaceAll("[шШ]", "sh") + .replaceAll("[щЩ]", "sch") + .replaceAll("[ъЪ]", "") + .replaceAll("[ыЫ]", "y") + .replaceAll("[ьЬ]", "") + .replaceAll("[эЭ]", "e") + .replaceAll("[юЮ]", "yu") + .replaceAll("[яЯ]", "ya"); + } +}