From 7cc2ba2d4d3e34ee84cb0fbcb423ae24a4109451 Mon Sep 17 00:00:00 2001
From: Georgy Litvinov
Date: Mon, 18 Oct 2021 16:09:47 +0200
Subject: [PATCH] fix: use custom transliteration method to avoid dependency on new java

---
Reviewer notes (ignored by git am):
- Line breaks, indentation and blank context lines in this copy were
  reconstructed from a whitespace-collapsed original; regenerate with
  `git format-patch` before applying.
- setDocID/transliterateToEn were revised in review, so the post-image
  blob id on the index line below is stale.

 .../java/w2phtml/rdf/DocumentStructure.java | 74 +++++++++++++++++--
 1 file changed, 65 insertions(+), 9 deletions(-)

diff --git a/src/main/java/w2phtml/rdf/DocumentStructure.java b/src/main/java/w2phtml/rdf/DocumentStructure.java
index c6decc5..61ed2cd 100644
--- a/src/main/java/w2phtml/rdf/DocumentStructure.java
+++ b/src/main/java/w2phtml/rdf/DocumentStructure.java
@@ -50,7 +50,7 @@ public class DocumentStructure {
 	private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt";
 	private static final String TOCITEM = "TOCItem";
 	private static final String TS = "https://litvinovg.pro/text_structures#";
-	private static final String PARSERNAME = "w2phtml";
+	//private static final String PARSERNAME = "w2phtml";
 	private static final String TEXT_EXCERPT = "textExcerpt";
 	private static final String TOC_LEVEL = "TOCLevel";
 	private static final String TABLE_OF_CONTENTS = "TOC";
@@ -95,7 +95,7 @@ public class DocumentStructure {
 		this.organizationClass = textOntology.createClass(TS + ORGANIZATION);
 		this.participantClass = textOntology.createClass(TS + PARTICIPANT);
 		this.itemClass = textOntology.createClass(TS + TOCITEM);
-		this.docID = fileName;
+		setDocID(fileName);
 		this.currentTime = Calendar.getInstance().getTime();
 		String publicationType = config.getRDFType();
 		setPublicationType(publicationType);
@@ -185,7 +185,7 @@ public class DocumentStructure {
 	}
 
 	private Resource createExcerpt(DocumentPart docExcerpt) {
-		String excerptName = TS + excerptType + "/" + PARSERNAME + "_" + docID + docExcerpt.getSafePath();
+		String excerptName = TS + excerptType + "/" + docID + docExcerpt.getSafePath();
 		Resource excerpt = m.createIndividual(excerptName, excerptClass);
 		addModificationTime(excerpt);
 		if (!docExcerpt.getBody().isEmpty()) {
@@ -196,7 +196,7 @@ public class DocumentStructure {
 	}
 
 	private void createTOCLevel(DocumentPart docPart) {
-		String levelName = TS + TOC_LEVEL + "/" + PARSERNAME + "_" + docID + docPart.getSafePath();
+		String levelName = TS + TOC_LEVEL + "/" + docID + docPart.getSafePath();
 		Resource level = m.createIndividual(levelName,tocLevelClass);
 		level.addProperty( RDFS.label, docPart.getName());
 		tocLevels.put(docPart.getPath(), level);
@@ -204,13 +204,13 @@ public class DocumentStructure {
 	}
 
 	private void createDocument(DocumentPart docPart) {
-		String documentURI = TS + documentType + "/" + PARSERNAME + "_" + docID ;
+		String documentURI = TS + documentType + "/" + docID ;
 		Resource mainResource = m.createResource(documentURI, documentClass);
 		mainResource.addProperty( RDFS.label, docPart.getName());
 		addModificationTime(mainResource);
 
 
-		String tocURI = TS + TABLE_OF_CONTENTS + "/" + PARSERNAME + "_" + docID ;
+		String tocURI = TS + TABLE_OF_CONTENTS + "/" + docID ;
 		Resource toc = m.createResource(tocURI, tocClass);
 		toc.addProperty( RDFS.label, docPart.getName());
 
@@ -244,7 +244,7 @@ public class DocumentStructure {
 					+ postfix + " found. Author givenName field is " + givenNameKey );
 			System.exit(1);
 		}
-		String participantURI = TS + PARTICIPANT + "/" + PARSERNAME + "_" + docID + order + "author" + postfix;
+		String participantURI = TS + PARTICIPANT + "/" + docID + order + "author" + postfix;
 		Resource participant = m.createResource(participantURI, participantClass);
 		Property hasAuthor = m.createProperty(TS + "hasAuthor");
 		excerpt.addProperty(hasAuthor, participant);
@@ -286,7 +286,7 @@ public class DocumentStructure {
 			logger.warn("No " + AFFILIATED_ORGANIZATION_NAME + postfix + " found.");
 			return;
 		}
-		String organizationUri = TS + ORGANIZATION + "/" + PARSERNAME + "_" + docID + order + "author" + postfix;
+		String organizationUri = TS + ORGANIZATION + "/" + docID + order + "author" + postfix;
 		Resource organization = m.createResource(organizationUri, organizationClass);
 		Property affiliatedWith = m.createProperty(TS + "affiliatedWith");
 		participant.addProperty(affiliatedWith, organization);
@@ -402,7 +402,7 @@ public class DocumentStructure {
 		return true;
 	}
 	private void createTOCItem(DocumentPart docPart) {
-		String tocItemName = TS + TOCITEM + "/" + PARSERNAME + "_" + docID + docPart.getSafePath();
+		String tocItemName = TS + TOCITEM + "/" + docID + docPart.getSafePath();
 		Resource tocItem = m.createIndividual(tocItemName,itemClass);
 		tocItem.addProperty( RDFS.label, docPart.getName());
 		Property pointsTo = m.createProperty(TS + "pointsTo");
@@ -461,5 +461,61 @@ public class DocumentStructure {
 			}
 		}
 	}
+
+	/** Lower-case Cyrillic letters handled by {@link #transliterateToEn(String)}. */
+	private static final String CYRILLIC_LOWER = "абвгдеёжзийклмнопрстуфхцчшщъыьэюя";
+	/** Upper-case counterparts, index-aligned with {@link #CYRILLIC_LOWER}. */
+	private static final String CYRILLIC_UPPER = "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ";
+	/** Latin replacements, index-aligned with both alphabets above. */
+	private static final String[] LATIN = {
+		"a", "b", "v", "g", "d", "e", "e", "zh", "z", "i", "y",
+		"k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "f",
+		"kh", "ts", "ch", "sh", "sch", "", "y", "", "e", "yu", "ya"
+	};
+
+	/**
+	 * Derives the document identifier used in generated resource URIs from the file name.
+	 * Whitespace runs become "_", Cyrillic letters are transliterated and every character
+	 * outside [a-zA-Z0-9_.-] is dropped.
+	 *
+	 * @param fileName name of the converted file; must not be null
+	 */
+	private void setDocID(String fileName) {
+		java.util.Objects.requireNonNull(fileName, "fileName");
+		String identifier = fileName.replaceAll("\\s+", "_");
+		identifier = transliterateToEn(identifier);
+		identifier = identifier.replaceAll("[^a-zA-Z0-9_.-]", "");
+		if (identifier.isEmpty()) {
+			// Nothing survived sanitizing (e.g. a name in another script): fall back to a
+			// hash of the original name so such documents do not all share one URI.
+			identifier = "doc_" + Integer.toHexString(fileName.hashCode());
+		}
+		this.docID = identifier;
+	}
+
+	/**
+	 * Transliterates Russian Cyrillic letters to Latin in a single pass.
+	 * Upper and lower case both map to lower-case Latin, hard and soft signs are dropped,
+	 * and every other character is copied unchanged.
+	 *
+	 * @param fileName text to transliterate
+	 * @return the transliterated text
+	 */
+	private String transliterateToEn(String fileName) {
+		StringBuilder latin = new StringBuilder(fileName.length());
+		for (int i = 0; i < fileName.length(); i++) {
+			char c = fileName.charAt(i);
+			int index = CYRILLIC_LOWER.indexOf(c);
+			if (index < 0) {
+				index = CYRILLIC_UPPER.indexOf(c);
+			}
+			if (index < 0) {
+				latin.append(c);
+			} else {
+				latin.append(LATIN[index]);
+			}
+		}
+		return latin.toString();
+	}
 
 }