fix: use a custom transliteration method to avoid depending on a newer Java version

This commit is contained in:
Georgy Litvinov 2021-10-18 16:09:47 +02:00
parent 9a4f2eda97
commit 7cc2ba2d4d

View file

@ -50,7 +50,7 @@ public class DocumentStructure {
private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt"; private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt";
private static final String TOCITEM = "TOCItem"; private static final String TOCITEM = "TOCItem";
private static final String TS = "https://litvinovg.pro/text_structures#"; private static final String TS = "https://litvinovg.pro/text_structures#";
private static final String PARSERNAME = "w2phtml"; //private static final String PARSERNAME = "w2phtml";
private static final String TEXT_EXCERPT = "textExcerpt"; private static final String TEXT_EXCERPT = "textExcerpt";
private static final String TOC_LEVEL = "TOCLevel"; private static final String TOC_LEVEL = "TOCLevel";
private static final String TABLE_OF_CONTENTS = "TOC"; private static final String TABLE_OF_CONTENTS = "TOC";
@ -95,7 +95,7 @@ public class DocumentStructure {
this.organizationClass = textOntology.createClass(TS + ORGANIZATION); this.organizationClass = textOntology.createClass(TS + ORGANIZATION);
this.participantClass = textOntology.createClass(TS + PARTICIPANT); this.participantClass = textOntology.createClass(TS + PARTICIPANT);
this.itemClass = textOntology.createClass(TS + TOCITEM); this.itemClass = textOntology.createClass(TS + TOCITEM);
this.docID = fileName; setDocID(fileName);
this.currentTime = Calendar.getInstance().getTime(); this.currentTime = Calendar.getInstance().getTime();
String publicationType = config.getRDFType(); String publicationType = config.getRDFType();
setPublicationType(publicationType); setPublicationType(publicationType);
@ -185,7 +185,7 @@ public class DocumentStructure {
} }
private Resource createExcerpt(DocumentPart docExcerpt) { private Resource createExcerpt(DocumentPart docExcerpt) {
String excerptName = TS + excerptType + "/" + PARSERNAME + "_" + docID + docExcerpt.getSafePath(); String excerptName = TS + excerptType + "/" + docID + docExcerpt.getSafePath();
Resource excerpt = m.createIndividual(excerptName, excerptClass); Resource excerpt = m.createIndividual(excerptName, excerptClass);
addModificationTime(excerpt); addModificationTime(excerpt);
if (!docExcerpt.getBody().isEmpty()) { if (!docExcerpt.getBody().isEmpty()) {
@ -196,7 +196,7 @@ public class DocumentStructure {
} }
private void createTOCLevel(DocumentPart docPart) { private void createTOCLevel(DocumentPart docPart) {
String levelName = TS + TOC_LEVEL + "/" + PARSERNAME + "_" + docID + docPart.getSafePath(); String levelName = TS + TOC_LEVEL + "/" + docID + docPart.getSafePath();
Resource level = m.createIndividual(levelName,tocLevelClass); Resource level = m.createIndividual(levelName,tocLevelClass);
level.addProperty( RDFS.label, docPart.getName()); level.addProperty( RDFS.label, docPart.getName());
tocLevels.put(docPart.getPath(), level); tocLevels.put(docPart.getPath(), level);
@ -204,13 +204,13 @@ public class DocumentStructure {
} }
private void createDocument(DocumentPart docPart) { private void createDocument(DocumentPart docPart) {
String documentURI = TS + documentType + "/" + PARSERNAME + "_" + docID ; String documentURI = TS + documentType + "/" + docID ;
Resource mainResource = m.createResource(documentURI, documentClass); Resource mainResource = m.createResource(documentURI, documentClass);
mainResource.addProperty( RDFS.label, docPart.getName()); mainResource.addProperty( RDFS.label, docPart.getName());
addModificationTime(mainResource); addModificationTime(mainResource);
String tocURI = TS + TABLE_OF_CONTENTS + "/" + PARSERNAME + "_" + docID ; String tocURI = TS + TABLE_OF_CONTENTS + "/" + docID ;
Resource toc = m.createResource(tocURI, tocClass); Resource toc = m.createResource(tocURI, tocClass);
toc.addProperty( RDFS.label, docPart.getName()); toc.addProperty( RDFS.label, docPart.getName());
@ -244,7 +244,7 @@ public class DocumentStructure {
+ postfix + " found. Author givenName field is " + givenNameKey ); + postfix + " found. Author givenName field is " + givenNameKey );
System.exit(1); System.exit(1);
} }
String participantURI = TS + PARTICIPANT + "/" + PARSERNAME + "_" + docID + order + "author" + postfix; String participantURI = TS + PARTICIPANT + "/" + docID + order + "author" + postfix;
Resource participant = m.createResource(participantURI, participantClass); Resource participant = m.createResource(participantURI, participantClass);
Property hasAuthor = m.createProperty(TS + "hasAuthor"); Property hasAuthor = m.createProperty(TS + "hasAuthor");
excerpt.addProperty(hasAuthor, participant); excerpt.addProperty(hasAuthor, participant);
@ -286,7 +286,7 @@ public class DocumentStructure {
logger.warn("No " + AFFILIATED_ORGANIZATION_NAME + postfix + " found."); logger.warn("No " + AFFILIATED_ORGANIZATION_NAME + postfix + " found.");
return; return;
} }
String organizationUri = TS + ORGANIZATION + "/" + PARSERNAME + "_" + docID + order + "author" + postfix; String organizationUri = TS + ORGANIZATION + "/" + docID + order + "author" + postfix;
Resource organization = m.createResource(organizationUri, organizationClass); Resource organization = m.createResource(organizationUri, organizationClass);
Property affiliatedWith = m.createProperty(TS + "affiliatedWith"); Property affiliatedWith = m.createProperty(TS + "affiliatedWith");
participant.addProperty(affiliatedWith, organization); participant.addProperty(affiliatedWith, organization);
@ -402,7 +402,7 @@ public class DocumentStructure {
return true; return true;
} }
private void createTOCItem(DocumentPart docPart) { private void createTOCItem(DocumentPart docPart) {
String tocItemName = TS + TOCITEM + "/" + PARSERNAME + "_" + docID + docPart.getSafePath(); String tocItemName = TS + TOCITEM + "/" + docID + docPart.getSafePath();
Resource tocItem = m.createIndividual(tocItemName,itemClass); Resource tocItem = m.createIndividual(tocItemName,itemClass);
tocItem.addProperty( RDFS.label, docPart.getName()); tocItem.addProperty( RDFS.label, docPart.getName());
Property pointsTo = m.createProperty(TS + "pointsTo"); Property pointsTo = m.createProperty(TS + "pointsTo");
@ -462,4 +462,48 @@ public class DocumentStructure {
} }
} }
/**
 * Derives the document identifier from a file name and stores it in {@code docID}.
 *
 * <p>The name is normalised in three steps: every run of whitespace becomes a
 * single underscore, Russian Cyrillic letters are replaced by Latin ones via
 * {@link #transliterateToEn(String)}, and finally every character other than
 * ASCII letters, digits, {@code _}, {@code .} and {@code -} is removed.
 *
 * @param fileName the file name to turn into an identifier
 */
private void setDocID(String fileName) {
    // Whitespace must be collapsed before the final filter, otherwise it would
    // simply be dropped instead of being turned into "_".
    this.docID = transliterateToEn(fileName.replaceAll("\\s+", "_"))
            .replaceAll("[^a-zA-Z0-9_.-]", "");
}
/**
 * Replaces Russian Cyrillic letters with lower-case Latin equivalents.
 *
 * <p>Upper- and lower-case Cyrillic letters map to the same lower-case Latin
 * text; the hard and soft signs are dropped. Every other character is copied
 * to the result unchanged.
 *
 * @param fileName the text to transliterate
 * @return the text with each Russian Cyrillic letter replaced
 */
private String transliterateToEn(String fileName) {
    // The two alphabets and the replacement table are index-aligned:
    // position i in either alphabet maps to latin[i].
    final String lowerCyrillic = "абвгдеёжзийклмнопрстуфхцчшщъыьэюя";
    final String upperCyrillic = "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ";
    final String[] latin = {
        "a", "b", "v", "g", "d", "e", "e", "zh", "z", "i", "y",
        "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "f",
        "kh", "ts", "ch", "sh", "sch", "", "y", "", "e", "yu", "ya"
    };

    final StringBuilder result = new StringBuilder(fileName.length());
    for (int pos = 0; pos < fileName.length(); pos++) {
        final char current = fileName.charAt(pos);
        int index = lowerCyrillic.indexOf(current);
        if (index < 0) {
            index = upperCyrillic.indexOf(current);
        }
        if (index < 0) {
            // Not a Russian letter: keep the character as it is.
            result.append(current);
        } else {
            result.append(latin[index]);
        }
    }
    return result.toString();
}
} }