fix: use a custom transliteration method to avoid depending on a newer Java version

This commit is contained in:
Georgy Litvinov 2021-10-18 16:09:47 +02:00
parent 9a4f2eda97
commit 7cc2ba2d4d

View file

@ -50,7 +50,7 @@ public class DocumentStructure {
private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt";
private static final String TOCITEM = "TOCItem";
private static final String TS = "https://litvinovg.pro/text_structures#";
private static final String PARSERNAME = "w2phtml";
//private static final String PARSERNAME = "w2phtml";
private static final String TEXT_EXCERPT = "textExcerpt";
private static final String TOC_LEVEL = "TOCLevel";
private static final String TABLE_OF_CONTENTS = "TOC";
@ -95,7 +95,7 @@ public class DocumentStructure {
this.organizationClass = textOntology.createClass(TS + ORGANIZATION);
this.participantClass = textOntology.createClass(TS + PARTICIPANT);
this.itemClass = textOntology.createClass(TS + TOCITEM);
this.docID = fileName;
setDocID(fileName);
this.currentTime = Calendar.getInstance().getTime();
String publicationType = config.getRDFType();
setPublicationType(publicationType);
@ -185,7 +185,7 @@ public class DocumentStructure {
}
private Resource createExcerpt(DocumentPart docExcerpt) {
String excerptName = TS + excerptType + "/" + PARSERNAME + "_" + docID + docExcerpt.getSafePath();
String excerptName = TS + excerptType + "/" + docID + docExcerpt.getSafePath();
Resource excerpt = m.createIndividual(excerptName, excerptClass);
addModificationTime(excerpt);
if (!docExcerpt.getBody().isEmpty()) {
@ -196,7 +196,7 @@ public class DocumentStructure {
}
private void createTOCLevel(DocumentPart docPart) {
String levelName = TS + TOC_LEVEL + "/" + PARSERNAME + "_" + docID + docPart.getSafePath();
String levelName = TS + TOC_LEVEL + "/" + docID + docPart.getSafePath();
Resource level = m.createIndividual(levelName,tocLevelClass);
level.addProperty( RDFS.label, docPart.getName());
tocLevels.put(docPart.getPath(), level);
@ -204,13 +204,13 @@ public class DocumentStructure {
}
private void createDocument(DocumentPart docPart) {
String documentURI = TS + documentType + "/" + PARSERNAME + "_" + docID ;
String documentURI = TS + documentType + "/" + docID ;
Resource mainResource = m.createResource(documentURI, documentClass);
mainResource.addProperty( RDFS.label, docPart.getName());
addModificationTime(mainResource);
String tocURI = TS + TABLE_OF_CONTENTS + "/" + PARSERNAME + "_" + docID ;
String tocURI = TS + TABLE_OF_CONTENTS + "/" + docID ;
Resource toc = m.createResource(tocURI, tocClass);
toc.addProperty( RDFS.label, docPart.getName());
@ -244,7 +244,7 @@ public class DocumentStructure {
+ postfix + " found. Author givenName field is " + givenNameKey );
System.exit(1);
}
String participantURI = TS + PARTICIPANT + "/" + PARSERNAME + "_" + docID + order + "author" + postfix;
String participantURI = TS + PARTICIPANT + "/" + docID + order + "author" + postfix;
Resource participant = m.createResource(participantURI, participantClass);
Property hasAuthor = m.createProperty(TS + "hasAuthor");
excerpt.addProperty(hasAuthor, participant);
@ -286,7 +286,7 @@ public class DocumentStructure {
logger.warn("No " + AFFILIATED_ORGANIZATION_NAME + postfix + " found.");
return;
}
String organizationUri = TS + ORGANIZATION + "/" + PARSERNAME + "_" + docID + order + "author" + postfix;
String organizationUri = TS + ORGANIZATION + "/" + docID + order + "author" + postfix;
Resource organization = m.createResource(organizationUri, organizationClass);
Property affiliatedWith = m.createProperty(TS + "affiliatedWith");
participant.addProperty(affiliatedWith, organization);
@ -402,7 +402,7 @@ public class DocumentStructure {
return true;
}
private void createTOCItem(DocumentPart docPart) {
String tocItemName = TS + TOCITEM + "/" + PARSERNAME + "_" + docID + docPart.getSafePath();
String tocItemName = TS + TOCITEM + "/" + docID + docPart.getSafePath();
Resource tocItem = m.createIndividual(tocItemName,itemClass);
tocItem.addProperty( RDFS.label, docPart.getName());
Property pointsTo = m.createProperty(TS + "pointsTo");
@ -461,5 +461,49 @@ public class DocumentStructure {
}
}
}
/**
 * Derives the document identifier from a file name and stores it in {@code docID}.
 *
 * <p>The value is built in three steps: every run of whitespace becomes a single
 * underscore, the result is passed through {@link #transliterateToEn(String)}, and
 * finally every character other than ASCII letters, digits, {@code _}, {@code .}
 * and {@code -} is removed.
 *
 * @param fileName source file name to derive the identifier from
 */
private void setDocID(String fileName) {
    final String underscored = fileName.replaceAll("\\s+", "_");
    final String latinised = transliterateToEn(underscored);
    // Whatever is still outside the allowed ASCII set after transliteration is dropped.
    this.docID = latinised.replaceAll("[^a-zA-Z0-9_.-]", "");
}
/**
 * Transliterates Russian Cyrillic letters in the given string to Latin text.
 *
 * <p>Both the upper-case and the lower-case form of a Cyrillic letter map to the
 * same lower-case Latin sequence (for example {@code Ж} and {@code ж} both become
 * {@code "zh"}); the hard and soft signs are removed. Every other character,
 * including Latin letters of either case, is copied through unchanged.
 *
 * <p>The string is processed in a single pass with a per-character lookup instead
 * of one regular-expression replacement per letter, so no patterns are compiled
 * and no intermediate strings are created.
 *
 * @param fileName text to transliterate
 * @return the text with Russian Cyrillic letters replaced by their Latin equivalents
 * @throws NullPointerException if {@code fileName} is {@code null}
 */
private String transliterateToEn(String fileName) {
    StringBuilder latin = new StringBuilder(fileName.length());
    for (int i = 0; i < fileName.length(); i++) {
        char current = fileName.charAt(i);
        String replacement = latinForCyrillic(current);
        if (replacement == null) {
            // Not a mapped Cyrillic letter: keep the character as it is.
            latin.append(current);
        } else {
            latin.append(replacement);
        }
    }
    return latin.toString();
}

/**
 * Returns the Latin replacement for one Russian Cyrillic letter, or {@code null}
 * when the character is not part of the mapping and must be left untouched.
 */
private static String latinForCyrillic(char letter) {
    switch (letter) {
        case 'а': case 'А': return "a";
        case 'б': case 'Б': return "b";
        case 'в': case 'В': return "v";
        case 'г': case 'Г': return "g";
        case 'д': case 'Д': return "d";
        case 'е': case 'Е': return "e";
        case 'ё': case 'Ё': return "e";
        case 'ж': case 'Ж': return "zh";
        case 'з': case 'З': return "z";
        case 'и': case 'И': return "i";
        case 'й': case 'Й': return "y";
        case 'к': case 'К': return "k";
        case 'л': case 'Л': return "l";
        case 'м': case 'М': return "m";
        case 'н': case 'Н': return "n";
        case 'о': case 'О': return "o";
        case 'п': case 'П': return "p";
        case 'р': case 'Р': return "r";
        case 'с': case 'С': return "s";
        case 'т': case 'Т': return "t";
        case 'у': case 'У': return "u";
        case 'ф': case 'Ф': return "f";
        case 'х': case 'Х': return "kh";
        case 'ц': case 'Ц': return "ts";
        case 'ч': case 'Ч': return "ch";
        case 'ш': case 'Ш': return "sh";
        case 'щ': case 'Щ': return "sch";
        case 'ъ': case 'Ъ': return ""; // hard sign is dropped
        case 'ы': case 'Ы': return "y";
        case 'ь': case 'Ь': return ""; // soft sign is dropped
        case 'э': case 'Э': return "e";
        case 'ю': case 'Ю': return "yu";
        case 'я': case 'Я': return "ya";
        default: return null;
    }
}
}