fix: use a custom transliteration method to avoid depending on a newer Java version
This commit is contained in:
parent
9a4f2eda97
commit
7cc2ba2d4d
1 changed files with 53 additions and 9 deletions
|
@ -50,7 +50,7 @@ public class DocumentStructure {
|
|||
private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt";
|
||||
private static final String TOCITEM = "TOCItem";
|
||||
private static final String TS = "https://litvinovg.pro/text_structures#";
|
||||
private static final String PARSERNAME = "w2phtml";
|
||||
//private static final String PARSERNAME = "w2phtml";
|
||||
private static final String TEXT_EXCERPT = "textExcerpt";
|
||||
private static final String TOC_LEVEL = "TOCLevel";
|
||||
private static final String TABLE_OF_CONTENTS = "TOC";
|
||||
|
@ -95,7 +95,7 @@ public class DocumentStructure {
|
|||
this.organizationClass = textOntology.createClass(TS + ORGANIZATION);
|
||||
this.participantClass = textOntology.createClass(TS + PARTICIPANT);
|
||||
this.itemClass = textOntology.createClass(TS + TOCITEM);
|
||||
this.docID = fileName;
|
||||
setDocID(fileName);
|
||||
this.currentTime = Calendar.getInstance().getTime();
|
||||
String publicationType = config.getRDFType();
|
||||
setPublicationType(publicationType);
|
||||
|
@ -185,7 +185,7 @@ public class DocumentStructure {
|
|||
}
|
||||
|
||||
private Resource createExcerpt(DocumentPart docExcerpt) {
|
||||
String excerptName = TS + excerptType + "/" + PARSERNAME + "_" + docID + docExcerpt.getSafePath();
|
||||
String excerptName = TS + excerptType + "/" + docID + docExcerpt.getSafePath();
|
||||
Resource excerpt = m.createIndividual(excerptName, excerptClass);
|
||||
addModificationTime(excerpt);
|
||||
if (!docExcerpt.getBody().isEmpty()) {
|
||||
|
@ -196,7 +196,7 @@ public class DocumentStructure {
|
|||
}
|
||||
|
||||
private void createTOCLevel(DocumentPart docPart) {
|
||||
String levelName = TS + TOC_LEVEL + "/" + PARSERNAME + "_" + docID + docPart.getSafePath();
|
||||
String levelName = TS + TOC_LEVEL + "/" + docID + docPart.getSafePath();
|
||||
Resource level = m.createIndividual(levelName,tocLevelClass);
|
||||
level.addProperty( RDFS.label, docPart.getName());
|
||||
tocLevels.put(docPart.getPath(), level);
|
||||
|
@ -204,13 +204,13 @@ public class DocumentStructure {
|
|||
}
|
||||
|
||||
private void createDocument(DocumentPart docPart) {
|
||||
String documentURI = TS + documentType + "/" + PARSERNAME + "_" + docID ;
|
||||
String documentURI = TS + documentType + "/" + docID ;
|
||||
Resource mainResource = m.createResource(documentURI, documentClass);
|
||||
mainResource.addProperty( RDFS.label, docPart.getName());
|
||||
|
||||
addModificationTime(mainResource);
|
||||
|
||||
String tocURI = TS + TABLE_OF_CONTENTS + "/" + PARSERNAME + "_" + docID ;
|
||||
String tocURI = TS + TABLE_OF_CONTENTS + "/" + docID ;
|
||||
Resource toc = m.createResource(tocURI, tocClass);
|
||||
toc.addProperty( RDFS.label, docPart.getName());
|
||||
|
||||
|
@ -244,7 +244,7 @@ public class DocumentStructure {
|
|||
+ postfix + " found. Author givenName field is " + givenNameKey );
|
||||
System.exit(1);
|
||||
}
|
||||
String participantURI = TS + PARTICIPANT + "/" + PARSERNAME + "_" + docID + order + "author" + postfix;
|
||||
String participantURI = TS + PARTICIPANT + "/" + docID + order + "author" + postfix;
|
||||
Resource participant = m.createResource(participantURI, participantClass);
|
||||
Property hasAuthor = m.createProperty(TS + "hasAuthor");
|
||||
excerpt.addProperty(hasAuthor, participant);
|
||||
|
@ -286,7 +286,7 @@ public class DocumentStructure {
|
|||
logger.warn("No " + AFFILIATED_ORGANIZATION_NAME + postfix + " found.");
|
||||
return;
|
||||
}
|
||||
String organizationUri = TS + ORGANIZATION + "/" + PARSERNAME + "_" + docID + order + "author" + postfix;
|
||||
String organizationUri = TS + ORGANIZATION + "/" + docID + order + "author" + postfix;
|
||||
Resource organization = m.createResource(organizationUri, organizationClass);
|
||||
Property affiliatedWith = m.createProperty(TS + "affiliatedWith");
|
||||
participant.addProperty(affiliatedWith, organization);
|
||||
|
@ -402,7 +402,7 @@ public class DocumentStructure {
|
|||
return true;
|
||||
}
|
||||
private void createTOCItem(DocumentPart docPart) {
|
||||
String tocItemName = TS + TOCITEM + "/" + PARSERNAME + "_" + docID + docPart.getSafePath();
|
||||
String tocItemName = TS + TOCITEM + "/" + docID + docPart.getSafePath();
|
||||
Resource tocItem = m.createIndividual(tocItemName,itemClass);
|
||||
tocItem.addProperty( RDFS.label, docPart.getName());
|
||||
Property pointsTo = m.createProperty(TS + "pointsTo");
|
||||
|
@ -461,5 +461,49 @@ public class DocumentStructure {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void setDocID(String fileName) {
|
||||
String identifier = fileName.replaceAll("\\s+", "_");
|
||||
identifier = transliterateToEn(identifier);
|
||||
identifier = identifier.replaceAll("[^a-zA-Z0-9_.-]", "");
|
||||
this.docID = identifier;
|
||||
}
|
||||
|
||||
private String transliterateToEn(String fileName) {
|
||||
return fileName
|
||||
.replaceAll("[аА]", "a")
|
||||
.replaceAll("[бБ]", "b")
|
||||
.replaceAll("[вВ]", "v")
|
||||
.replaceAll("[гГ]", "g")
|
||||
.replaceAll("[дД]", "d")
|
||||
.replaceAll("[еЕ]", "e")
|
||||
.replaceAll("[ёЁ]", "e")
|
||||
.replaceAll("[жЖ]", "zh")
|
||||
.replaceAll("[зЗ]", "z")
|
||||
.replaceAll("[иИ]", "i")
|
||||
.replaceAll("[йЙ]", "y")
|
||||
.replaceAll("[кК]", "k")
|
||||
.replaceAll("[лЛ]", "l")
|
||||
.replaceAll("[мМ]", "m")
|
||||
.replaceAll("[нН]", "n")
|
||||
.replaceAll("[оО]", "o")
|
||||
.replaceAll("[пП]", "p")
|
||||
.replaceAll("[рР]", "r")
|
||||
.replaceAll("[сС]", "s")
|
||||
.replaceAll("[тТ]", "t")
|
||||
.replaceAll("[уУ]", "u")
|
||||
.replaceAll("[фФ]", "f")
|
||||
.replaceAll("[хХ]", "kh")
|
||||
.replaceAll("[цЦ]", "ts")
|
||||
.replaceAll("[чЧ]", "ch")
|
||||
.replaceAll("[шШ]", "sh")
|
||||
.replaceAll("[щЩ]", "sch")
|
||||
.replaceAll("[ъЪ]", "")
|
||||
.replaceAll("[ыЫ]", "y")
|
||||
.replaceAll("[ьЬ]", "")
|
||||
.replaceAll("[эЭ]", "e")
|
||||
.replaceAll("[юЮ]", "yu")
|
||||
.replaceAll("[яЯ]", "ya");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue