refactoring 1

This commit is contained in:
Georgy Litvinov 2021-12-03 13:03:52 +01:00
parent f0c43c83e3
commit 1454d68b4a
7 changed files with 83 additions and 121 deletions

View file

@ -25,7 +25,7 @@ import org.slf4j.LoggerFactory;
import com.sun.star.uno.XComponentContext;
import w2phtml.Application;
import w2phtml.rdf.Metadata;
import w2phtml.rdf.MetadataContainer;
public class ConversionExecutor {

View file

@ -37,9 +37,9 @@ public class DocumentPart {
private String parentPath;
private String name = null;
private String order = null;
private Metadata metadata = null;
private MetadataContainer metadata = null;
public DocumentPart(XhtmlDocument document,Metadata metadata) {
public DocumentPart(XhtmlDocument document,MetadataContainer metadata) {
this.excerptDoc = document;
this.metadata = metadata;
extractPath();
@ -50,7 +50,7 @@ public class DocumentPart {
extractAnnotationMetadata();
}
public DocumentPart(String path, Metadata metadata) {
public DocumentPart(String path, MetadataContainer metadata) {
this.path = path;
this.metadata = metadata;
this.name = "";

View file

@ -19,28 +19,28 @@ import org.slf4j.LoggerFactory;
import com.opencsv.CSVReaderHeaderAware;
import com.opencsv.exceptions.CsvValidationException;
public class Metadata {
public class MetadataContainer {
private static final String SUBTITLE = "subtitle";
private static final String FILENAME = "Filename";
private static final String SECTION = "Section";
private static final Logger logger = LoggerFactory.getLogger(Metadata.class);
private static final Logger logger = LoggerFactory.getLogger(MetadataContainer.class);
private HashMap<String, ArrayList<Map<String, String>>> sectionsMetadata;
public Metadata() {
public MetadataContainer() {
sectionsMetadata = new HashMap<String, ArrayList<Map<String, String>>>();
}
public void read(String metadataFilePath) {
if (metadataFilePath == null) {
public void read(String filePath) {
if (filePath == null) {
return;
}
File file = new File(metadataFilePath);
File file = new File(filePath);
if (!file.exists() || !file.canRead()) {
return;
}
readCSVFile(metadataFilePath);
readCSVFile(filePath);
}
private void readCSVFile(String filePath) {

View file

@ -20,6 +20,7 @@ import org.apache.jena.rdf.model.Resource;
import org.apache.jena.vocabulary.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static w2phtml.util.Transliteration.*;
import w2phtml.xhtml.XhtmlConfig;
import w2phtml.xhtml.XhtmlDocument;
@ -27,36 +28,32 @@ import w2phtml.xhtml.XhtmlDocument;
import org.apache.jena.rdf.model.Property;
public class DocumentStructure {
public class RDFDocument {
private static final String FORMAT_RDF = "RDF/XML-ABBREV";
private static final String AFFILIATED_ORGANIZATION_POSTAL_CODE = "affiliated organization postal code";
private static final String AFFILIATED_ORGANIZATION_ADDRESS = "affiliated organization address";
private static final String AFFILIATED_ORGANIZATION_OFFICIAL_NAME = "affiliated organization official name";
private static final String AFFILIATED_ORGANIZATION_NAME = "affiliated organization name";
private static final String AUTHOR_INITIALS = "author initials";
private static final String AUTHOR_EMAIL = "author email";
private static final String AUTHOR_FAMILY = "author family";
private static final Logger logger = LoggerFactory.getLogger(DocumentStructure.class);
private static final Logger logger = LoggerFactory.getLogger(RDFDocument.class);
private static final String AUTHOR_GIVEN_NAME = "author given name";
private static final String MODIFICATION_TIME = "modificationTime";
private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt";
private static final String TOCITEM = "TOCItem";
private static final String TS = "https://litvinovg.pro/text_structures#";
private static final String POINTS_TO = TS + "pointsTo";
private static final String ITEM_NUMBER = TS + "itemNumber";
private static final String HAS_TOC_ITEM = TS + "hasTOCItem";
private static final String HAS_TEXT = TS + "hasText";
//private static final String PARSERNAME = "w2phtml";
private static final String TEXT_EXCERPT = "textExcerpt";
private static final String TOC_LEVEL = "TOCLevel";
private static final String TABLE_OF_CONTENTS = "TOC";
private static final String PARTICIPANT = "publicationParticipant";
private static final String ORGANIZATION = "organization";
private static final String PUBLICATION = "publication";
private static final String BOOK = "book";
private static final String JOURNAL = "journal";
@ -81,10 +78,10 @@ public class DocumentStructure {
private HashMap<String, Resource> tocLevels;
private HashMap<String, DocumentPart> inputParts;
private Metadata metadata;
private MetadataContainer metadata;
private Date currentTime;
public DocumentStructure(Vector<XhtmlDocument> files,String fileName, XhtmlConfig config,Metadata metadata) {
public RDFDocument(Vector<XhtmlDocument> files,String fileName, XhtmlConfig config, MetadataContainer metadata) {
this.tocLevels = new HashMap<String, Resource>();
this.inputParts = new HashMap<String, DocumentPart>();
this.m = ModelFactory.createOntologyModel();
@ -95,10 +92,9 @@ public class DocumentStructure {
this.organizationClass = textOntology.createClass(TS + ORGANIZATION);
this.participantClass = textOntology.createClass(TS + PARTICIPANT);
this.itemClass = textOntology.createClass(TS + TOCITEM);
setDocID(fileName);
formatDocID(fileName);
this.currentTime = Calendar.getInstance().getTime();
String publicationType = config.getRDFType();
setPublicationType(publicationType);
setPublicationType(config.getRDFType());
this.documentClass = textOntology.createClass(TS + documentType);
this.excerptClass = textOntology.createClass(TS + excerptType);
for(int i = 0 ; i< files.size();i++) {
@ -108,6 +104,12 @@ public class DocumentStructure {
addPart(part);
}
addEmptyParts();
createElements();
createTOCItems();
}
/**
 * Serializes the RDF model held in {@code m} to the supplied stream,
 * using the syntax named by {@code FORMAT_RDF}.
 *
 * @param os stream that receives the serialized model
 */
public void printModel(OutputStream os) {
    this.m.write(os, FORMAT_RDF);
}
private void setPublicationType(String publicationType) {
@ -156,30 +158,6 @@ public class DocumentStructure {
return emptyPath.toString();
}
/**
 * Serializes the RDF model held in {@code m} to the file
 * {@code fileName + ".rdf"} in the "RDF/XML-ABBREV" syntax.
 * <p>
 * I/O failures are reported on standard output together with a stack trace;
 * they are not propagated to the caller.
 *
 * @param fileName path of the output file without the {@code .rdf} extension
 */
public void printModel(String fileName) {
    File outFile = new File(fileName + ".rdf");
    try {
        outFile.createNewFile();
        // try-with-resources closes the writer on every path. The previous
        // finally block called close() on a writer that was still null when
        // the file could not be created or opened, raising a
        // NullPointerException instead of reporting the I/O failure.
        try (FileWriter fw = new FileWriter(outFile)) {
            m.write(fw, "RDF/XML-ABBREV");
        }
    } catch (IOException e) {
        System.out.println("File couldn't be created");
        e.printStackTrace();
    }
}
/**
 * Serializes the RDF model held in {@code m} to the supplied stream
 * in the "RDF/XML-ABBREV" syntax.
 *
 * @param os stream that receives the serialized model
 */
public void printModel(OutputStream os) {
    this.m.write(os, "RDF/XML-ABBREV");
}
/**
 * Registers a document part in {@code inputParts}, keyed by the path the
 * part reports. A part already stored under the same path is replaced.
 *
 * @param docExcerpt part to register
 */
private void addPart(DocumentPart docExcerpt) {
    final String partPath = docExcerpt.getPath();
    this.inputParts.put(partPath, docExcerpt);
}
@ -273,13 +251,13 @@ public class DocumentStructure {
Property initialsProperty = m.createProperty(TS + "participantInitials");
participant.addProperty( initialsProperty, authorInitials.trim());
}
attachOrganization(participant, map, order, postfix);
addOrganization(participant, map, order, postfix);
}
}
}
}
private void attachOrganization(Resource participant, Map<String, String> map, String order, String postfix) {
private void addOrganization(Resource participant, Map<String, String> map, String order, String postfix) {
String orgName = map.get(AFFILIATED_ORGANIZATION_NAME + postfix);
if (orgName == null) {
System.out.println(orgName + " not found");
@ -402,19 +380,18 @@ public class DocumentStructure {
return true;
}
private void createTOCItem(DocumentPart docPart) {
String tocItemName = TS + TOCITEM + "/" + docID + docPart.getSafePath();
Resource tocItem = m.createIndividual(tocItemName,itemClass);
String tocItemUri = TS + TOCITEM + "/" + docID + docPart.getSafePath();
Resource tocItem = m.createIndividual(tocItemUri,itemClass);
tocItem.addProperty( RDFS.label, docPart.getName());
Property pointsTo = m.createProperty(TS + "pointsTo");
Property itemNumber = m.createProperty(TS + "itemNumber");
Property hasTOCItem = m.createProperty(TS + "hasTOCItem");
Property pointsTo = m.createProperty(POINTS_TO);
Property itemNumber = m.createProperty(ITEM_NUMBER);
Property hasTOCItem = m.createProperty(HAS_TOC_ITEM);
tocItem.addLiteral(itemNumber, docPart.getNumber());
m.add(tocItem, pointsTo, tocLevels.get(docPart.getPath()));
if (!docPart.getPath().isEmpty()) {
Resource parent = tocLevels.get(docPart.getParentPath());
m.add(parent, hasTOCItem, tocItem);
}
}
private void attachExcerpt(DocumentPart docPart, Resource element) {
@ -426,7 +403,7 @@ public class DocumentStructure {
}
Resource excerpt = createExcerpt(docPart);
excerpt.addProperty( RDFS.label, docPart.getName());
Property hasText = m.createProperty(TS + "hasText");
Property hasText = m.createProperty(HAS_TEXT);
element.addProperty(hasText, excerpt);
if (!docPart.isMasterPart()) {
addMetadataProperties(excerpt, docPart);
@ -435,11 +412,6 @@ public class DocumentStructure {
}
}
/**
 * Populates the model in two steps: {@code createElements()} runs first,
 * then {@code createTOCItems()}.
 */
public void createTree() {
    // The order is kept as in the original.
    createElements();
    this.createTOCItems();
}
private void createTOCItems() {
Set<String> paths = inputParts.keySet();
for (String path : paths) {
@ -462,48 +434,10 @@ public class DocumentStructure {
}
}
private void setDocID(String fileName) {
private void formatDocID(String fileName) {
String identifier = fileName.replaceAll("\\s+", "_");
identifier = transliterateToEn(identifier);
identifier = identifier.replaceAll("[^a-zA-Z0-9_.-]", "");
this.docID = identifier;
}
/**
 * Replaces every Russian Cyrillic letter in the given string with a
 * lower-case Latin transliteration and leaves all other characters as they
 * are. Upper- and lower-case Cyrillic letters map to the same lower-case
 * Latin output; the hard and soft signs are dropped.
 * <p>
 * The string is scanned once. This replaces a chain of 33 {@code replaceAll}
 * calls that compiled a regular expression and copied the string on each
 * step, while producing the same result.
 *
 * @param fileName text to transliterate
 * @return the transliterated text
 * @throws NullPointerException if {@code fileName} is {@code null}
 */
private String transliterateToEn(String fileName) {
    StringBuilder latin = new StringBuilder(fileName.length());
    for (int i = 0; i < fileName.length(); i++) {
        char c = fileName.charAt(i);
        switch (c) {
            case 'а': case 'А': latin.append("a"); break;
            case 'б': case 'Б': latin.append("b"); break;
            case 'в': case 'В': latin.append("v"); break;
            case 'г': case 'Г': latin.append("g"); break;
            case 'д': case 'Д': latin.append("d"); break;
            case 'е': case 'Е': latin.append("e"); break;
            case 'ё': case 'Ё': latin.append("e"); break;
            case 'ж': case 'Ж': latin.append("zh"); break;
            case 'з': case 'З': latin.append("z"); break;
            case 'и': case 'И': latin.append("i"); break;
            case 'й': case 'Й': latin.append("y"); break;
            case 'к': case 'К': latin.append("k"); break;
            case 'л': case 'Л': latin.append("l"); break;
            case 'м': case 'М': latin.append("m"); break;
            case 'н': case 'Н': latin.append("n"); break;
            case 'о': case 'О': latin.append("o"); break;
            case 'п': case 'П': latin.append("p"); break;
            case 'р': case 'Р': latin.append("r"); break;
            case 'с': case 'С': latin.append("s"); break;
            case 'т': case 'Т': latin.append("t"); break;
            case 'у': case 'У': latin.append("u"); break;
            case 'ф': case 'Ф': latin.append("f"); break;
            case 'х': case 'Х': latin.append("kh"); break;
            case 'ц': case 'Ц': latin.append("ts"); break;
            case 'ч': case 'Ч': latin.append("ch"); break;
            case 'ш': case 'Ш': latin.append("sh"); break;
            case 'щ': case 'Щ': latin.append("sch"); break;
            case 'ъ': case 'Ъ': break; // hard sign is dropped
            case 'ы': case 'Ы': latin.append("y"); break;
            case 'ь': case 'Ь': break; // soft sign is dropped
            case 'э': case 'Э': latin.append("e"); break;
            case 'ю': case 'Ю': latin.append("yu"); break;
            case 'я': case 'Я': latin.append("ya"); break;
            default: latin.append(c); break;
        }
    }
    return latin.toString();
}
}

View file

@ -17,17 +17,15 @@ public class RDFDocumentResult implements OutputFile {
private ConverterResult xhtmlResult;
private String sFileName;
private XhtmlConfig config;
private DocumentStructure rdfStructure = null;
private RDFDocument rdfDocument = null;
public RDFDocumentResult(Vector<XhtmlDocument> outFiles, String fileName, XhtmlConfig config) {
this.sFileName = Misc.removeExtension(fileName);
this.config = config;
Metadata metadata = new Metadata();
MetadataContainer metadata = new MetadataContainer();
metadata.read(config.getCSVMetadataFile());
rdfStructure = new DocumentStructure(outFiles,sFileName,config,metadata);
rdfStructure.createTree();
rdfDocument = new RDFDocument(outFiles,sFileName,config,metadata);
}
@ -48,7 +46,7 @@ public class RDFDocumentResult implements OutputFile {
}
@Override public void write(OutputStream os) throws IOException {
rdfStructure.printModel(os);
rdfDocument.printModel(os);
}
}

View file

@ -1,11 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>The package writer2latex.util</title>
</head>
<body>
<p>Some general utility classes.</p>
</body>
</html>

View file

@ -0,0 +1,41 @@
package w2phtml.util;
/**
 * Static helper that transliterates Russian Cyrillic text to Latin letters.
 */
public class Transliteration {

    /** Lower-case Russian alphabet; the letter at index i pairs with {@code LATIN[i]}. */
    private static final String CYRILLIC_LOWER = "абвгдеёжзийклмнопрстуфхцчшщъыьэюя";

    /** Upper-case Russian alphabet, in the same order as {@code CYRILLIC_LOWER}. */
    private static final String CYRILLIC_UPPER = "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ";

    /** Latin replacement per alphabet position; empty strings drop the hard and soft signs. */
    private static final String[] LATIN = {
        "a", "b", "v", "g", "d", "e", "e", "zh", "z", "i", "y",
        "k", "l", "m", "n", "o", "p", "r", "s", "t", "u", "f",
        "kh", "ts", "ch", "sh", "sch", "", "y", "", "e", "yu", "ya"
    };

    /**
     * Replaces every Russian Cyrillic letter in the given string with a
     * lower-case Latin transliteration and leaves all other characters as
     * they are. Upper- and lower-case Cyrillic letters map to the same
     * lower-case Latin output.
     * <p>
     * The string is scanned once with a table lookup. This replaces a chain
     * of 33 {@code replaceAll} calls that compiled a regular expression and
     * copied the string on each step, while producing the same result.
     *
     * @param fileName text to transliterate
     * @return the transliterated text
     * @throws NullPointerException if {@code fileName} is {@code null}
     */
    public static String transliterateToEn(String fileName) {
        StringBuilder latin = new StringBuilder(fileName.length());
        for (int i = 0; i < fileName.length(); i++) {
            char c = fileName.charAt(i);
            int index = CYRILLIC_LOWER.indexOf(c);
            if (index < 0) {
                index = CYRILLIC_UPPER.indexOf(c);
            }
            if (index < 0) {
                // Not a Russian letter: keep the character unchanged.
                latin.append(c);
            } else {
                latin.append(LATIN[index]);
            }
        }
        return latin.toString();
    }
}