refactoring 1

This commit is contained in:
Georgy Litvinov 2021-12-03 13:03:52 +01:00
parent f0c43c83e3
commit 1454d68b4a
7 changed files with 83 additions and 121 deletions

View file

@ -25,7 +25,7 @@ import org.slf4j.LoggerFactory;
import com.sun.star.uno.XComponentContext; import com.sun.star.uno.XComponentContext;
import w2phtml.Application; import w2phtml.Application;
import w2phtml.rdf.Metadata; import w2phtml.rdf.MetadataContainer;
public class ConversionExecutor { public class ConversionExecutor {

View file

@ -37,9 +37,9 @@ public class DocumentPart {
private String parentPath; private String parentPath;
private String name = null; private String name = null;
private String order = null; private String order = null;
private Metadata metadata = null; private MetadataContainer metadata = null;
public DocumentPart(XhtmlDocument document,Metadata metadata) { public DocumentPart(XhtmlDocument document,MetadataContainer metadata) {
this.excerptDoc = document; this.excerptDoc = document;
this.metadata = metadata; this.metadata = metadata;
extractPath(); extractPath();
@ -50,7 +50,7 @@ public class DocumentPart {
extractAnnotationMetadata(); extractAnnotationMetadata();
} }
public DocumentPart(String path, Metadata metadata) { public DocumentPart(String path, MetadataContainer metadata) {
this.path = path; this.path = path;
this.metadata = metadata; this.metadata = metadata;
this.name = ""; this.name = "";

View file

@ -19,28 +19,28 @@ import org.slf4j.LoggerFactory;
import com.opencsv.CSVReaderHeaderAware; import com.opencsv.CSVReaderHeaderAware;
import com.opencsv.exceptions.CsvValidationException; import com.opencsv.exceptions.CsvValidationException;
public class Metadata { public class MetadataContainer {
private static final String SUBTITLE = "subtitle"; private static final String SUBTITLE = "subtitle";
private static final String FILENAME = "Filename"; private static final String FILENAME = "Filename";
private static final String SECTION = "Section"; private static final String SECTION = "Section";
private static final Logger logger = LoggerFactory.getLogger(Metadata.class); private static final Logger logger = LoggerFactory.getLogger(MetadataContainer.class);
private HashMap<String, ArrayList<Map<String, String>>> sectionsMetadata; private HashMap<String, ArrayList<Map<String, String>>> sectionsMetadata;
public Metadata() { public MetadataContainer() {
sectionsMetadata = new HashMap<String, ArrayList<Map<String, String>>>(); sectionsMetadata = new HashMap<String, ArrayList<Map<String, String>>>();
} }
public void read(String metadataFilePath) { public void read(String filePath) {
if (metadataFilePath == null) { if (filePath == null) {
return; return;
} }
File file = new File(metadataFilePath); File file = new File(filePath);
if (!file.exists() || !file.canRead()) { if (!file.exists() || !file.canRead()) {
return; return;
} }
readCSVFile(metadataFilePath); readCSVFile(filePath);
} }
private void readCSVFile(String filePath) { private void readCSVFile(String filePath) {

View file

@ -20,6 +20,7 @@ import org.apache.jena.rdf.model.Resource;
import org.apache.jena.vocabulary.*; import org.apache.jena.vocabulary.*;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import static w2phtml.util.Transliteration.*;
import w2phtml.xhtml.XhtmlConfig; import w2phtml.xhtml.XhtmlConfig;
import w2phtml.xhtml.XhtmlDocument; import w2phtml.xhtml.XhtmlDocument;
@ -27,36 +28,32 @@ import w2phtml.xhtml.XhtmlDocument;
import org.apache.jena.rdf.model.Property; import org.apache.jena.rdf.model.Property;
public class DocumentStructure { public class RDFDocument {
private static final String FORMAT_RDF = "RDF/XML-ABBREV";
private static final String AFFILIATED_ORGANIZATION_POSTAL_CODE = "affiliated organization postal code"; private static final String AFFILIATED_ORGANIZATION_POSTAL_CODE = "affiliated organization postal code";
private static final String AFFILIATED_ORGANIZATION_ADDRESS = "affiliated organization address"; private static final String AFFILIATED_ORGANIZATION_ADDRESS = "affiliated organization address";
private static final String AFFILIATED_ORGANIZATION_OFFICIAL_NAME = "affiliated organization official name"; private static final String AFFILIATED_ORGANIZATION_OFFICIAL_NAME = "affiliated organization official name";
private static final String AFFILIATED_ORGANIZATION_NAME = "affiliated organization name"; private static final String AFFILIATED_ORGANIZATION_NAME = "affiliated organization name";
private static final String AUTHOR_INITIALS = "author initials"; private static final String AUTHOR_INITIALS = "author initials";
private static final String AUTHOR_EMAIL = "author email"; private static final String AUTHOR_EMAIL = "author email";
private static final String AUTHOR_FAMILY = "author family"; private static final String AUTHOR_FAMILY = "author family";
private static final Logger logger = LoggerFactory.getLogger(RDFDocument.class);
private static final Logger logger = LoggerFactory.getLogger(DocumentStructure.class);
private static final String AUTHOR_GIVEN_NAME = "author given name"; private static final String AUTHOR_GIVEN_NAME = "author given name";
private static final String MODIFICATION_TIME = "modificationTime"; private static final String MODIFICATION_TIME = "modificationTime";
private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt"; private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt";
private static final String TOCITEM = "TOCItem"; private static final String TOCITEM = "TOCItem";
private static final String TS = "https://litvinovg.pro/text_structures#"; private static final String TS = "https://litvinovg.pro/text_structures#";
private static final String POINTS_TO = TS + "pointsTo";
private static final String ITEM_NUMBER = TS + "itemNumber";
private static final String HAS_TOC_ITEM = TS + "hasTOCItem";
private static final String HAS_TEXT = TS + "hasText";
//private static final String PARSERNAME = "w2phtml"; //private static final String PARSERNAME = "w2phtml";
private static final String TEXT_EXCERPT = "textExcerpt"; private static final String TEXT_EXCERPT = "textExcerpt";
private static final String TOC_LEVEL = "TOCLevel"; private static final String TOC_LEVEL = "TOCLevel";
private static final String TABLE_OF_CONTENTS = "TOC"; private static final String TABLE_OF_CONTENTS = "TOC";
private static final String PARTICIPANT = "publicationParticipant"; private static final String PARTICIPANT = "publicationParticipant";
private static final String ORGANIZATION = "organization"; private static final String ORGANIZATION = "organization";
private static final String PUBLICATION = "publication"; private static final String PUBLICATION = "publication";
private static final String BOOK = "book"; private static final String BOOK = "book";
private static final String JOURNAL = "journal"; private static final String JOURNAL = "journal";
@ -81,10 +78,10 @@ public class DocumentStructure {
private HashMap<String, Resource> tocLevels; private HashMap<String, Resource> tocLevels;
private HashMap<String, DocumentPart> inputParts; private HashMap<String, DocumentPart> inputParts;
private Metadata metadata; private MetadataContainer metadata;
private Date currentTime; private Date currentTime;
public DocumentStructure(Vector<XhtmlDocument> files,String fileName, XhtmlConfig config,Metadata metadata) { public RDFDocument(Vector<XhtmlDocument> files,String fileName, XhtmlConfig config, MetadataContainer metadata) {
this.tocLevels = new HashMap<String, Resource>(); this.tocLevels = new HashMap<String, Resource>();
this.inputParts = new HashMap<String, DocumentPart>(); this.inputParts = new HashMap<String, DocumentPart>();
this.m = ModelFactory.createOntologyModel(); this.m = ModelFactory.createOntologyModel();
@ -95,10 +92,9 @@ public class DocumentStructure {
this.organizationClass = textOntology.createClass(TS + ORGANIZATION); this.organizationClass = textOntology.createClass(TS + ORGANIZATION);
this.participantClass = textOntology.createClass(TS + PARTICIPANT); this.participantClass = textOntology.createClass(TS + PARTICIPANT);
this.itemClass = textOntology.createClass(TS + TOCITEM); this.itemClass = textOntology.createClass(TS + TOCITEM);
setDocID(fileName); formatDocID(fileName);
this.currentTime = Calendar.getInstance().getTime(); this.currentTime = Calendar.getInstance().getTime();
String publicationType = config.getRDFType(); setPublicationType(config.getRDFType());
setPublicationType(publicationType);
this.documentClass = textOntology.createClass(TS + documentType); this.documentClass = textOntology.createClass(TS + documentType);
this.excerptClass = textOntology.createClass(TS + excerptType); this.excerptClass = textOntology.createClass(TS + excerptType);
for(int i = 0 ; i< files.size();i++) { for(int i = 0 ; i< files.size();i++) {
@ -108,6 +104,12 @@ public class DocumentStructure {
addPart(part); addPart(part);
} }
addEmptyParts(); addEmptyParts();
createElements();
createTOCItems();
}
public void printModel(OutputStream os) {
m.write(os,FORMAT_RDF);
} }
private void setPublicationType(String publicationType) { private void setPublicationType(String publicationType) {
@ -155,30 +157,6 @@ public class DocumentStructure {
} }
return emptyPath.toString(); return emptyPath.toString();
} }
public void printModel(String fileName) {
File outFile = new File(fileName + ".rdf");
FileWriter fw = null;
try {
outFile.createNewFile();
fw = new FileWriter(outFile);
m.write(fw,"RDF/XML-ABBREV");
} catch (IOException e) {
System.out.println("File couldn't be created");
e.printStackTrace();
} finally {
try {
fw.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
public void printModel(OutputStream os) {
m.write(os,"RDF/XML-ABBREV");
}
private void addPart(DocumentPart docExcerpt) { private void addPart(DocumentPart docExcerpt) {
inputParts.put(docExcerpt.getPath(), docExcerpt); inputParts.put(docExcerpt.getPath(), docExcerpt);
@ -273,13 +251,13 @@ public class DocumentStructure {
Property initialsProperty = m.createProperty(TS + "participantInitials"); Property initialsProperty = m.createProperty(TS + "participantInitials");
participant.addProperty( initialsProperty, authorInitials.trim()); participant.addProperty( initialsProperty, authorInitials.trim());
} }
attachOrganization(participant, map, order, postfix); addOrganization(participant, map, order, postfix);
} }
} }
} }
} }
private void attachOrganization(Resource participant, Map<String, String> map, String order, String postfix) { private void addOrganization(Resource participant, Map<String, String> map, String order, String postfix) {
String orgName = map.get(AFFILIATED_ORGANIZATION_NAME + postfix); String orgName = map.get(AFFILIATED_ORGANIZATION_NAME + postfix);
if (orgName == null) { if (orgName == null) {
System.out.println(orgName + " not found"); System.out.println(orgName + " not found");
@ -402,19 +380,18 @@ public class DocumentStructure {
return true; return true;
} }
private void createTOCItem(DocumentPart docPart) { private void createTOCItem(DocumentPart docPart) {
String tocItemName = TS + TOCITEM + "/" + docID + docPart.getSafePath(); String tocItemUri = TS + TOCITEM + "/" + docID + docPart.getSafePath();
Resource tocItem = m.createIndividual(tocItemName,itemClass); Resource tocItem = m.createIndividual(tocItemUri,itemClass);
tocItem.addProperty( RDFS.label, docPart.getName()); tocItem.addProperty( RDFS.label, docPart.getName());
Property pointsTo = m.createProperty(TS + "pointsTo"); Property pointsTo = m.createProperty(POINTS_TO);
Property itemNumber = m.createProperty(TS + "itemNumber"); Property itemNumber = m.createProperty(ITEM_NUMBER);
Property hasTOCItem = m.createProperty(TS + "hasTOCItem"); Property hasTOCItem = m.createProperty(HAS_TOC_ITEM);
tocItem.addLiteral(itemNumber, docPart.getNumber()); tocItem.addLiteral(itemNumber, docPart.getNumber());
m.add(tocItem, pointsTo, tocLevels.get(docPart.getPath())); m.add(tocItem, pointsTo, tocLevels.get(docPart.getPath()));
if (!docPart.getPath().isEmpty()) { if (!docPart.getPath().isEmpty()) {
Resource parent = tocLevels.get(docPart.getParentPath()); Resource parent = tocLevels.get(docPart.getParentPath());
m.add(parent, hasTOCItem, tocItem); m.add(parent, hasTOCItem, tocItem);
} }
} }
private void attachExcerpt(DocumentPart docPart, Resource element) { private void attachExcerpt(DocumentPart docPart, Resource element) {
@ -426,7 +403,7 @@ public class DocumentStructure {
} }
Resource excerpt = createExcerpt(docPart); Resource excerpt = createExcerpt(docPart);
excerpt.addProperty( RDFS.label, docPart.getName()); excerpt.addProperty( RDFS.label, docPart.getName());
Property hasText = m.createProperty(TS + "hasText"); Property hasText = m.createProperty(HAS_TEXT);
element.addProperty(hasText, excerpt); element.addProperty(hasText, excerpt);
if (!docPart.isMasterPart()) { if (!docPart.isMasterPart()) {
addMetadataProperties(excerpt, docPart); addMetadataProperties(excerpt, docPart);
@ -435,11 +412,6 @@ public class DocumentStructure {
} }
} }
public void createTree() {
createElements();
createTOCItems();
}
private void createTOCItems() { private void createTOCItems() {
Set<String> paths = inputParts.keySet(); Set<String> paths = inputParts.keySet();
for (String path : paths) { for (String path : paths) {
@ -462,48 +434,10 @@ public class DocumentStructure {
} }
} }
private void setDocID(String fileName) { private void formatDocID(String fileName) {
String identifier = fileName.replaceAll("\\s+", "_"); String identifier = fileName.replaceAll("\\s+", "_");
identifier = transliterateToEn(identifier); identifier = transliterateToEn(identifier);
identifier = identifier.replaceAll("[^a-zA-Z0-9_.-]", ""); identifier = identifier.replaceAll("[^a-zA-Z0-9_.-]", "");
this.docID = identifier; this.docID = identifier;
} }
private String transliterateToEn(String fileName) {
return fileName
.replaceAll("[аА]", "a")
.replaceAll("[бБ]", "b")
.replaceAll("[вВ]", "v")
.replaceAll("[гГ]", "g")
.replaceAll("[дД]", "d")
.replaceAll("[еЕ]", "e")
.replaceAll("[ёЁ]", "e")
.replaceAll("[жЖ]", "zh")
.replaceAll("[зЗ]", "z")
.replaceAll("[иИ]", "i")
.replaceAll("[йЙ]", "y")
.replaceAll("[кК]", "k")
.replaceAll("[лЛ]", "l")
.replaceAll("[мМ]", "m")
.replaceAll("[нН]", "n")
.replaceAll("[оО]", "o")
.replaceAll("[пП]", "p")
.replaceAll("[рР]", "r")
.replaceAll("[сС]", "s")
.replaceAll("[тТ]", "t")
.replaceAll("[уУ]", "u")
.replaceAll("[фФ]", "f")
.replaceAll("[хХ]", "kh")
.replaceAll("[цЦ]", "ts")
.replaceAll("[чЧ]", "ch")
.replaceAll("[шШ]", "sh")
.replaceAll("[щЩ]", "sch")
.replaceAll("[ъЪ]", "")
.replaceAll("[ыЫ]", "y")
.replaceAll("[ьЬ]", "")
.replaceAll("[эЭ]", "e")
.replaceAll("[юЮ]", "yu")
.replaceAll("[яЯ]", "ya");
}
} }

View file

@ -17,17 +17,15 @@ public class RDFDocumentResult implements OutputFile {
private ConverterResult xhtmlResult; private ConverterResult xhtmlResult;
private String sFileName; private String sFileName;
private XhtmlConfig config; private XhtmlConfig config;
private DocumentStructure rdfStructure = null; private RDFDocument rdfDocument = null;
public RDFDocumentResult(Vector<XhtmlDocument> outFiles, String fileName, XhtmlConfig config) { public RDFDocumentResult(Vector<XhtmlDocument> outFiles, String fileName, XhtmlConfig config) {
this.sFileName = Misc.removeExtension(fileName); this.sFileName = Misc.removeExtension(fileName);
this.config = config; this.config = config;
Metadata metadata = new Metadata(); MetadataContainer metadata = new MetadataContainer();
metadata.read(config.getCSVMetadataFile()); metadata.read(config.getCSVMetadataFile());
rdfStructure = new DocumentStructure(outFiles,sFileName,config,metadata); rdfDocument = new RDFDocument(outFiles,sFileName,config,metadata);
rdfStructure.createTree();
} }
@ -48,7 +46,7 @@ public class RDFDocumentResult implements OutputFile {
} }
@Override public void write(OutputStream os) throws IOException { @Override public void write(OutputStream os) throws IOException {
rdfStructure.printModel(os); rdfDocument.printModel(os);
} }
} }

View file

@ -1,11 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<html>
<head>
<title>The package writer2latex.util</title>
</head>
<body>
<p>Some general utility classes.</p>
</body>
</html>

View file

@ -0,0 +1,41 @@
package w2phtml.util;
/**
 * Static helper that romanizes Russian Cyrillic letters.
 *
 * <p>The mapping is case-insensitive on input and always produces lower-case
 * Latin output (for example both U+0416 and U+0436 become {@code "zh"}).
 * Hard and soft signs are dropped. Every character outside the Russian
 * alphabet is copied through unchanged.
 *
 * <p>The table below is addressed by code point rather than by Cyrillic
 * literals, so the class compiles to the same behavior regardless of the
 * source-file encoding passed to the compiler.
 */
public class Transliteration {

    /** First and last code points of the contiguous upper-case range (A .. YA). */
    private static final char UPPER_FIRST = '\u0410';
    private static final char UPPER_LAST = '\u042F';

    /** First and last code points of the contiguous lower-case range (a .. ya). */
    private static final char LOWER_FIRST = '\u0430';
    private static final char LOWER_LAST = '\u044F';

    /** The letter YO sits outside both contiguous ranges and is handled separately. */
    private static final char UPPER_YO = '\u0401';
    private static final char LOWER_YO = '\u0451';
    private static final String YO_LATIN = "e";

    /**
     * Latin replacements in Unicode order of the 32 letters U+0430 .. U+044F
     * (identical order for U+0410 .. U+042F). Empty strings drop the letter
     * (hard sign at index 26, soft sign at index 28).
     */
    private static final String[] LATIN = {
        "a", "b", "v", "g", "d", "e", "zh", "z",
        "i", "y", "k", "l", "m", "n", "o", "p",
        "r", "s", "t", "u", "f", "kh", "ts", "ch",
        "sh", "sch", "", "y", "", "e", "yu", "ya"
    };

    /**
     * Replaces every Russian Cyrillic letter in the given text with its Latin
     * transliteration and leaves all other characters untouched.
     *
     * <p>Runs in a single pass over the input instead of compiling and applying
     * one regular expression per letter. The result is the same as applying the
     * per-letter replacements sequentially, because every replacement is pure
     * ASCII and therefore never matches another letter's rule.
     *
     * @param fileName text to transliterate; must not be {@code null}
     * @return the transliterated text (possibly shorter or longer than the input)
     * @throws NullPointerException if {@code fileName} is {@code null}
     */
    public static String transliterateToEn(String fileName) {
        int length = fileName.length();
        // Digraphs such as "zh" or "sch" make the output grow; leave some headroom.
        StringBuilder latin = new StringBuilder(length + 16);
        for (int i = 0; i < length; i++) {
            char c = fileName.charAt(i);
            if (c >= LOWER_FIRST && c <= LOWER_LAST) {
                latin.append(LATIN[c - LOWER_FIRST]);
            } else if (c >= UPPER_FIRST && c <= UPPER_LAST) {
                latin.append(LATIN[c - UPPER_FIRST]);
            } else if (c == LOWER_YO || c == UPPER_YO) {
                latin.append(YO_LATIN);
            } else {
                latin.append(c);
            }
        }
        return latin.toString();
    }
}