refactoring 1
This commit is contained in:
parent
f0c43c83e3
commit
1454d68b4a
7 changed files with 83 additions and 121 deletions
|
@ -25,7 +25,7 @@ import org.slf4j.LoggerFactory;
|
|||
import com.sun.star.uno.XComponentContext;
|
||||
|
||||
import w2phtml.Application;
|
||||
import w2phtml.rdf.Metadata;
|
||||
import w2phtml.rdf.MetadataContainer;
|
||||
|
||||
public class ConversionExecutor {
|
||||
|
||||
|
|
|
@ -37,9 +37,9 @@ public class DocumentPart {
|
|||
private String parentPath;
|
||||
private String name = null;
|
||||
private String order = null;
|
||||
private Metadata metadata = null;
|
||||
private MetadataContainer metadata = null;
|
||||
|
||||
public DocumentPart(XhtmlDocument document,Metadata metadata) {
|
||||
public DocumentPart(XhtmlDocument document,MetadataContainer metadata) {
|
||||
this.excerptDoc = document;
|
||||
this.metadata = metadata;
|
||||
extractPath();
|
||||
|
@ -50,7 +50,7 @@ public class DocumentPart {
|
|||
extractAnnotationMetadata();
|
||||
}
|
||||
|
||||
public DocumentPart(String path, Metadata metadata) {
|
||||
public DocumentPart(String path, MetadataContainer metadata) {
|
||||
this.path = path;
|
||||
this.metadata = metadata;
|
||||
this.name = "";
|
||||
|
|
|
@ -19,28 +19,28 @@ import org.slf4j.LoggerFactory;
|
|||
import com.opencsv.CSVReaderHeaderAware;
|
||||
import com.opencsv.exceptions.CsvValidationException;
|
||||
|
||||
public class Metadata {
|
||||
public class MetadataContainer {
|
||||
private static final String SUBTITLE = "subtitle";
|
||||
private static final String FILENAME = "Filename";
|
||||
private static final String SECTION = "Section";
|
||||
private static final Logger logger = LoggerFactory.getLogger(Metadata.class);
|
||||
private static final Logger logger = LoggerFactory.getLogger(MetadataContainer.class);
|
||||
|
||||
private HashMap<String, ArrayList<Map<String, String>>> sectionsMetadata;
|
||||
|
||||
public Metadata() {
|
||||
public MetadataContainer() {
|
||||
sectionsMetadata = new HashMap<String, ArrayList<Map<String, String>>>();
|
||||
|
||||
}
|
||||
|
||||
public void read(String metadataFilePath) {
|
||||
if (metadataFilePath == null) {
|
||||
public void read(String filePath) {
|
||||
if (filePath == null) {
|
||||
return;
|
||||
}
|
||||
File file = new File(metadataFilePath);
|
||||
File file = new File(filePath);
|
||||
if (!file.exists() || !file.canRead()) {
|
||||
return;
|
||||
}
|
||||
readCSVFile(metadataFilePath);
|
||||
readCSVFile(filePath);
|
||||
}
|
||||
|
||||
private void readCSVFile(String filePath) {
|
|
@ -20,6 +20,7 @@ import org.apache.jena.rdf.model.Resource;
|
|||
import org.apache.jena.vocabulary.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import static w2phtml.util.Transliteration.*;
|
||||
|
||||
import w2phtml.xhtml.XhtmlConfig;
|
||||
import w2phtml.xhtml.XhtmlDocument;
|
||||
|
@ -27,36 +28,32 @@ import w2phtml.xhtml.XhtmlDocument;
|
|||
import org.apache.jena.rdf.model.Property;
|
||||
|
||||
|
||||
public class DocumentStructure {
|
||||
public class RDFDocument {
|
||||
|
||||
private static final String FORMAT_RDF = "RDF/XML-ABBREV";
|
||||
private static final String AFFILIATED_ORGANIZATION_POSTAL_CODE = "affiliated organization postal code";
|
||||
|
||||
private static final String AFFILIATED_ORGANIZATION_ADDRESS = "affiliated organization address";
|
||||
|
||||
private static final String AFFILIATED_ORGANIZATION_OFFICIAL_NAME = "affiliated organization official name";
|
||||
|
||||
private static final String AFFILIATED_ORGANIZATION_NAME = "affiliated organization name";
|
||||
|
||||
private static final String AUTHOR_INITIALS = "author initials";
|
||||
|
||||
private static final String AUTHOR_EMAIL = "author email";
|
||||
|
||||
private static final String AUTHOR_FAMILY = "author family";
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DocumentStructure.class);
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(RDFDocument.class);
|
||||
private static final String AUTHOR_GIVEN_NAME = "author given name";
|
||||
private static final String MODIFICATION_TIME = "modificationTime";
|
||||
private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt";
|
||||
private static final String TOCITEM = "TOCItem";
|
||||
private static final String TS = "https://litvinovg.pro/text_structures#";
|
||||
private static final String POINTS_TO = TS + "pointsTo";
|
||||
private static final String ITEM_NUMBER = TS + "itemNumber";
|
||||
private static final String HAS_TOC_ITEM = TS + "hasTOCItem";
|
||||
private static final String HAS_TEXT = TS + "hasText";
|
||||
//private static final String PARSERNAME = "w2phtml";
|
||||
private static final String TEXT_EXCERPT = "textExcerpt";
|
||||
private static final String TOC_LEVEL = "TOCLevel";
|
||||
private static final String TABLE_OF_CONTENTS = "TOC";
|
||||
private static final String PARTICIPANT = "publicationParticipant";
|
||||
private static final String ORGANIZATION = "organization";
|
||||
|
||||
private static final String PUBLICATION = "publication";
|
||||
private static final String BOOK = "book";
|
||||
private static final String JOURNAL = "journal";
|
||||
|
@ -81,10 +78,10 @@ public class DocumentStructure {
|
|||
|
||||
private HashMap<String, Resource> tocLevels;
|
||||
private HashMap<String, DocumentPart> inputParts;
|
||||
private Metadata metadata;
|
||||
private MetadataContainer metadata;
|
||||
private Date currentTime;
|
||||
|
||||
public DocumentStructure(Vector<XhtmlDocument> files,String fileName, XhtmlConfig config,Metadata metadata) {
|
||||
public RDFDocument(Vector<XhtmlDocument> files,String fileName, XhtmlConfig config, MetadataContainer metadata) {
|
||||
this.tocLevels = new HashMap<String, Resource>();
|
||||
this.inputParts = new HashMap<String, DocumentPart>();
|
||||
this.m = ModelFactory.createOntologyModel();
|
||||
|
@ -95,10 +92,9 @@ public class DocumentStructure {
|
|||
this.organizationClass = textOntology.createClass(TS + ORGANIZATION);
|
||||
this.participantClass = textOntology.createClass(TS + PARTICIPANT);
|
||||
this.itemClass = textOntology.createClass(TS + TOCITEM);
|
||||
setDocID(fileName);
|
||||
formatDocID(fileName);
|
||||
this.currentTime = Calendar.getInstance().getTime();
|
||||
String publicationType = config.getRDFType();
|
||||
setPublicationType(publicationType);
|
||||
setPublicationType(config.getRDFType());
|
||||
this.documentClass = textOntology.createClass(TS + documentType);
|
||||
this.excerptClass = textOntology.createClass(TS + excerptType);
|
||||
for(int i = 0 ; i< files.size();i++) {
|
||||
|
@ -108,6 +104,12 @@ public class DocumentStructure {
|
|||
addPart(part);
|
||||
}
|
||||
addEmptyParts();
|
||||
createElements();
|
||||
createTOCItems();
|
||||
}
|
||||
|
||||
/**
 * Writes the RDF model held in {@code m} to the supplied stream, using the
 * serialization named by {@code FORMAT_RDF} ("RDF/XML-ABBREV").
 *
 * @param os destination stream; it is not closed by this method itself
 */
public void printModel(OutputStream os) {
|
||||
m.write(os,FORMAT_RDF);
|
||||
}
|
||||
|
||||
private void setPublicationType(String publicationType) {
|
||||
|
@ -155,30 +157,6 @@ public class DocumentStructure {
|
|||
}
|
||||
return emptyPath.toString();
|
||||
}
|
||||
|
||||
public void printModel(String fileName) {
|
||||
File outFile = new File(fileName + ".rdf");
|
||||
FileWriter fw = null;
|
||||
try {
|
||||
outFile.createNewFile();
|
||||
fw = new FileWriter(outFile);
|
||||
m.write(fw,"RDF/XML-ABBREV");
|
||||
|
||||
} catch (IOException e) {
|
||||
System.out.println("File couldn't be created");
|
||||
e.printStackTrace();
|
||||
} finally {
|
||||
try {
|
||||
fw.close();
|
||||
} catch (IOException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
public void printModel(OutputStream os) {
|
||||
m.write(os,"RDF/XML-ABBREV");
|
||||
}
|
||||
|
||||
private void addPart(DocumentPart docExcerpt) {
|
||||
inputParts.put(docExcerpt.getPath(), docExcerpt);
|
||||
|
@ -273,13 +251,13 @@ public class DocumentStructure {
|
|||
Property initialsProperty = m.createProperty(TS + "participantInitials");
|
||||
participant.addProperty( initialsProperty, authorInitials.trim());
|
||||
}
|
||||
attachOrganization(participant, map, order, postfix);
|
||||
addOrganization(participant, map, order, postfix);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void attachOrganization(Resource participant, Map<String, String> map, String order, String postfix) {
|
||||
private void addOrganization(Resource participant, Map<String, String> map, String order, String postfix) {
|
||||
String orgName = map.get(AFFILIATED_ORGANIZATION_NAME + postfix);
|
||||
if (orgName == null) {
|
||||
System.out.println(orgName + " not found");
|
||||
|
@ -402,19 +380,18 @@ public class DocumentStructure {
|
|||
return true;
|
||||
}
|
||||
private void createTOCItem(DocumentPart docPart) {
|
||||
String tocItemName = TS + TOCITEM + "/" + docID + docPart.getSafePath();
|
||||
Resource tocItem = m.createIndividual(tocItemName,itemClass);
|
||||
String tocItemUri = TS + TOCITEM + "/" + docID + docPart.getSafePath();
|
||||
Resource tocItem = m.createIndividual(tocItemUri,itemClass);
|
||||
tocItem.addProperty( RDFS.label, docPart.getName());
|
||||
Property pointsTo = m.createProperty(TS + "pointsTo");
|
||||
Property itemNumber = m.createProperty(TS + "itemNumber");
|
||||
Property hasTOCItem = m.createProperty(TS + "hasTOCItem");
|
||||
Property pointsTo = m.createProperty(POINTS_TO);
|
||||
Property itemNumber = m.createProperty(ITEM_NUMBER);
|
||||
Property hasTOCItem = m.createProperty(HAS_TOC_ITEM);
|
||||
tocItem.addLiteral(itemNumber, docPart.getNumber());
|
||||
m.add(tocItem, pointsTo, tocLevels.get(docPart.getPath()));
|
||||
if (!docPart.getPath().isEmpty()) {
|
||||
Resource parent = tocLevels.get(docPart.getParentPath());
|
||||
m.add(parent, hasTOCItem, tocItem);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void attachExcerpt(DocumentPart docPart, Resource element) {
|
||||
|
@ -426,7 +403,7 @@ public class DocumentStructure {
|
|||
}
|
||||
Resource excerpt = createExcerpt(docPart);
|
||||
excerpt.addProperty( RDFS.label, docPart.getName());
|
||||
Property hasText = m.createProperty(TS + "hasText");
|
||||
Property hasText = m.createProperty(HAS_TEXT);
|
||||
element.addProperty(hasText, excerpt);
|
||||
if (!docPart.isMasterPart()) {
|
||||
addMetadataProperties(excerpt, docPart);
|
||||
|
@ -435,11 +412,6 @@ public class DocumentStructure {
|
|||
}
|
||||
}
|
||||
|
||||
public void createTree() {
|
||||
createElements();
|
||||
createTOCItems();
|
||||
}
|
||||
|
||||
private void createTOCItems() {
|
||||
Set<String> paths = inputParts.keySet();
|
||||
for (String path : paths) {
|
||||
|
@ -462,48 +434,10 @@ public class DocumentStructure {
|
|||
}
|
||||
}
|
||||
|
||||
private void setDocID(String fileName) {
|
||||
private void formatDocID(String fileName) {
|
||||
String identifier = fileName.replaceAll("\\s+", "_");
|
||||
identifier = transliterateToEn(identifier);
|
||||
identifier = identifier.replaceAll("[^a-zA-Z0-9_.-]", "");
|
||||
this.docID = identifier;
|
||||
}
|
||||
|
||||
private String transliterateToEn(String fileName) {
|
||||
return fileName
|
||||
.replaceAll("[аА]", "a")
|
||||
.replaceAll("[бБ]", "b")
|
||||
.replaceAll("[вВ]", "v")
|
||||
.replaceAll("[гГ]", "g")
|
||||
.replaceAll("[дД]", "d")
|
||||
.replaceAll("[еЕ]", "e")
|
||||
.replaceAll("[ёЁ]", "e")
|
||||
.replaceAll("[жЖ]", "zh")
|
||||
.replaceAll("[зЗ]", "z")
|
||||
.replaceAll("[иИ]", "i")
|
||||
.replaceAll("[йЙ]", "y")
|
||||
.replaceAll("[кК]", "k")
|
||||
.replaceAll("[лЛ]", "l")
|
||||
.replaceAll("[мМ]", "m")
|
||||
.replaceAll("[нН]", "n")
|
||||
.replaceAll("[оО]", "o")
|
||||
.replaceAll("[пП]", "p")
|
||||
.replaceAll("[рР]", "r")
|
||||
.replaceAll("[сС]", "s")
|
||||
.replaceAll("[тТ]", "t")
|
||||
.replaceAll("[уУ]", "u")
|
||||
.replaceAll("[фФ]", "f")
|
||||
.replaceAll("[хХ]", "kh")
|
||||
.replaceAll("[цЦ]", "ts")
|
||||
.replaceAll("[чЧ]", "ch")
|
||||
.replaceAll("[шШ]", "sh")
|
||||
.replaceAll("[щЩ]", "sch")
|
||||
.replaceAll("[ъЪ]", "")
|
||||
.replaceAll("[ыЫ]", "y")
|
||||
.replaceAll("[ьЬ]", "")
|
||||
.replaceAll("[эЭ]", "e")
|
||||
.replaceAll("[юЮ]", "yu")
|
||||
.replaceAll("[яЯ]", "ya");
|
||||
}
|
||||
|
||||
}
|
|
@ -17,17 +17,15 @@ public class RDFDocumentResult implements OutputFile {
|
|||
private ConverterResult xhtmlResult;
|
||||
private String sFileName;
|
||||
private XhtmlConfig config;
|
||||
private DocumentStructure rdfStructure = null;
|
||||
private RDFDocument rdfDocument = null;
|
||||
|
||||
|
||||
public RDFDocumentResult(Vector<XhtmlDocument> outFiles, String fileName, XhtmlConfig config) {
|
||||
this.sFileName = Misc.removeExtension(fileName);
|
||||
this.config = config;
|
||||
Metadata metadata = new Metadata();
|
||||
MetadataContainer metadata = new MetadataContainer();
|
||||
metadata.read(config.getCSVMetadataFile());
|
||||
rdfStructure = new DocumentStructure(outFiles,sFileName,config,metadata);
|
||||
rdfStructure.createTree();
|
||||
|
||||
rdfDocument = new RDFDocument(outFiles,sFileName,config,metadata);
|
||||
}
|
||||
|
||||
|
||||
|
@ -48,7 +46,7 @@ public class RDFDocumentResult implements OutputFile {
|
|||
}
|
||||
|
||||
@Override public void write(OutputStream os) throws IOException {
|
||||
rdfStructure.printModel(os);
|
||||
rdfDocument.printModel(os);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,11 +0,0 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<title>The package writer2latex.util</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<p>Some general utility classes.</p>
|
||||
</body>
|
||||
</html>
|
41
src/main/java/w2phtml/util/Transliteration.java
Normal file
41
src/main/java/w2phtml/util/Transliteration.java
Normal file
|
@ -0,0 +1,41 @@
|
|||
package w2phtml.util;
|
||||
|
||||
/**
 * Transliterates Russian Cyrillic text into a lowercase Latin approximation.
 *
 * <p>Upper- and lowercase Cyrillic letters produce the same lowercase Latin
 * output, the hard and soft signs are dropped, and every other character is
 * copied through unchanged.
 */
public class Transliteration {

    /** Cyrillic capital A (U+0410), first letter of the uppercase run. */
    private static final char UPPER_FIRST = '\u0410';

    /** Cyrillic capital YA (U+042F), last letter of the uppercase run. */
    private static final char UPPER_LAST = '\u042F';

    /** Cyrillic small a (U+0430), first letter of the lowercase run. */
    private static final char LOWER_FIRST = '\u0430';

    /** Cyrillic small ya (U+044F), last letter of the lowercase run. */
    private static final char LOWER_LAST = '\u044F';

    /** Cyrillic capital IO (U+0401); it sits outside the contiguous run. */
    private static final char UPPER_IO = '\u0401';

    /** Cyrillic small io (U+0451); it sits outside the contiguous run. */
    private static final char LOWER_IO = '\u0451';

    /**
     * Latin replacement for each of the 32 letters a..ya, in code point order
     * (U+0430..U+044F). Empty strings are the hard and soft signs.
     */
    private static final String[] LATIN = {
        "a", "b", "v", "g", "d", "e", "zh", "z",
        "i", "y", "k", "l", "m", "n", "o", "p",
        "r", "s", "t", "u", "f", "kh", "ts", "ch",
        "sh", "sch", "", "y", "", "e", "yu", "ya"
    };

    /**
     * Replaces every Russian Cyrillic letter in the input with its Latin
     * equivalent in a single pass, instead of running one regular expression
     * per letter over the whole string.
     *
     * @param fileName text to transliterate; must not be {@code null}
     * @return the text with Cyrillic letters replaced by lowercase Latin ones
     * @throws NullPointerException if {@code fileName} is {@code null}
     */
    public static String transliterateToEn(String fileName) {
        int length = fileName.length();
        // Some letters expand to two or three Latin characters, so leave headroom.
        StringBuilder latin = new StringBuilder(length + 16);
        for (int i = 0; i < length; i++) {
            char c = fileName.charAt(i);
            if (c >= LOWER_FIRST && c <= LOWER_LAST) {
                latin.append(LATIN[c - LOWER_FIRST]);
            } else if (c >= UPPER_FIRST && c <= UPPER_LAST) {
                latin.append(LATIN[c - UPPER_FIRST]);
            } else if (c == LOWER_IO || c == UPPER_IO) {
                latin.append('e');
            } else {
                latin.append(c);
            }
        }
        return latin.toString();
    }
}
|
Loading…
Add table
Reference in a new issue