Extended CSV metadata to support multiple authors with affiliations

This commit is contained in:
Georgy Litvinov 2021-03-25 14:20:08 +01:00
parent b7aced55ab
commit 8424511e9b

View file

@ -18,6 +18,8 @@ import org.apache.jena.ontology.OntModel;
import org.apache.jena.rdf.model.ModelFactory; import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.Resource;
import org.apache.jena.vocabulary.*; import org.apache.jena.vocabulary.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import w2phtml.xhtml.XhtmlConfig; import w2phtml.xhtml.XhtmlConfig;
import w2phtml.xhtml.XhtmlDocument; import w2phtml.xhtml.XhtmlDocument;
@ -26,6 +28,24 @@ import org.apache.jena.rdf.model.Property;
public class DocumentStructure { public class DocumentStructure {
private static final String AFFILIATED_ORGANIZATION_POSTAL_CODE = "affiliated organization postal code";
private static final String AFFILIATED_ORGANIZATION_ADDRESS = "affiliated organization address";
private static final String AFFILIATED_ORGANIZATION_OFFICIAL_NAME = "affiliated organization official name";
private static final String AFFILIATED_ORGANIZATION_NAME = "affiliated organization name";
private static final String AUTHOR_INITIALS = "author initials";
private static final String AUTHOR_EMAIL = "author email";
private static final String AUTHOR_FAMILY = "author family";
private static final Logger logger = LoggerFactory.getLogger(DocumentStructure.class);
private static final String AUTHOR_GIVEN_NAME = "author given name";
private static final String MODIFICATION_TIME = "modificationTime"; private static final String MODIFICATION_TIME = "modificationTime";
private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt"; private static final String HTML_EXCERPT_PROPERTY = "htmlExcerpt";
private static final String TOCITEM = "TOCItem"; private static final String TOCITEM = "TOCItem";
@ -211,80 +231,79 @@ public class DocumentStructure {
mainResource.addProperty( property, todayAsString); mainResource.addProperty( property, todayAsString);
} }
private void addAuthor(Resource resource, DocumentPart docPart) { private void addAuthor(Resource excerpt, DocumentPart docPart) {
String order = docPart.getOrder(); String order = docPart.getOrder();
ArrayList<Map<String, String>> sectionMeta = metadata.getSection(order); ArrayList<Map<String, String>> sectionMeta = metadata.getSection(order);
for (Map<String, String> map : sectionMeta) { for (Map<String, String> map : sectionMeta) {
Set<String> names = map.keySet(); Set<String> names = map.keySet();
if (names.contains("author given name")) { for (String givenNameKey : names) {
attachAuthor(resource, map, order); if (givenNameKey.startsWith(AUTHOR_GIVEN_NAME)) {
String postfix = givenNameKey.replaceFirst(AUTHOR_GIVEN_NAME, "");
if (!postfix.matches("[a-zA-Z0-9]*")) {
logger.error("Metadata author postfix is bad. Should contain only symbols a-zA-Z0-9, but "
+ postfix + " found. Author givenName field is " + givenNameKey );
System.exit(1);
}
String participantURI = TS + PARTICIPANT + "/" + PARSERNAME + "_" + docID + order + "author" + postfix;
Resource participant = m.createResource(participantURI, participantClass);
Property hasAuthor = m.createProperty(TS + "hasAuthor");
excerpt.addProperty(hasAuthor, participant);
String givenName = map.get(givenNameKey);
Property givenNameProperty = m.createProperty(TS + "participantGivenName");
participant.addProperty( givenNameProperty, givenName.trim());
String family = map.get(AUTHOR_FAMILY + postfix);
if (family != null) {
Property familyProperty = m.createProperty(TS + "participantFamily");
participant.addProperty( familyProperty, family.trim());
}
String labelName = givenNameKey.trim();
if (family != null ) {
labelName += " " + family;
}
participant.addProperty(RDFS.label, labelName);
String email = map.get(AUTHOR_EMAIL + postfix);
if (email != null) {
Property emailProperty = m.createProperty(TS + "participantEmail");
participant.addProperty( emailProperty, email.trim());
}
String authorInitials = map.get(AUTHOR_INITIALS + postfix);
if (email != null) {
Property initialsProperty = m.createProperty(TS + "participantInitials");
participant.addProperty( initialsProperty, authorInitials.trim());
}
attachOrganization(participant, map, order, postfix);
}
} }
} }
} }
private void attachAuthor(Resource resource, Map<String, String> map, String order) { private void attachOrganization(Resource participant, Map<String, String> map, String order, String postfix) {
String participantURI = TS + PARTICIPANT + "/" + PARSERNAME + "_" + docID + order ; String orgName = map.get(AFFILIATED_ORGANIZATION_NAME + postfix);
Resource participant = m.createResource(participantURI, participantClass);
Property hasAuthor = m.createProperty(TS + "hasAuthor");
resource.addProperty(hasAuthor, participant);
createParticipant(map, order, participant);
}
/**
 * Copies the author fields found in {@code map} onto {@code participant} and
 * then attaches the affiliated organization.
 *
 * <p>Each of given name, family name, email and initials is added only when the
 * corresponding key is present with a non-null value. The {@code rdfs:label} is
 * added only when a given name exists, and is the given name optionally
 * followed by the family name.
 *
 * @param map         metadata key/value pairs of one section entry
 * @param order       order of the document part, forwarded to {@code attachOrganization}
 * @param participant resource that receives the author properties
 */
private void createParticipant(Map<String, String> map, String order, Resource participant) {
    String givenName = map.get("author given name");
    if (givenName != null) {
        Property givenNameProperty = m.createProperty(TS + "participantGivenName");
        participant.addProperty(givenNameProperty, givenName.trim());
    }

    String family = map.get("author family");
    if (family != null) {
        Property familyProperty = m.createProperty(TS + "participantFamily");
        participant.addProperty(familyProperty, family.trim());
    }

    if (givenName != null) {
        String labelName = givenName.trim();
        if (family != null) {
            // Trim here as well so the label matches the stored name properties.
            labelName += " " + family.trim();
        }
        participant.addProperty(RDFS.label, labelName);
    }

    String email = map.get("author email");
    if (email != null) {
        Property emailProperty = m.createProperty(TS + "participantEmail");
        participant.addProperty(emailProperty, email.trim());
    }

    // Guard on the initials themselves: checking email here would throw a
    // NullPointerException for an author with an email but no initials, and
    // would drop the initials of an author without an email.
    String authorInitials = map.get("author initials");
    if (authorInitials != null) {
        Property initialsProperty = m.createProperty(TS + "participantInitials");
        participant.addProperty(initialsProperty, authorInitials.trim());
    }

    attachOrganization(participant, map, order);
}
private void attachOrganization(Resource participant, Map<String, String> map, String order) {
String orgName = map.get("affiliated organization name");
System.out.println(orgName);
if (orgName == null) { if (orgName == null) {
System.out.println(orgName + " not found");
logger.warn("No " + AFFILIATED_ORGANIZATION_NAME + postfix + " found.");
return; return;
} }
String organizationUri = TS + ORGANIZATION + "/" + PARSERNAME + "_" + docID + order; String organizationUri = TS + ORGANIZATION + "/" + PARSERNAME + "_" + docID + order + "author" + postfix;
Resource organization = m.createResource(organizationUri, organizationClass); Resource organization = m.createResource(organizationUri, organizationClass);
Property affiliatedWith = m.createProperty(TS + "affiliatedWith"); Property affiliatedWith = m.createProperty(TS + "affiliatedWith");
participant.addProperty(affiliatedWith, organization); participant.addProperty(affiliatedWith, organization);
organization.addProperty(RDFS.label, orgName); organization.addProperty(RDFS.label, orgName);
String officialName = map.get("affiliated organization official name"); String officialName = map.get(AFFILIATED_ORGANIZATION_OFFICIAL_NAME + postfix);
if (officialName != null) { if (officialName != null) {
Property officialNameProperty = m.createProperty(TS + "officialOrganizationName"); Property officialNameProperty = m.createProperty(TS + "officialOrganizationName");
organization.addProperty(officialNameProperty, officialName); organization.addProperty(officialNameProperty, officialName);
} }
String orgAddress = map.get("affiliated organization address"); String orgAddress = map.get(AFFILIATED_ORGANIZATION_ADDRESS + postfix);
if (orgAddress != null) { if (orgAddress != null) {
Property orgAddressProperty = m.createProperty(TS + "organizationAddress"); Property orgAddressProperty = m.createProperty(TS + "organizationAddress");
organization.addProperty(orgAddressProperty, orgAddress); organization.addProperty(orgAddressProperty, orgAddress);
} }
String postalCode = map.get("affiliated organization postal code"); String postalCode = map.get(AFFILIATED_ORGANIZATION_POSTAL_CODE + postfix);
if (postalCode != null) { if (postalCode != null) {
Property postalCodeProperty = m.createProperty(TS + "organizationPostalCode"); Property postalCodeProperty = m.createProperty(TS + "organizationPostalCode");
organization.addProperty(postalCodeProperty, postalCode); organization.addProperty(postalCodeProperty, postalCode);