vivo-1011 updating search for lcsh and agrovoc to not use skosapi jar
This commit is contained in:
parent
5b51c73b30
commit
bade3f147d
3 changed files with 361 additions and 359 deletions
|
@ -51,6 +51,7 @@ import com.hp.hpl.jena.rdf.model.Resource;
|
||||||
|
|
||||||
import edu.cornell.mannlib.semservices.bo.Concept;
|
import edu.cornell.mannlib.semservices.bo.Concept;
|
||||||
import edu.cornell.mannlib.semservices.service.ExternalConceptService;
|
import edu.cornell.mannlib.semservices.service.ExternalConceptService;
|
||||||
|
import edu.cornell.mannlib.semservices.util.SKOSUtils;
|
||||||
import edu.cornell.mannlib.semservices.util.XMLUtils;
|
import edu.cornell.mannlib.semservices.util.XMLUtils;
|
||||||
|
|
||||||
public class AgrovocService implements ExternalConceptService {
|
public class AgrovocService implements ExternalConceptService {
|
||||||
|
@ -70,16 +71,15 @@ public class AgrovocService implements ExternalConceptService {
|
||||||
@Override
|
@Override
|
||||||
public List<Concept> getConcepts(String term) throws Exception {
|
public List<Concept> getConcepts(String term) throws Exception {
|
||||||
List<Concept> conceptList = new ArrayList<Concept>();
|
List<Concept> conceptList = new ArrayList<Concept>();
|
||||||
//System.out.println("Searching for term: "+ term);
|
|
||||||
String result = getURIByTermAndLangXML(this.ontologyName, term, this.searchMode, this.format, this.lang);
|
String result = getURIByTermAndLangXML(this.ontologyName, term, this.searchMode, this.format, this.lang);
|
||||||
// return empty conceptList if conceptUri is empty
|
// return empty conceptList if conceptUri is empty
|
||||||
if (StringUtils.isEmpty(result)) {
|
if (StringUtils.isEmpty(result)) {
|
||||||
return conceptList;
|
return conceptList;
|
||||||
}
|
}
|
||||||
|
|
||||||
// First create a new SKOSManager
|
// Get the concept URI
|
||||||
String conceptUri = getConceptURIFromRDF(result);
|
String conceptUri = getConceptURIFromRDF(result);
|
||||||
SKOSManager manager = new SKOSManager();
|
|
||||||
|
|
||||||
// return empty conceptList if conceptUri is empty
|
// return empty conceptList if conceptUri is empty
|
||||||
if (StringUtils.isEmpty(conceptUri)) {
|
if (StringUtils.isEmpty(conceptUri)) {
|
||||||
|
@ -89,11 +89,37 @@ public class AgrovocService implements ExternalConceptService {
|
||||||
try {
|
try {
|
||||||
uri = new URI(conceptUri);
|
uri = new URI(conceptUri);
|
||||||
} catch (URISyntaxException e) {
|
} catch (URISyntaxException e) {
|
||||||
e.printStackTrace();
|
logger.error("Error occurred with creating the URI ", e);
|
||||||
return conceptList;
|
return conceptList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Returns concept information in the format specified, which is currently XML
|
||||||
|
//This will return
|
||||||
|
String conceptInfo = this.getConceptInfoByURI(this.ontologyName, conceptUri, this.format);
|
||||||
|
if(StringUtils.isNotEmpty(conceptInfo)) {
|
||||||
|
Concept c = this.createConcept("true", conceptUri, conceptInfo);
|
||||||
|
if(c != null) {
|
||||||
|
//Get definition from dbpedia references stored in the close Match list
|
||||||
|
List<String> closeMatches = c.getCloseMatchURIList();
|
||||||
|
for(String closeMatch: closeMatches) {
|
||||||
|
|
||||||
|
if (closeMatch.startsWith("http://dbpedia.org")) {
|
||||||
|
String description = getDbpediaDescription(closeMatch);
|
||||||
|
//System.out.println("description: "+ description);
|
||||||
|
c.setDefinition(description);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
conceptList.add(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Get the concept itself using Agrovoc's own service or OWL ontology manager
|
||||||
|
|
||||||
|
|
||||||
//System.out.println("uri: "+uri);
|
//System.out.println("uri: "+uri);
|
||||||
|
/*
|
||||||
SKOSDataset dataset = manager.loadDataset(uri);
|
SKOSDataset dataset = manager.loadDataset(uri);
|
||||||
|
|
||||||
for (SKOSConcept skosConcept : dataset.getSKOSConcepts()) {
|
for (SKOSConcept skosConcept : dataset.getSKOSConcepts()) {
|
||||||
|
@ -153,91 +179,10 @@ public class AgrovocService implements ExternalConceptService {
|
||||||
logger.debug("Literal retrieved for altlabel was null and was ignored");
|
logger.debug("Literal retrieved for altlabel was null and was ignored");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
concept.setAltLabelList(altLabelList);
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// get the broader property URI
|
}*/
|
||||||
List<String> broaderURIList = new ArrayList<String>();
|
|
||||||
for (SKOSAnnotation annotation: skosConcept.getSKOSAnnotationsByURI(dataset, manager.getSKOSDataFactory().getSKOSBroaderProperty().getURI())) {
|
|
||||||
String value = new String();
|
|
||||||
if (annotation.isAnnotationByConstant()) {
|
|
||||||
SKOSLiteral literal = annotation.getAnnotationValueAsConstant();
|
|
||||||
value = literal.getLiteral();
|
|
||||||
//System.out.println("broder uri: "+ value);
|
|
||||||
} else {
|
|
||||||
// annotation is some resource
|
|
||||||
SKOSEntity entity = annotation.getAnnotationValue();
|
|
||||||
value = entity.getURI().toString();
|
|
||||||
}
|
|
||||||
//System.out.println("broader uri: "+value);
|
|
||||||
broaderURIList.add(value);
|
|
||||||
}
|
|
||||||
concept.setBroaderURIList(broaderURIList);
|
|
||||||
|
|
||||||
// get the narrower property URI
|
|
||||||
List<String> narrowerURIList = new ArrayList<String>();
|
|
||||||
for (SKOSAnnotation annotation: skosConcept.getSKOSAnnotationsByURI(dataset, manager.getSKOSDataFactory().getSKOSNarrowerProperty().getURI())) {
|
|
||||||
String value = new String();
|
|
||||||
if (annotation.isAnnotationByConstant()) {
|
|
||||||
SKOSLiteral literal = annotation.getAnnotationValueAsConstant();
|
|
||||||
value = literal.getLiteral();
|
|
||||||
//System.out.println("narrower uri: "+ value);
|
|
||||||
} else {
|
|
||||||
// annotation is some resource
|
|
||||||
SKOSEntity entity = annotation.getAnnotationValue();
|
|
||||||
value = entity.getURI().toString();
|
|
||||||
}
|
|
||||||
//System.out.println("narrower uri: "+value);
|
|
||||||
narrowerURIList.add(value);
|
|
||||||
}
|
|
||||||
concept.setNarrowerURIList(narrowerURIList);
|
|
||||||
|
|
||||||
// exact match
|
|
||||||
List<String> exactMatchURIList = new ArrayList<String>();
|
|
||||||
for (SKOSAnnotation annotation: skosConcept.getSKOSAnnotationsByURI(dataset, manager.getSKOSDataFactory().getSKOSExactMatchProperty().getURI())) {
|
|
||||||
String value = new String();
|
|
||||||
if (annotation.isAnnotationByConstant()) {
|
|
||||||
SKOSLiteral literal = annotation.getAnnotationValueAsConstant();
|
|
||||||
value = literal.getLiteral();
|
|
||||||
//System.out.println("exact match: "+ value);
|
|
||||||
} else {
|
|
||||||
// annotation is some resource
|
|
||||||
SKOSEntity entity = annotation.getAnnotationValue();
|
|
||||||
value = entity.getURI().toString();
|
|
||||||
}
|
|
||||||
//System.out.println("exact match: "+value);
|
|
||||||
exactMatchURIList.add(value);
|
|
||||||
}
|
|
||||||
concept.setExactMatchURIList(exactMatchURIList);
|
|
||||||
|
|
||||||
// close match
|
|
||||||
List<String> closeMatchURIList = new ArrayList<String>();
|
|
||||||
for (SKOSAnnotation annotation: skosConcept.getSKOSAnnotationsByURI(dataset, manager.getSKOSDataFactory().getSKOSCloseMatchProperty().getURI())) {
|
|
||||||
String value = new String();
|
|
||||||
if (annotation.isAnnotationByConstant()) {
|
|
||||||
SKOSLiteral literal = annotation.getAnnotationValueAsConstant();
|
|
||||||
value = literal.getLiteral();
|
|
||||||
//System.out.println("close match: "+ value);
|
|
||||||
} else {
|
|
||||||
// annotation is some resource
|
|
||||||
SKOSEntity entity = annotation.getAnnotationValue();
|
|
||||||
value = entity.getURI().toString();
|
|
||||||
}
|
|
||||||
//System.out.println("close match: "+value);
|
|
||||||
closeMatchURIList.add(value);
|
|
||||||
if (value.startsWith("http://dbpedia.org")) {
|
|
||||||
String description = getDbpediaDescription(value);
|
|
||||||
//System.out.println("description: "+ description);
|
|
||||||
concept.setDefinition(description);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
concept.setCloseMatchURIList(closeMatchURIList);
|
|
||||||
|
|
||||||
conceptList.add(concept);
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
return conceptList;
|
return conceptList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,6 +191,32 @@ public class AgrovocService implements ExternalConceptService {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public Concept createConcept(String bestMatch, String skosConceptURI, String results) {
|
||||||
|
|
||||||
|
Concept concept = new Concept();
|
||||||
|
//System.out.println("Concept: " + skosConcept.getURI());
|
||||||
|
concept.setUri(skosConceptURI);
|
||||||
|
concept.setConceptId(stripConceptId(skosConceptURI));
|
||||||
|
concept.setBestMatch(bestMatch);
|
||||||
|
concept.setDefinedBy(schemeUri);
|
||||||
|
concept.setSchemeURI(this.schemeUri);
|
||||||
|
concept.setType("");
|
||||||
|
String lang = "";
|
||||||
|
//Will need to get the language attribute
|
||||||
|
|
||||||
|
//Utilize the XML directly instead of the SKOS API
|
||||||
|
try {
|
||||||
|
concept = SKOSUtils.createConceptUsingXML(concept, results, "xmlns", "en");
|
||||||
|
|
||||||
|
} catch(Exception ex) {
|
||||||
|
logger.debug("Error occurred for creating concept " + skosConceptURI, ex);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
return concept;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Deprecated
|
@Deprecated
|
||||||
protected String getTermcodeByTerm(String term) throws Exception {
|
protected String getTermcodeByTerm(String term) throws Exception {
|
||||||
|
@ -511,5 +482,24 @@ public class AgrovocService implements ExternalConceptService {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Get concept using agrovoc service
|
||||||
|
protected String getConceptInfoByURI(String ontologyName, String conceptURI, String format) {
|
||||||
|
String result = new String();
|
||||||
|
ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
|
||||||
|
try {
|
||||||
|
URL url = new URL(AgrovocWS_address);
|
||||||
|
ACSWWebService agrovoc_service = locator.getACSWWebService(url);
|
||||||
|
result = agrovoc_service.getConceptByURI(ontologyName, conceptURI, format);
|
||||||
|
} catch (ServiceException e) {
|
||||||
|
logger.error("service exception", e);
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (RemoteException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
} catch (MalformedURLException e) {
|
||||||
|
e.printStackTrace();
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,42 +6,21 @@ import java.io.BufferedReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.StringWriter;
|
import java.io.StringWriter;
|
||||||
import java.net.MalformedURLException;
|
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
import java.net.URISyntaxException;
|
import java.net.URISyntaxException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLEncoder;
|
import java.net.URLEncoder;
|
||||||
import java.rmi.RemoteException;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
import javax.xml.rpc.ServiceException;
|
|
||||||
|
|
||||||
import net.sf.json.JSONArray;
|
|
||||||
import net.sf.json.JSONObject;
|
import net.sf.json.JSONObject;
|
||||||
import net.sf.json.JSONSerializer;
|
import net.sf.json.JSONSerializer;
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.fao.www.webservices.AgrovocWS.ACSWWebService;
|
|
||||||
import org.fao.www.webservices.AgrovocWS.ACSWWebServiceServiceLocator;
|
|
||||||
import org.semanticweb.skos.SKOSAnnotation;
|
|
||||||
import org.semanticweb.skos.SKOSConcept;
|
|
||||||
import org.semanticweb.skos.SKOSDataFactory;
|
|
||||||
import org.semanticweb.skos.SKOSDataProperty;
|
|
||||||
import org.semanticweb.skos.SKOSDataRelationAssertion;
|
|
||||||
import org.semanticweb.skos.SKOSDataset;
|
|
||||||
import org.semanticweb.skos.SKOSEntity;
|
|
||||||
import org.semanticweb.skos.SKOSLiteral;
|
|
||||||
import org.semanticweb.skos.SKOSObjectRelationAssertion;
|
|
||||||
import org.semanticweb.skos.SKOSUntypedLiteral;
|
|
||||||
import org.semanticweb.skos.properties.*;
|
|
||||||
import org.semanticweb.skosapibinding.SKOSManager;
|
|
||||||
import org.w3c.dom.Attr;
|
import org.w3c.dom.Attr;
|
||||||
import org.w3c.dom.Document;
|
import org.w3c.dom.Document;
|
||||||
import org.w3c.dom.NamedNodeMap;
|
import org.w3c.dom.NamedNodeMap;
|
||||||
|
@ -49,19 +28,9 @@ import org.w3c.dom.Node;
|
||||||
import org.w3c.dom.NodeList;
|
import org.w3c.dom.NodeList;
|
||||||
import org.xml.sax.SAXException;
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import com.hp.hpl.jena.query.Query;
|
|
||||||
import com.hp.hpl.jena.query.QueryExecution;
|
|
||||||
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
|
||||||
import com.hp.hpl.jena.query.QueryFactory;
|
|
||||||
import com.hp.hpl.jena.query.QuerySolution;
|
|
||||||
import com.hp.hpl.jena.query.ResultSet;
|
|
||||||
import com.hp.hpl.jena.rdf.model.Literal;
|
|
||||||
import com.hp.hpl.jena.rdf.model.RDFNode;
|
|
||||||
import com.hp.hpl.jena.rdf.model.Resource;
|
|
||||||
|
|
||||||
import edu.cornell.mannlib.semservices.bo.Concept;
|
import edu.cornell.mannlib.semservices.bo.Concept;
|
||||||
import edu.cornell.mannlib.semservices.exceptions.ConceptsNotFoundException;
|
|
||||||
import edu.cornell.mannlib.semservices.service.ExternalConceptService;
|
import edu.cornell.mannlib.semservices.service.ExternalConceptService;
|
||||||
|
import edu.cornell.mannlib.semservices.util.SKOSUtils;
|
||||||
import edu.cornell.mannlib.semservices.util.XMLUtils;
|
import edu.cornell.mannlib.semservices.util.XMLUtils;
|
||||||
|
|
||||||
public class LCSHService implements ExternalConceptService {
|
public class LCSHService implements ExternalConceptService {
|
||||||
|
@ -69,14 +38,9 @@ public class LCSHService implements ExternalConceptService {
|
||||||
protected final Log log = LogFactory.getLog(getClass());
|
protected final Log log = LogFactory.getLog(getClass());
|
||||||
private final String skosSuffix = ".skos.rdf";
|
private final String skosSuffix = ".skos.rdf";
|
||||||
private final String hostUri = "http://id.loc.gov";
|
private final String hostUri = "http://id.loc.gov";
|
||||||
private java.lang.String LCSHWS_address = hostUri + "/authorities/subjects";
|
|
||||||
private final String schemeUri = hostUri + "/authorities/subjects";
|
private final String schemeUri = hostUri + "/authorities/subjects";
|
||||||
private final String baseUri = hostUri + "/search/";
|
private final String baseUri = hostUri + "/search/";
|
||||||
private final String ontologyName = "LCSH";
|
|
||||||
private final String format = "SKOS";
|
|
||||||
private final String lang = "en";
|
|
||||||
private final String codeName = "hasCodeAgrovoc";
|
|
||||||
private final String searchMode = "Exact Match";
|
|
||||||
protected final String dbpedia_endpoint = " http://dbpedia.org/sparql";
|
protected final String dbpedia_endpoint = " http://dbpedia.org/sparql";
|
||||||
//Property uris used for SKOS
|
//Property uris used for SKOS
|
||||||
protected final String SKOSNotePropertyURI = "http://www.w3.org/2004/02/skos/core#note";
|
protected final String SKOSNotePropertyURI = "http://www.w3.org/2004/02/skos/core#note";
|
||||||
|
@ -131,10 +95,10 @@ public class LCSHService implements ExternalConceptService {
|
||||||
// that might exist
|
// that might exist
|
||||||
private List<Concept> processOutput(String results) throws Exception {
|
private List<Concept> processOutput(String results) throws Exception {
|
||||||
List<Concept> conceptList = new ArrayList<Concept>();
|
List<Concept> conceptList = new ArrayList<Concept>();
|
||||||
SKOSManager manager = new SKOSManager();
|
//SKOSManager manager = new SKOSManager();
|
||||||
// Get uris from the results
|
// Get uris from the results
|
||||||
// Properties we will be querying for
|
// Properties we will be querying for
|
||||||
SKOSDataFactory sdf = manager.getSKOSDataFactory();
|
//SKOSDataFactory sdf = manager.getSKOSDataFactory();
|
||||||
|
|
||||||
|
|
||||||
List<String> uris = getConceptURIFromXML(results);
|
List<String> uris = getConceptURIFromXML(results);
|
||||||
|
@ -155,20 +119,10 @@ public class LCSHService implements ExternalConceptService {
|
||||||
return conceptList;
|
return conceptList;
|
||||||
}
|
}
|
||||||
log.debug("loading concept uri " + conceptUriString);
|
log.debug("loading concept uri " + conceptUriString);
|
||||||
SKOSDataset dataset = manager.loadDataset(conceptURI);
|
Concept c = this.createConcept(bestMatch, conceptUriString);
|
||||||
Set<SKOSConcept> skosConcepts = dataset.getSKOSConcepts();
|
|
||||||
log.debug("Number of skos concepts " + skosConcepts.size());
|
|
||||||
|
|
||||||
for (SKOSConcept skosConcept : skosConcepts) {
|
|
||||||
//Close matches are also being returned in list of skos concepts and
|
|
||||||
//we are interested in getting the main concept we requested only
|
|
||||||
if(skosConcept.getURI().toString().equals(baseConceptURI)) {
|
|
||||||
Concept c = this.createConcept(sdf, bestMatch, skosConcept, dataset);
|
|
||||||
if(c != null) {
|
if(c != null) {
|
||||||
conceptList.add(c);
|
conceptList.add(c);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
i++;
|
i++;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -176,12 +130,13 @@ public class LCSHService implements ExternalConceptService {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
//Load individual concept using a request
|
||||||
|
//private
|
||||||
|
|
||||||
//Will use skos if does not encounter error from skos api, otherwise will use regular XML parsing techniques
|
public Concept createConcept(String bestMatch, String skosConceptURI) {
|
||||||
public Concept createConcept(SKOSDataFactory skosDataFactory, String bestMatch, SKOSConcept skosConcept, SKOSDataset dataset) {
|
|
||||||
|
|
||||||
Concept concept = new Concept();
|
Concept concept = new Concept();
|
||||||
String skosConceptURI = skosConcept.getURI().toString();
|
|
||||||
log.debug("SKOSConceptURI is " + skosConceptURI);
|
log.debug("SKOSConceptURI is " + skosConceptURI);
|
||||||
// get skos version of uri
|
// get skos version of uri
|
||||||
|
|
||||||
|
@ -192,208 +147,21 @@ public class LCSHService implements ExternalConceptService {
|
||||||
concept.setSchemeURI(schemeUri);
|
concept.setSchemeURI(schemeUri);
|
||||||
concept.setType("");
|
concept.setType("");
|
||||||
|
|
||||||
//Get the skos annotations first to see if there is an error triggered, if so try and see if we can instead utilize XML
|
//Utilize the XML directly instead of the SKOS API
|
||||||
//For some of the SKOS concepts, a null pointer exception occurs while XML processing still works
|
|
||||||
//I do not yet know the reasons, hjk54
|
|
||||||
try {
|
try {
|
||||||
Set<SKOSAnnotation> skosAnnots = skosConcept
|
concept = SKOSUtils.createConceptUsingXMLFromURI(concept, skosConceptURI, "abbreviated");
|
||||||
.getSKOSAnnotations(dataset);
|
|
||||||
} catch(NullPointerException ex) {
|
|
||||||
concept = createConceptUsingXML(concept, bestMatch, skosConcept);
|
|
||||||
return concept;
|
|
||||||
} catch(Exception ex) {
|
} catch(Exception ex) {
|
||||||
log.debug("Error occurred for annotation retrieval for skos concept " + skosConceptURI, ex);
|
log.debug("Error occurred for annotation retrieval for skos concept " + skosConceptURI, ex);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
concept = this.createConceptUsingSKOS(skosDataFactory, concept, skosConcept, dataset);
|
|
||||||
return concept;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Concept createConceptUsingSKOS(SKOSDataFactory skosDataFactory, Concept concept, SKOSConcept skosConcept, SKOSDataset dataset) {
|
|
||||||
|
|
||||||
SKOSPrefLabelProperty prefLabelProperty = skosDataFactory.getSKOSPrefLabelProperty();
|
|
||||||
SKOSAltLabelProperty altLabelProperty = skosDataFactory.getSKOSAltLabelProperty();
|
|
||||||
|
|
||||||
try {
|
|
||||||
List<String> labelLiterals = this.getSKOSLiteralValues(skosConcept
|
|
||||||
.getSKOSRelatedConstantByProperty(dataset,
|
|
||||||
prefLabelProperty));
|
|
||||||
if(labelLiterals.size() > 0) {
|
|
||||||
concept.setLabel(labelLiterals.get(0));
|
|
||||||
} else {
|
|
||||||
//This is an error because there should be at least one label returned
|
|
||||||
log.debug("The number of preferred labels is not greater than zero");
|
|
||||||
}
|
|
||||||
|
|
||||||
// get altLabels
|
|
||||||
List<String> altLabelList = this.getSKOSLiteralValues(skosConcept
|
|
||||||
.getSKOSRelatedConstantByProperty(dataset, altLabelProperty));
|
|
||||||
concept.setAltLabelList(altLabelList);
|
|
||||||
|
|
||||||
// See if we can get a description as well
|
|
||||||
List<String> notes = this.getSKOSAnnotationValues(skosConcept
|
|
||||||
.getSKOSAnnotationsByURI(dataset, new URI(this.SKOSNotePropertyURI)));
|
|
||||||
|
|
||||||
concept.setDefinition(StringUtils.join(notes, ","));
|
|
||||||
|
|
||||||
// get the broader property URI
|
|
||||||
List<String> broaderURIList = this.getSKOSAnnotationValues(skosConcept
|
|
||||||
.getSKOSAnnotationsByURI(dataset, new URI(this.SKOSBroaderURI)));
|
|
||||||
concept.setBroaderURIList(broaderURIList);
|
|
||||||
|
|
||||||
// get the narrower property URI
|
|
||||||
List<String> narrowerURIList = this.getSKOSAnnotationValues(skosConcept
|
|
||||||
.getSKOSAnnotationsByURI(dataset, new URI(this.SKOSNarrowerURI)));
|
|
||||||
concept.setNarrowerURIList(narrowerURIList);
|
|
||||||
|
|
||||||
// exact match
|
|
||||||
List<String> exactMatchURIList = this.getSKOSAnnotationValues(skosConcept
|
|
||||||
.getSKOSAnnotationsByURI(dataset,
|
|
||||||
new URI(this.SKOSExactMatchURI)));
|
|
||||||
concept.setExactMatchURIList(exactMatchURIList);
|
|
||||||
|
|
||||||
// close match
|
|
||||||
List<String> closeMatchURIList = this.getSKOSAnnotationValues(skosConcept
|
|
||||||
.getSKOSAnnotationsByURI(dataset,
|
|
||||||
new URI(this.SKOSCloseMatchURI)));
|
|
||||||
concept.setCloseMatchURIList(closeMatchURIList);
|
|
||||||
log.debug("add concept to list");
|
|
||||||
} catch (Exception ex) {
|
|
||||||
log.debug("Exception occurred for -" + skosConcept.getURI()
|
|
||||||
+ "- " + ex.getMessage(), ex);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return concept;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private List<String> getSKOSLiteralValues(Set<SKOSLiteral> skosLiterals) {
|
|
||||||
String lang = "";
|
|
||||||
List<String> literalValues = new ArrayList<String>();
|
|
||||||
for (SKOSLiteral literal : skosLiterals) {
|
|
||||||
if(literal != null) {
|
|
||||||
if (!literal.isTyped()) {
|
|
||||||
// if it has language
|
|
||||||
SKOSUntypedLiteral untypedLiteral = literal
|
|
||||||
.getAsSKOSUntypedLiteral();
|
|
||||||
if (untypedLiteral.hasLang()) {
|
|
||||||
lang = untypedLiteral.getLang();
|
|
||||||
} else {
|
|
||||||
lang = "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// log.debug("literal: "+ literal.getLiteral());
|
|
||||||
if (lang.equals("en")) {
|
|
||||||
log.debug("literal value: " + literal.getLiteral());
|
|
||||||
literalValues.add(literal.getLiteral());
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
log.debug("Literal returned was null so was ignored");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return literalValues;
|
|
||||||
}
|
|
||||||
|
|
||||||
//For a given set of annotations (for example, for a specific property)
|
|
||||||
private List<String> getSKOSAnnotationValues(Set<SKOSAnnotation> skosAnnotations) {
|
|
||||||
List<String> valuesList = new ArrayList<String>();
|
|
||||||
for (SKOSAnnotation annotation : skosAnnotations) {
|
|
||||||
String value = this.getSKOSAnnotationStringValue(annotation);
|
|
||||||
valuesList.add(value);
|
|
||||||
}
|
|
||||||
return valuesList;
|
|
||||||
}
|
|
||||||
|
|
||||||
//Get string value for annotation
|
|
||||||
private String getSKOSAnnotationStringValue(SKOSAnnotation annotation) {
|
|
||||||
String value = new String();
|
|
||||||
if (annotation.isAnnotationByConstant()) {
|
|
||||||
SKOSLiteral literal = annotation
|
|
||||||
.getAnnotationValueAsConstant();
|
|
||||||
value = literal.getLiteral();
|
|
||||||
log.debug("broder uri: " + value);
|
|
||||||
} else {
|
|
||||||
// annotation is some resource
|
|
||||||
SKOSEntity entity = annotation.getAnnotationValue();
|
|
||||||
value = entity.getURI().toString();
|
|
||||||
}
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
//this method relies on the XML of the single SKOS rdf concept in case the SKOS api throws a null pointer exception
|
|
||||||
private Concept createConceptUsingXML(Concept concept, String bestMatch,
|
|
||||||
SKOSConcept skosConcept) {
|
|
||||||
String conceptUriString = skosConcept.getURI().toString() + this.skosSuffix;;
|
|
||||||
|
|
||||||
URL conceptURL = null;
|
|
||||||
try {
|
|
||||||
conceptURL = new URL(conceptUriString);
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error("Exception occurred in instantiating URL for " + conceptUriString, e);
|
|
||||||
//If the url is having trouble, just return null for the concept
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
log.debug("loading concept uri " + conceptUriString);
|
|
||||||
|
|
||||||
|
|
||||||
String results = null;
|
|
||||||
try {
|
|
||||||
|
|
||||||
StringWriter sw = new StringWriter();
|
|
||||||
|
|
||||||
|
|
||||||
BufferedReader in = new BufferedReader(new InputStreamReader(
|
|
||||||
conceptURL.openStream()));
|
|
||||||
String inputLine;
|
|
||||||
while ((inputLine = in.readLine()) != null) {
|
|
||||||
sw.write(inputLine);
|
|
||||||
}
|
|
||||||
in.close();
|
|
||||||
|
|
||||||
results = sw.toString();
|
|
||||||
log.debug(results);
|
|
||||||
} catch (Exception ex) {
|
|
||||||
log.error("Error occurred in getting concept from the URL " + conceptUriString, ex);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
try {
|
|
||||||
Document doc = XMLUtils.parse(results);
|
|
||||||
List<String> labelLiterals = this.getValuesFromXMLNodes(doc, "skos:prefLabel", null);
|
|
||||||
if(labelLiterals.size() > 0) {
|
|
||||||
concept.setLabel(labelLiterals.get(0));
|
|
||||||
} else {
|
|
||||||
//This is an error because there should be at least one label returned
|
|
||||||
log.debug("The number of preferred labels is not greater than zero");
|
|
||||||
}
|
|
||||||
List<String> altLabelList = this.getValuesFromXMLNodes(doc, "skos:altLabel", null);
|
|
||||||
concept.setAltLabelList(altLabelList);
|
|
||||||
|
|
||||||
List<String> broaderURIList = this.getValuesFromXMLNodes(doc, "skos:broader", "rdf:resource");
|
|
||||||
concept.setBroaderURIList(broaderURIList);
|
|
||||||
List<String> narrowerURIList = this.getValuesFromXMLNodes(doc, "skos:narrower", "rdf:resource");
|
|
||||||
concept.setNarrowerURIList(narrowerURIList);
|
|
||||||
|
|
||||||
List<String> exactMatchURIList = this.getValuesFromXMLNodes(doc, "skos:exactMatch", "rdf:resource");
|
|
||||||
concept.setExactMatchURIList(exactMatchURIList);
|
|
||||||
List<String> closeMatchURIList = this.getValuesFromXMLNodes(doc, "skos:closeMatch", "rdf:resource");
|
|
||||||
concept.setCloseMatchURIList(closeMatchURIList);
|
|
||||||
|
|
||||||
|
|
||||||
} catch (IOException e) {
|
|
||||||
log.error("error occurred in parsing " + results, e);
|
|
||||||
} catch (SAXException e) {
|
|
||||||
log.error("error occurred in parsing " + results, e);
|
|
||||||
} catch (ParserConfigurationException e) {
|
|
||||||
log.error("error occurred in parsing " + results, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
return concept;
|
return concept;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private String getSKOSURI(String uri) {
|
private String getSKOSURI(String uri) {
|
||||||
// Strip .xml at the end and replace with .skos.rdf
|
// Strip .xml at the end and replace with .skos.rdf
|
||||||
String skosURI = uri;
|
String skosURI = uri;
|
||||||
|
@ -495,31 +263,7 @@ public class LCSHService implements ExternalConceptService {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public List<String> getValuesFromXMLNodes(Document doc, String tagName, String attributeName) {
|
|
||||||
NodeList nodes = doc.getElementsByTagName(tagName);
|
|
||||||
|
|
||||||
return getValuesFromXML(nodes, attributeName);
|
|
||||||
}
|
|
||||||
|
|
||||||
//Returns list of values based on nodes and whether or not a specific attribute name should be used or just the text content
|
|
||||||
public List<String> getValuesFromXML(NodeList nodes, String attributeName) {
|
|
||||||
int len = nodes.getLength();
|
|
||||||
int i;
|
|
||||||
List<String> values = new ArrayList<String>();
|
|
||||||
for (i = 0; i < len; i++) {
|
|
||||||
Node node = nodes.item(i);
|
|
||||||
if(attributeName != null && !attributeName.isEmpty()) {
|
|
||||||
NamedNodeMap attrs = node.getAttributes();
|
|
||||||
Attr a = (Attr)attrs.getNamedItem(attributeName);
|
|
||||||
if(a != null) {
|
|
||||||
values.add(a.getTextContent());
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
values.add(node.getTextContent());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return values;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
268
src/edu/cornell/mannlib/semservices/util/SKOSUtils.java
Normal file
268
src/edu/cornell/mannlib/semservices/util/SKOSUtils.java
Normal file
|
@ -0,0 +1,268 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
/* We are no longer using the SKOS API since Vitro has moved to V 4.0 of OWL API which does not appear to be compatible.
|
||||||
|
This file will contain methods used for reading SKOS as XML and parsing it for the properties
|
||||||
|
we want to extract*/
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.semservices.util;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.StringWriter;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.w3c.dom.Attr;
|
||||||
|
import org.w3c.dom.Document;
|
||||||
|
import org.w3c.dom.NamedNodeMap;
|
||||||
|
import org.w3c.dom.Node;
|
||||||
|
import org.w3c.dom.NodeList;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.semservices.bo.Concept;
|
||||||
|
|
||||||
|
public class SKOSUtils {
|
||||||
|
protected final static Log log = LogFactory.getLog(SKOSUtils.class);
|
||||||
|
|
||||||
|
public static String getConceptXML(String conceptUriString) {
|
||||||
|
URL conceptURL = null;
|
||||||
|
try {
|
||||||
|
conceptURL = new URL(conceptUriString);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Exception occurred in instantiating URL for "
|
||||||
|
+ conceptUriString, e);
|
||||||
|
// If the url is having trouble, just return null for the concept
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
log.debug("loading concept uri " + conceptUriString);
|
||||||
|
|
||||||
|
String results = null;
|
||||||
|
try {
|
||||||
|
|
||||||
|
StringWriter sw = new StringWriter();
|
||||||
|
|
||||||
|
BufferedReader in = new BufferedReader(new InputStreamReader(
|
||||||
|
conceptURL.openStream()));
|
||||||
|
String inputLine;
|
||||||
|
while ((inputLine = in.readLine()) != null) {
|
||||||
|
sw.write(inputLine);
|
||||||
|
}
|
||||||
|
in.close();
|
||||||
|
|
||||||
|
results = sw.toString();
|
||||||
|
log.debug(results);
|
||||||
|
} catch (Exception ex) {
|
||||||
|
log.error("Error occurred in getting concept from the URL "
|
||||||
|
+ conceptUriString, ex);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Downloading the XML from the URI itself
|
||||||
|
//No language tag support here but can be specified if need be at this level as well
|
||||||
|
public static Concept createConceptUsingXMLFromURI(Concept concept,
|
||||||
|
String conceptUriString, String relationshipScheme) {
|
||||||
|
String results = getConceptXML(conceptUriString);
|
||||||
|
if (StringUtils.isEmpty(results)) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return createConceptUsingXML(concept, results, relationshipScheme, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Create concept given the actual XML (results)
|
||||||
|
// Lang tag value, if populated, will return pref label and alt label which
|
||||||
|
// match that language tag value
|
||||||
|
public static Concept createConceptUsingXML(Concept concept,
|
||||||
|
String results, String relationshipScheme, String langTagValue) {
|
||||||
|
|
||||||
|
HashMap<String, String> relationshipHash = getRelationshipHash(relationshipScheme);
|
||||||
|
try {
|
||||||
|
Document doc = XMLUtils.parse(results);
|
||||||
|
// Preferred label
|
||||||
|
List<String> labelLiterals = new ArrayList<String>();
|
||||||
|
if (StringUtils.isNotEmpty(langTagValue)) {
|
||||||
|
labelLiterals = getValuesFromXMLNodes(doc,
|
||||||
|
getPrefLabelTag(relationshipHash), "xml:lang", langTagValue);
|
||||||
|
} else {
|
||||||
|
labelLiterals = getValuesFromXMLNodes(doc,
|
||||||
|
getPrefLabelTag(relationshipHash), null);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (labelLiterals.size() > 0) {
|
||||||
|
concept.setLabel(labelLiterals.get(0));
|
||||||
|
} else {
|
||||||
|
// This is an error because there should be at least one label
|
||||||
|
// returned
|
||||||
|
log.debug("The number of preferred labels is not greater than zero");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Alternate label
|
||||||
|
|
||||||
|
List<String> altLabelList = new ArrayList<String>();
|
||||||
|
//if language tag is specified, get node values matching that language tag
|
||||||
|
if (StringUtils.isNotEmpty(langTagValue)) {
|
||||||
|
altLabelList = getValuesFromXMLNodes(doc,
|
||||||
|
//TODO: Check if xml:lang or a different version should be used
|
||||||
|
getAltLabelTag(relationshipHash), "xml:lang", langTagValue);
|
||||||
|
} else {
|
||||||
|
altLabelList = getValuesFromXMLNodes(doc,
|
||||||
|
getAltLabelTag(relationshipHash), null);
|
||||||
|
}
|
||||||
|
concept.setAltLabelList(altLabelList);
|
||||||
|
|
||||||
|
|
||||||
|
//Broder, narrower, exact match, and close match properties
|
||||||
|
List<String> broaderURIList = getValuesFromXMLNodes(doc,
|
||||||
|
getBroaderTag(relationshipHash), "rdf:resource");
|
||||||
|
concept.setBroaderURIList(broaderURIList);
|
||||||
|
List<String> narrowerURIList = getValuesFromXMLNodes(doc,
|
||||||
|
getNarrowerTag(relationshipHash), "rdf:resource");
|
||||||
|
concept.setNarrowerURIList(narrowerURIList);
|
||||||
|
|
||||||
|
List<String> exactMatchURIList = getValuesFromXMLNodes(doc,
|
||||||
|
getExactMatchTag(relationshipHash), "rdf:resource");
|
||||||
|
concept.setExactMatchURIList(exactMatchURIList);
|
||||||
|
List<String> closeMatchURIList = getValuesFromXMLNodes(doc,
|
||||||
|
getCloseMatchTag(relationshipHash), "rdf:resource");
|
||||||
|
concept.setCloseMatchURIList(closeMatchURIList);
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("error occurred in parsing " + results, e);
|
||||||
|
} catch (SAXException e) {
|
||||||
|
log.error("error occurred in parsing " + results, e);
|
||||||
|
} catch (ParserConfigurationException e) {
|
||||||
|
log.error("error occurred in parsing " + results, e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return concept;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// Default to English for search results but this should be made
|
||||||
|
// configurable
|
||||||
|
public static List<String> getValuesFromXMLNodes(Document doc,
|
||||||
|
String tagName, String attributeName) {
|
||||||
|
return getValuesFromXMLNodes(doc, tagName, attributeName, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static List<String> getValuesFromXMLNodes(Document doc,
|
||||||
|
String tagName, String attributeName, String matchAttributeValue) {
|
||||||
|
NodeList nodes = doc.getElementsByTagName(tagName);
|
||||||
|
|
||||||
|
return getValuesFromXML(nodes, attributeName, matchAttributeValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns list of values based on nodes and whether or not a specific
|
||||||
|
// attribute name should be used or just the text content
|
||||||
|
// Attribute name returns the value for the attribute on the node
|
||||||
|
// MatchAttributeValue: returns NODE values that MATCH this value for
|
||||||
|
// attributeName
|
||||||
|
public static List<String> getValuesFromXML(NodeList nodes,
|
||||||
|
String attributeName, String matchAttributeValue) {
|
||||||
|
int len = nodes.getLength();
|
||||||
|
int i;
|
||||||
|
|
||||||
|
List<String> values = new ArrayList<String>();
|
||||||
|
for (i = 0; i < len; i++) {
|
||||||
|
Node node = nodes.item(i);
|
||||||
|
// If no attribute name specified, then get the node content
|
||||||
|
if (StringUtils.isEmpty(attributeName)) {
|
||||||
|
values.add(node.getTextContent());
|
||||||
|
} else {
|
||||||
|
// Attribute name is specified
|
||||||
|
// Get the value for the attribute itself
|
||||||
|
String attributeValue = getAttributeValue(attributeName, node);
|
||||||
|
// If no matching value for attribute specified, return the
|
||||||
|
// value of the attribute itself
|
||||||
|
// e.g. value of "lang" attribute which is "en"
|
||||||
|
if (StringUtils.isEmpty(matchAttributeValue)) {
|
||||||
|
values.add(attributeValue);
|
||||||
|
} else {
|
||||||
|
// match attribute and match value are both specified, so
|
||||||
|
// return NODE value that matches attribute value for given
|
||||||
|
// attribute name
|
||||||
|
// e.g. preferred label node value where lang = "en"
|
||||||
|
if (attributeValue.equals(matchAttributeValue)) {
|
||||||
|
values.add(node.getTextContent());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return values;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getAttributeValue(String attributeName, Node node) {
|
||||||
|
NamedNodeMap attrs = node.getAttributes();
|
||||||
|
Attr a = (Attr) attrs.getNamedItem(attributeName);
|
||||||
|
if (a != null) {
|
||||||
|
return a.getTextContent();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The Hash will depend on the particular RDF results
|
||||||
|
// TODO: Refactor this in a better method
|
||||||
|
public static HashMap<String, String> getRelationshipHash(String tagset) {
|
||||||
|
HashMap<String, String> relationshipHash = new HashMap<String, String>();
|
||||||
|
String[] tagsArray = { "prefLabel", "altLabel", "broader", "narrower",
|
||||||
|
"exactMatch", "closeMatch" };
|
||||||
|
List<String> tags = Arrays.asList(tagsArray);
|
||||||
|
|
||||||
|
switch (tagset) {
|
||||||
|
case "xmlns":
|
||||||
|
for (String tag : tags) {
|
||||||
|
relationshipHash.put(tag, tag);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case "abbreviated":
|
||||||
|
for (String tag : tags) {
|
||||||
|
relationshipHash.put(tag, "skos:" + tag);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return relationshipHash;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getPrefLabelTag(
|
||||||
|
HashMap<String, String> relationshipHash) {
|
||||||
|
return relationshipHash.get("prefLabel");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getAltLabelTag(HashMap<String, String> relationshipHash) {
|
||||||
|
return relationshipHash.get("altLabel");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getCloseMatchTag(
|
||||||
|
HashMap<String, String> relationshipHash) {
|
||||||
|
return relationshipHash.get("closeMatch");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getExactMatchTag(
|
||||||
|
HashMap<String, String> relationshipHash) {
|
||||||
|
return relationshipHash.get("exactMatch");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getBroaderTag(HashMap<String, String> relationshipHash) {
|
||||||
|
return relationshipHash.get("broader");
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getNarrowerTag(HashMap<String, String> relationshipHash) {
|
||||||
|
return relationshipHash.get("narrower");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue