VIVO-1023 expanded Agrovoc results and replaced XML parsing with RDF model parsing for both LCSH and Agrovoc

This commit is contained in:
hudajkhan 2015-04-24 12:38:46 -04:00
parent e8a2bf60b3
commit 583423495e
3 changed files with 582 additions and 551 deletions

View file

@ -2,7 +2,6 @@
package edu.cornell.mannlib.semservices.service.impl;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
@ -14,12 +13,18 @@ import java.net.URL;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.rpc.ServiceException;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import net.sf.json.JSONSerializer;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -48,142 +53,183 @@ import edu.cornell.mannlib.semservices.util.SKOSUtils;
import edu.cornell.mannlib.semservices.util.XMLUtils;
import edu.cornell.mannlib.vitro.webapp.web.URLEncoder;
public class AgrovocService implements ExternalConceptService {
public class AgrovocService implements ExternalConceptService {
protected final Log logger = LogFactory.getLog(getClass());
private java.lang.String AgrovocWS_address = "http://agrovoc.fao.org/axis/services/SKOSWS";
private final String schemeUri = "http://aims.fao.org/aos/agrovoc/agrovocScheme";
private final String ontologyName = "agrovoc";
private final String format = "SKOS";
private final String lang = "en";
private final String searchMode = "starts with";//Used to be Exact Match, or exact word or starts with
protected final String dbpedia_endpoint = " http://dbpedia.org/sparql";
// URL to get all the information for a concept
protected final Log logger = LogFactory.getLog(getClass());
private java.lang.String AgrovocWS_address = "http://agrovoc.fao.org/axis/services/SKOSWS";
private final String schemeUri = "http://aims.fao.org/aos/agrovoc/agrovocScheme";
private final String ontologyName = "agrovoc";
private final String format = "SKOS";
private final String lang = "en";
private final String searchMode = "Exact Match";
protected final String dbpedia_endpoint = " http://dbpedia.org/sparql";
//URL to get all the information for a concept
protected final String conceptSkosMosURL = "http://aims.fao.org/skosmos/rest/v1/agrovoc/data?";
// Root of the SKOSMOS REST API. It ends with a slash, so the paths appended
// below must not start with one.
protected final String conceptSkosMosBase = "http://aims.fao.org/skosmos/rest/v1/";
// Search endpoint; callers append "query=...&lang=..."
protected final String conceptsSkosMosSearch = conceptSkosMosBase + "search?";
// Concept data endpoint for the agrovoc vocabulary; callers append
// "uri=...&format=...". No leading slash here, otherwise the URL would
// contain "v1//agrovoc".
protected final String conceptSkosMosURL = conceptSkosMosBase + "agrovoc/data?";
@Override
public List<Concept> getConcepts(String term) throws Exception {
List<Concept> conceptList = new ArrayList<Concept>();
String result = getURIByTermAndLangXML(this.ontologyName, term, this.searchMode, this.format, this.lang);
// return empty conceptList if conceptUri is empty
if (StringUtils.isEmpty(result)) {
return conceptList;
}
// Get the concept URI
String conceptUri = getConceptURIFromRDF(result);
// return empty conceptList if conceptUri is empty
if (StringUtils.isEmpty(conceptUri)) {
return conceptList;
}
URI uri = null;
try {
uri = new URI(conceptUri);
} catch (URISyntaxException e) {
logger.error("Error occurred with creating the URI ", e);
//For the RDF webservices mechanism, utilize the following
/*
String result = getTermExpansion(this.ontologyName, term,
this.searchMode, this.format, this.lang);
// return empty conceptList if conceptUri is empty
if (StringUtils.isEmpty(result)) {
return conceptList;
}
// Get the list of the concept URIs in the RDF
List<String> conceptUris = getConceptURIsListFromRDF(result);
*/
//Returns concept information in the format specified, which is currently XML
//Utilizing Agrovoc's getConceptInfo returns alternate and preferred labels but
//none of the exact match or close match descriptions
Concept c = this.createConcept("true", conceptUri);
if(c != null) {
//Get definition from dbpedia references stored in the close Match list
List<String> closeMatches = c.getCloseMatchURIList();
for(String closeMatch: closeMatches) {
if (closeMatch.startsWith("http://dbpedia.org")) {
String description = getDbpediaDescription(closeMatch);
//System.out.println("description: "+ description);
c.setDefinition(description);
}
//For the SKOSMos search mechanism, utilize this instead
String result = getSKOSMosSearchResults(term, this.lang);
List<String> conceptUris = getConceptURIsListFromSkosMosResult(result);
if (conceptUris.size() == 0)
return conceptList;
int conceptCounter = 0;
HashSet<String> encounteredURI = new HashSet<String>();
// Loop through each of these URIs and load using the SKOSManager
for (String conceptUri : conceptUris) {
conceptCounter++;
if (StringUtils.isEmpty(conceptUri)) {
// If the conceptURI is empty, keep going
continue;
}
if(encounteredURI.contains(conceptUri)) {
//If we have already encountered this concept URI, do not redisplay or reprocess
continue;
}
encounteredURI.add(conceptUri);
// Test and see if the URI is valid
URI uri = null;
try {
uri = new URI(conceptUri);
} catch (URISyntaxException e) {
logger.error("Error occurred with creating the URI ", e);
continue;
}
// Returns concept information in the format specified, which is
// currently XML
// Utilizing Agrovoc's getConceptInfo returns alternate and
// preferred labels but
// none of the exact match or close match descriptions
String bestMatch = "false";
//Assume the first result is considered the 'best match'
//Although that is not something we are actually retrieving from the service itself explicitly
if(conceptCounter == 1) {
bestMatch = "true";
}
Concept c = this.createConcept(bestMatch, conceptUri);
if (c != null) {
// Get definition from dbpedia references stored in the close
// Match list
List<String> closeMatches = c.getCloseMatchURIList();
for (String closeMatch : closeMatches) {
if (closeMatch.startsWith("http://dbpedia.org")) {
try {
String description = getDbpediaDescription(closeMatch);
// System.out.println("description: "+ description);
c.setDefinition(description);
} catch (Exception ex) {
logger.error("An error occurred in the process of retrieving dbpedia description", ex);
}
}
}
conceptList.add(c);
}
conceptList.add(c);
}
return conceptList;
}
public List<Concept> processResults(String term) throws Exception {
return getConcepts(term);
}
public Concept createConcept(String bestMatch, String skosConceptURI) {
Concept concept = new Concept();
concept.setUri(skosConceptURI);
concept.setConceptId(stripConceptId(skosConceptURI));
concept.setBestMatch(bestMatch);
concept.setDefinedBy(schemeUri);
concept.setSchemeURI(this.schemeUri);
concept.setType("");
String encodedURI = URLEncoder.encode(skosConceptURI);
String encodedFormat = URLEncoder.encode("application/rdf+xml");
String url = conceptSkosMosURL + "uri=" + encodedURI + "&format="+ encodedFormat;
//Utilize the XML directly instead of the SKOS API
/**
 * Alias for {@link #getConcepts(String)}.
 *
 * @param term the search term, handed on unchanged
 * @return whatever getConcepts returns for the term
 * @throws Exception if getConcepts throws
 */
public List<Concept> processResults(String term) throws Exception {
    List<Concept> concepts = getConcepts(term);
    return concepts;
}
public Concept createConcept(String bestMatch, String skosConceptURI) {
Concept concept = new Concept();
concept.setUri(skosConceptURI);
concept.setConceptId(stripConceptId(skosConceptURI));
concept.setBestMatch(bestMatch);
concept.setDefinedBy(schemeUri);
concept.setSchemeURI(this.schemeUri);
concept.setType("");
String encodedURI = URLEncoder.encode(skosConceptURI);
String encodedFormat = URLEncoder.encode("application/rdf+xml");
String url = conceptSkosMosURL + "uri=" + encodedURI + "&format="
+ encodedFormat;
// Utilize the XML directly instead of the SKOS API
try {
concept = SKOSUtils.createConceptUsingXMLFromURI(concept, url, "en");
} catch(Exception ex) {
logger.debug("Error occurred for creating concept " + skosConceptURI, ex);
concept = SKOSUtils
.createConceptUsingXMLFromURL(concept, url, "en", false);
} catch (Exception ex) {
logger.debug("Error occurred for creating concept "
+ skosConceptURI, ex);
return null;
}
return concept;
}
@Deprecated
protected String getTermcodeByTerm(String term) throws Exception {
String result = new String();
ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
try {
URL url = new URL(AgrovocWS_address);
ACSWWebService agrovoc_service = locator.getACSWWebService(url);
result = agrovoc_service.getTermcodeByTerm(term);
} catch (ServiceException e) {
logger.error("service exception", e);
throw e;
} catch (RemoteException e) {
logger.error("remote exception", e);
throw e;
} catch (MalformedURLException e) {
logger.error("malformed URL exception", e);
throw e;
}
return result;
}
protected String getTermCodeByTermAndLangXML(String ontologyName, String searchString, String lang, String codeName, String format) {
String result = new String();
ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
try {
URL url = new URL(AgrovocWS_address);
ACSWWebService agrovoc_service = locator.getACSWWebService(url);
result = agrovoc_service.getTermCodeByTermAndLangXML(ontologyName, searchString, lang, codeName, format);
} catch (ServiceException e) {
logger.error("service exception", e);
e.printStackTrace();
} catch (RemoteException e) {
e.printStackTrace();
} catch (MalformedURLException e) {
e.printStackTrace();
}
return result;
}
/**
 * Asks the Agrovoc web service at AgrovocWS_address for the term code of
 * a term. Any failure is logged and then rethrown to the caller.
 *
 * @param term the term to look up
 * @return the value returned by the service
 * @throws Exception the ServiceException, RemoteException or
 *         MalformedURLException raised while calling the service
 */
@Deprecated
protected String getTermcodeByTerm(String term) throws Exception {
    ACSWWebServiceServiceLocator serviceLocator = new ACSWWebServiceServiceLocator();
    try {
        ACSWWebService agrovoc = serviceLocator.getACSWWebService(new URL(AgrovocWS_address));
        return agrovoc.getTermcodeByTerm(term);
    } catch (ServiceException e) {
        logger.error("service exception", e);
        throw e;
    } catch (RemoteException e) {
        logger.error("remote exception", e);
        throw e;
    } catch (MalformedURLException e) {
        logger.error("malformed URL exception", e);
        throw e;
    }
}
/**
 * Calls getTermCodeByTermAndLangXML on the Agrovoc web service at
 * AgrovocWS_address, passing all arguments through unchanged.
 *
 * This is a best-effort call: a failure is written to the class logger
 * and an empty string is returned instead of an exception.
 *
 * @param ontologyName forwarded to the service
 * @param searchString forwarded to the service
 * @param lang forwarded to the service
 * @param codeName forwarded to the service
 * @param format forwarded to the service
 * @return the service response, or an empty string if the call failed
 */
protected String getTermCodeByTermAndLangXML(String ontologyName,
        String searchString, String lang, String codeName, String format) {
    String result = "";
    ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
    try {
        URL url = new URL(AgrovocWS_address);
        ACSWWebService agrovoc_service = locator.getACSWWebService(url);
        result = agrovoc_service.getTermCodeByTermAndLangXML(ontologyName,
                searchString, lang, codeName, format);
    } catch (ServiceException e) {
        // Every failure goes through the logger (not stderr) so it ends
        // up in the application log with its stack trace.
        logger.error("service exception", e);
    } catch (RemoteException e) {
        logger.error("remote exception", e);
    } catch (MalformedURLException e) {
        logger.error("malformed URL exception", e);
    }
    return result;
}
protected String getURIByTermAndLangXML(String ontologyName, String term,
String searchMode, String format, String lang) {
@ -205,102 +251,122 @@ public class AgrovocService implements ExternalConceptService {
return result;
}
// Creating method for term expansion
protected String getTermExpansion(String ontologyName, String term,
String searchMode, String format, String lang) {
String result = new String();
ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
try {
URL url = new URL(AgrovocWS_address);
ACSWWebService agrovoc_service = locator.getACSWWebService(url);
result = agrovoc_service.getTermExpansion(ontologyName, term,
format, searchMode, lang); // the actual call has this order
// for parameters
} catch (ServiceException e) {
e.printStackTrace();
} catch (RemoteException e) {
e.printStackTrace();
} catch (MalformedURLException e) {
e.printStackTrace();
}
protected String getConceptInfoByTermcodeXML(String termcode, String format) {
String result = new String();
ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
try {
URL url = new URL(AgrovocWS_address);
ACSWWebService agrovoc_service = locator.getACSWWebService(url);
result = agrovoc_service.getConceptInfoByTermcodeXML(termcode, format);
} catch (ServiceException e) {
logger.error("service exception", e);
e.printStackTrace();
} catch (RemoteException e) {
e.printStackTrace();
} catch (MalformedURLException e) {
e.printStackTrace();
}
return result;
}
return result;
}
protected String getConceptByKeyword(String ontologyName, String searchString, String format, String searchMode, String lang) {
String result = new String();
ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
try {
URL url = new URL(AgrovocWS_address);
ACSWWebService agrovoc_service = locator.getACSWWebService(url);
result = agrovoc_service.getConceptByKeyword(ontologyName, searchString, format, searchMode, lang);
} catch (ServiceException e) {
logger.error("service exception", e);
e.printStackTrace();
} catch (RemoteException e) {
e.printStackTrace();
} catch (MalformedURLException e) {
e.printStackTrace();
}
/**
 * Calls getConceptInfoByTermcodeXML on the Agrovoc web service at
 * AgrovocWS_address.
 *
 * This is a best-effort call: a failure is written to the class logger
 * and an empty string is returned instead of an exception.
 *
 * @param termcode forwarded to the service
 * @param format forwarded to the service
 * @return the service response, or an empty string if the call failed
 */
protected String getConceptInfoByTermcodeXML(String termcode, String format) {
    String result = "";
    ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
    try {
        URL url = new URL(AgrovocWS_address);
        ACSWWebService agrovoc_service = locator.getACSWWebService(url);
        result = agrovoc_service.getConceptInfoByTermcodeXML(termcode,
                format);
    } catch (ServiceException e) {
        // Every failure goes through the logger (not stderr) so it ends
        // up in the application log with its stack trace.
        logger.error("service exception", e);
    } catch (RemoteException e) {
        logger.error("remote exception", e);
    } catch (MalformedURLException e) {
        logger.error("malformed URL exception", e);
    }
    return result;
}
return result;
}
protected String getWsdl() {
String result = new String();
try {
protected String getConceptByKeyword(String ontologyName,
String searchString, String format, String searchMode, String lang) {
String result = new String();
ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
try {
URL url = new URL(AgrovocWS_address);
ACSWWebService agrovoc_service = locator.getACSWWebService(url);
result = agrovoc_service.getConceptByKeyword(ontologyName,
searchString, format, searchMode, lang);
} catch (ServiceException e) {
logger.error("service exception", e);
e.printStackTrace();
} catch (RemoteException e) {
e.printStackTrace();
} catch (MalformedURLException e) {
e.printStackTrace();
}
StringWriter sw = new StringWriter();
URL rss = new URL(this.AgrovocWS_address + "?wsdl");
return result;
}
BufferedReader in = new BufferedReader(new InputStreamReader(rss.openStream()));
String inputLine;
while ((inputLine = in.readLine()) != null) {
sw.write(inputLine);
}
in.close();
protected String getWsdl() {
String result = new String();
try {
result = sw.toString();
StringWriter sw = new StringWriter();
URL rss = new URL(this.AgrovocWS_address + "?wsdl");
} catch (Exception ex) {
logger.error("error occurred in servlet", ex);
}
return result;
}
BufferedReader in = new BufferedReader(new InputStreamReader(
rss.openStream()));
String inputLine;
while ((inputLine = in.readLine()) != null) {
sw.write(inputLine);
}
in.close();
result = sw.toString();
} catch (Exception ex) {
logger.error("error occurred in servlet", ex);
}
return result;
}
/**
 * Retired lookup: the argument is ignored and the result is always a
 * new, empty list.
 *
 * @param uri unused
 * @return an empty, mutable list
 * @throws Exception declared on the signature; this body never throws
 */
public List<Concept> getConceptsByURIWithSparql(String uri)
        throws Exception {
    return new ArrayList<Concept>();
}
/**
 * Reads the text of the first hasCodeAgrovoc element in an XML string.
 *
 * @param rdf the XML text to parse
 * @return the text content of the first hasCodeAgrovoc element, or an
 *         empty string when the document contains no such element
 * @throws Exception any parsing failure raised by XMLUtils.parse is
 *         passed to the caller unchanged
 */
protected String getAgrovocTermCode(String rdf) throws Exception {
    Document parsed = XMLUtils.parse(rdf);
    NodeList matches = parsed.getElementsByTagName("hasCodeAgrovoc");
    Node first = matches.item(0);
    if (first == null) {
        return "";
    }
    return first.getTextContent();
}
public List<Concept> getConceptsByURIWithSparql(String uri) throws Exception {
// deprecating this method...just return an empty list
List<Concept> conceptList = new ArrayList<Concept>();
return conceptList;
}
protected String getAgrovocTermCode(String rdf) throws Exception {
String termcode = new String();
try {
Document doc = XMLUtils.parse(rdf);
NodeList nodes = doc.getElementsByTagName("hasCodeAgrovoc");
if (nodes.item(0) != null) {
Node node = nodes.item(0);
termcode = node.getTextContent();
}
} catch (SAXException e) {
//e.printStackTrace();
throw e;
} catch (ParserConfigurationException e) {
//e.printStackTrace();
throw e;
} catch (IOException e) {
//e.printStackTrace();
throw e;
}
return termcode;
}
protected String getConceptURIFromRDF(String rdf) {
String conceptUri = new String();
try {
@ -311,21 +377,53 @@ public class AgrovocService implements ExternalConceptService {
NamedNodeMap attrs = node.getAttributes();
Attr idAttr = (Attr) attrs.getNamedItem("rdf:about");
conceptUri = idAttr.getTextContent();
} catch (IOException e) {
} catch (IOException e) {
e.printStackTrace();
System.err.println("rdf: "+rdf);
} catch (SAXException e) {
System.err.println("rdf: " + rdf);
} catch (SAXException e) {
e.printStackTrace();
System.err.println("rdf: "+rdf);
} catch (ParserConfigurationException e) {
System.err.println("rdf: " + rdf);
} catch (ParserConfigurationException e) {
e.printStackTrace();
System.err.println("rdf: "+rdf);
System.err.println("rdf: " + rdf);
}
return conceptUri;
}
protected String getDbpediaDescription(String uri) throws Exception{
/**
 * Collects the rdf:about value of every skos:Concept element in an
 * RDF/XML string. Intended for the getTermExpansion response, which
 * carries a list of concepts rather than a single one.
 *
 * Concepts without an rdf:about attribute are skipped. A parsing failure
 * is logged and the URIs gathered so far (normally none) are returned.
 *
 * @param rdf the RDF/XML text to parse
 * @return the concept URIs in document order; never null
 */
protected List<String> getConceptURIsListFromRDF(String rdf) {
    List<String> conceptUris = new ArrayList<String>();
    try {
        Document doc = XMLUtils.parse(rdf);
        NodeList nodes = doc.getElementsByTagName("skos:Concept");
        int numberNodes = nodes.getLength();
        for (int n = 0; n < numberNodes; n++) {
            NamedNodeMap attrs = nodes.item(n).getAttributes();
            Node idAttr = attrs.getNamedItem("rdf:about");
            if (idAttr == null) {
                // getNamedItem returns null for a missing attribute;
                // skip the element instead of failing the whole list.
                continue;
            }
            conceptUris.add(idAttr.getTextContent());
        }
    } catch (IOException e) {
        logger.error("Error reading concept URIs from rdf: " + rdf, e);
    } catch (SAXException e) {
        logger.error("Error reading concept URIs from rdf: " + rdf, e);
    } catch (ParserConfigurationException e) {
        logger.error("Error reading concept URIs from rdf: " + rdf, e);
    }
    return conceptUris;
}
protected String getDbpediaDescription(String uri) throws Exception {
String descriptionSource = " (Source: DBpedia)";
String description = new String();
String qs = ""
@ -336,7 +434,7 @@ public class AgrovocService implements ExternalConceptService {
+ "SELECT DISTINCT ?description WHERE { \n" + "<" + uri
+ "> rdfs:comment ?description . \n"
+ "FILTER (LANG(?description)='en' ) \n" + "}";
//System.out.println(qs);
// System.out.println(qs);
List<HashMap> resultList = new ArrayList<HashMap>();
QueryExecution qexec = null;
try {
@ -378,52 +476,119 @@ public class AgrovocService implements ExternalConceptService {
} catch (Exception ex) {
throw ex;
}
//Adding source so it is clear that this description comes from DBPedia
// Adding source so it is clear that this description comes from DBPedia
return description + descriptionSource;
}
/**
* @param uri
* @return
*/
protected String stripConceptId(String uri) {
String conceptId = new String();
int lastslash = uri.lastIndexOf('/');
conceptId = uri.substring(lastslash + 1, uri.length());
return conceptId;
}
/**
* @param str
* @return
*/
protected String extractConceptId(String str) {
try {
return str.substring(1, str.length() - 1);
} catch (Exception ex) {
return "";
}
}
//Get concept using agrovoc service
protected String getConceptInfoByURI(String ontologyName, String conceptURI, String format) {
String result = new String();
ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
try {
URL url = new URL(AgrovocWS_address);
ACSWWebService agrovoc_service = locator.getACSWWebService(url);
result = agrovoc_service.getConceptByURI(ontologyName, conceptURI, format);
} catch (ServiceException e) {
logger.error("service exception", e);
e.printStackTrace();
} catch (RemoteException e) {
e.printStackTrace();
} catch (MalformedURLException e) {
e.printStackTrace();
}
/**
 * Returns the part of a URI that follows its last '/'.
 *
 * @param uri the URI to shorten
 * @return the text after the final slash; the whole string when it
 *         contains no slash, and an empty string when it ends with one
 */
protected String stripConceptId(String uri) {
    return uri.substring(uri.lastIndexOf('/') + 1);
}
return result;
}
/**
 * Drops the first and the last character of a string.
 *
 * @param str the text to trim
 * @return the text between the first and last character, or an empty
 *         string when the input is null or shorter than two characters
 */
protected String extractConceptId(String str) {
    // Inputs this short have nothing between their end characters.
    if (str == null || str.length() < 2) {
        return "";
    }
    return str.substring(1, str.length() - 1);
}
/**
 * Fetches a concept from the Agrovoc web service at AgrovocWS_address by
 * calling its getConceptByURI operation.
 *
 * This is a best-effort call: a failure is written to the class logger
 * and an empty string is returned instead of an exception.
 *
 * @param ontologyName forwarded to the service
 * @param conceptURI forwarded to the service
 * @param format forwarded to the service
 * @return the service response, or an empty string if the call failed
 */
protected String getConceptInfoByURI(String ontologyName,
        String conceptURI, String format) {
    String result = "";
    ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator();
    try {
        URL url = new URL(AgrovocWS_address);
        ACSWWebService agrovoc_service = locator.getACSWWebService(url);
        result = agrovoc_service.getConceptByURI(ontologyName, conceptURI,
                format);
    } catch (ServiceException e) {
        // Every failure goes through the logger (not stderr) so it ends
        // up in the application log with its stack trace.
        logger.error("service exception", e);
    } catch (RemoteException e) {
        logger.error("remote exception", e);
    } catch (MalformedURLException e) {
        logger.error("malformed URL exception", e);
    }
    return result;
}
/**
* The code here utilizes the SKOSMOS REST API for Agrovoc
* This returns JSON LD so we would parse JSON instead of RDF
* The code above can still be utilized if we need to employ the web services directly
*/
/**
 * Runs a "starts with" search against the SKOSMOS search endpoint and
 * returns the raw response text.
 *
 * @param term the search text; it is URL-encoded here and a trailing '*'
 *        is appended to request prefix matching
 * @param lang the language code sent as the lang parameter
 * @return the response body with its lines concatenated, or null when
 *         the URL cannot be built or the request fails
 */
private String getSKOSMosSearchResults(String term, String lang) {
    String urlEncodedTerm = URLEncoder.encode(term);
    // Utilize 'starts with' using the * operator at the end
    String searchUrlString = this.conceptsSkosMosSearch + "query=" + urlEncodedTerm + "*" + "&lang=" + lang;
    URL searchURL = null;
    try {
        searchURL = new URL(searchUrlString);
    } catch (Exception e) {
        logger.error("Exception occurred in instantiating URL for "
                + searchUrlString, e);
        // If the url is having trouble, just return null for the concept
        return null;
    }

    BufferedReader in = null;
    try {
        // Decode explicitly as UTF-8 so non-ASCII labels do not depend on
        // the platform default charset.
        in = new BufferedReader(new InputStreamReader(
                searchURL.openStream(), "UTF-8"));
        StringWriter sw = new StringWriter();
        String inputLine;
        while ((inputLine = in.readLine()) != null) {
            sw.write(inputLine);
        }
        String results = sw.toString();
        logger.debug(results);
        return results;
    } catch (Exception ex) {
        logger.error("Error occurred in getting concept from the URL "
                + searchUrlString, ex);
        return null;
    } finally {
        // Close the connection even when reading fails part-way.
        if (in != null) {
            try {
                in.close();
            } catch (IOException closeEx) {
                logger.debug("Could not close reader for " + searchUrlString, closeEx);
            }
        }
    }
}
/**
 * Pulls the concept URIs out of a SKOSMOS search response.
 *
 * The expected shape is a JSON object whose "results" member is an array
 * of objects, each carrying a "uri" member. Entries without a "uri" are
 * skipped.
 *
 * @param results the raw response text; may be null or empty, because
 *        getSKOSMosSearchResults returns null when the request fails
 * @return the URIs in response order; empty when there is no usable
 *         response or no "results" member; never null
 */
private List<String> getConceptURIsListFromSkosMosResult(String results) {
    List<String> conceptURIs = new ArrayList<String>();
    if (StringUtils.isEmpty(results)) {
        return conceptURIs;
    }
    // toJSON may hand back something other than a JSONObject (an array or
    // a JSON null), so check the type before casting.
    Object parsed = JSONSerializer.toJSON(results);
    if (!(parsed instanceof JSONObject)) {
        return conceptURIs;
    }
    JSONObject json = (JSONObject) parsed;
    if (!json.containsKey("results")) {
        return conceptURIs;
    }
    JSONArray jsonArray = json.getJSONArray("results");
    int numberResults = jsonArray.size();
    for (int i = 0; i < numberResults; i++) {
        JSONObject jsonObject = jsonArray.getJSONObject(i);
        if (jsonObject.containsKey("uri")) {
            conceptURIs.add(jsonObject.getString("uri"));
        }
    }
    return conceptURIs;
}
}

View file

@ -41,16 +41,8 @@ public class LCSHService implements ExternalConceptService {
private final String schemeUri = hostUri + "/authorities/subjects";
private final String baseUri = hostUri + "/search/";
protected final String dbpedia_endpoint = " http://dbpedia.org/sparql";
//Property uris used for SKOS
protected final String SKOSNotePropertyURI = "http://www.w3.org/2004/02/skos/core#note";
protected final String SKOSPrefLabelURI = "http://www.w3.org/2004/02/skos/core#prefLabel";
protected final String SKOSAltLabelURI = "http://www.w3.org/2008/05/skos-xl#altLabel";
protected final String SKOSBroaderURI = "http://www.w3.org/2004/02/skos/core#broader";
protected final String SKOSNarrowerURI = "http://www.w3.org/2004/02/skos/core#narrower";
protected final String SKOSExactMatchURI = "http://www.w3.org/2004/02/skos/core#exactMatch";
protected final String SKOSCloseMatchURI = "http://www.w3.org/2004/02/skos/core#closeMatch";
@Override
public List<Concept> getConcepts(String term) throws Exception {
List<Concept> conceptList = new ArrayList<Concept>();
@ -95,12 +87,7 @@ public class LCSHService implements ExternalConceptService {
// that might exist
private List<Concept> processOutput(String results) throws Exception {
List<Concept> conceptList = new ArrayList<Concept>();
//SKOSManager manager = new SKOSManager();
// Get uris from the results
// Properties we will be querying for
//SKOSDataFactory sdf = manager.getSKOSDataFactory();
List<String> uris = getConceptURIFromXML(results);
String bestMatch = "true";
int i = 0;
@ -109,17 +96,19 @@ public class LCSHService implements ExternalConceptService {
bestMatch = "false";
}
log.debug("-" + uri + "-");
String conceptUriString = getSKOSURI(uri);
//This is the URL for retrieving the concept - the pattern is http://id.loc.gov/authorities/subjects/sh85014203.skos.rdf
//This is not the URI itself which would be http://id.loc.gov/authorities/subjects/sh85014203
String conceptURLString = getSKOSURL(uri);
String baseConceptURI = getConceptURI(uri);
URI conceptURI = null;
URL conceptURL = null;
try {
conceptURI = new URI(conceptUriString);
} catch (URISyntaxException e) {
log.error("URI syntax exception in trying to get concept uri " + conceptUriString, e);
conceptURL = new URL(conceptURLString);
} catch (Exception e) {
log.error("Error in trying to retrieve concept " + conceptURLString, e);
return conceptList;
}
log.debug("loading concept uri " + conceptUriString);
Concept c = this.createConcept(bestMatch, conceptUriString);
log.debug("loading concept uri " + conceptURLString);
Concept c = this.createConcept(bestMatch, conceptURLString, baseConceptURI);
if(c != null) {
conceptList.add(c);
}
@ -133,7 +122,7 @@ public class LCSHService implements ExternalConceptService {
//Load individual concept using a request
//private
public Concept createConcept(String bestMatch, String skosConceptURI) {
public Concept createConcept(String bestMatch, String conceptURLString, String skosConceptURI) {
Concept concept = new Concept();
@ -150,7 +139,8 @@ public class LCSHService implements ExternalConceptService {
//Utilize the XML directly instead of the SKOS API
try {
//LCSH doesn't need a language tag right now as results in english
concept = SKOSUtils.createConceptUsingXMLFromURI(concept, skosConceptURI, null);
//Also want to add skos notes as definition
concept = SKOSUtils.createConceptUsingXMLFromURL(concept, conceptURLString, null, true);
} catch(Exception ex) {
log.debug("Error occurred for annotation retrieval for skos concept " + skosConceptURI, ex);
@ -163,7 +153,7 @@ public class LCSHService implements ExternalConceptService {
private String getSKOSURI(String uri) {
private String getSKOSURL(String uri) {
// Strip .xml at the end and replace with .skos.rdf
String skosURI = uri;
if (uri.endsWith(".xml")) {

View file

@ -7,26 +7,24 @@
package edu.cornell.mannlib.semservices.util;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import edu.cornell.mannlib.semservices.bo.Concept;
@ -69,37 +67,61 @@ public class SKOSUtils {
}
// Downloading the XML from the URI itself
//No language tag support here but can be specified if need be at this level as well
public static Concept createConceptUsingXMLFromURI(Concept concept,
String conceptUriString, String langTagValue) {
String results = getConceptXML(conceptUriString);
// No language tag support here but can be specified if need be at this
// level as well
public static Concept createConceptUsingXMLFromURL(Concept concept,
String conceptURLString, String langTagValue, boolean addNotes) {
String results = getConceptXML(conceptURLString);
if (StringUtils.isEmpty(results)) {
return null;
}
return createConceptUsingXML(concept, results, langTagValue);
// return createConceptUsingXML(concept, results, langTagValue);
return createConceptUsingXMLModel(concept, results, langTagValue,
addNotes);
}
/**
 * Removes every occurrence of a concept's own URI from a list of URIs,
 * modifying the list in place.
 *
 * Background: matching XML by tag name can pick up nested elements such
 * as {@code <skos:narrower><skos:Concept ..><skos:broader rdf:resource="conceptURI">},
 * where conceptURI is the main concept itself, so the main concept's URI
 * has to be filtered out of its own relationship lists.
 *
 * @param uris the list to filter; it is modified and also returned
 * @param conceptURI the URI to remove wherever it occurs
 * @return the same list instance that was passed in
 */
public static List<String> removeConceptURIFromList(List<String> uris,
        String conceptURI) {
    // One bulk call removes all occurrences, instead of rescanning the
    // list from the start for each one.
    uris.removeAll(java.util.Collections.singleton(conceptURI));
    return uris;
}
/**
* The above code, although functional, does not take advantage of the fact
* that we can actually read and query the RDF in precisely the manner we
* wish.
*/
// Create concept given the actual XML (results_
// Lang tag value, if populated, will return pref label and alt label which
// match that language tag value
public static Concept createConceptUsingXML(Concept concept,
String results, String langTagValue) {
public static Concept createConceptUsingXMLModel(Concept concept,
String results, String langTagValue, boolean addNotes) {
HashMap<String, String> relationshipHash = getRelationshipHash();
try {
Document doc = XMLUtils.parse(results);
// Preferred label
List<String> labelLiterals = new ArrayList<String>();
if (StringUtils.isNotEmpty(langTagValue)) {
labelLiterals = getValuesFromXMLNodes(doc,
getPrefLabelTag(relationshipHash), "xml:lang", langTagValue);
} else {
labelLiterals = getValuesFromXMLNodes(doc,
getPrefLabelTag(relationshipHash), null);
}
String conceptURI = concept.getUri();
// Load Model from RDF
StringReader reader = new StringReader(results);
Model model = ModelFactory.createDefaultModel();
model.read(reader, null, "RDF/XML");
// Execute the following query to get the information we want for
// this resource
// Preferred label
List<String> labelLiterals = getPrefLabelsFromModel(conceptURI,
model, langTagValue);
if (labelLiterals.size() > 0) {
concept.setLabel(labelLiterals.get(0));
} else {
@ -110,281 +132,135 @@ public class SKOSUtils {
// Alternate label
List<String> altLabelList = new ArrayList<String>();
//if language tag is specified, get node values matching that language tag
if (StringUtils.isNotEmpty(langTagValue)) {
altLabelList = getValuesFromXMLNodes(doc,
//TODO: Check if xml:lang or a different version should be used
getAltLabelTag(relationshipHash), "xml:lang", langTagValue);
} else {
altLabelList = getValuesFromXMLNodes(doc,
getAltLabelTag(relationshipHash), null);
}
List<String> altLabelList = getAltLabelsFromModel(conceptURI,
model, langTagValue);
concept.setAltLabelList(altLabelList);
//Broder, narrower, exact match, and close match properties
String conceptURI = concept.getUri();
List<String> broaderURIList = getBroaderOrNarrowerURIs(doc, getBroaderTag(relationshipHash));
broaderURIList = removeConceptURIFromList(broaderURIList, conceptURI);
// Broder, narrower, exact match, and close match properties
List<String> broaderURIList = getBroaderURIsFromModel(conceptURI,
model);
// broaderURIList = removeConceptURIFromList(broaderURIList,
// conceptURI);
concept.setBroaderURIList(broaderURIList);
List<String> narrowerURIList = getBroaderOrNarrowerURIs(doc, getNarrowerTag(relationshipHash));
narrowerURIList = removeConceptURIFromList(narrowerURIList, conceptURI);
List<String> narrowerURIList = getNarrowerURIsFromModel(conceptURI,
model);
// narrowerURIList = removeConceptURIFromList(narrowerURIList,
// conceptURI);
concept.setNarrowerURIList(narrowerURIList);
List<String> exactMatchURIList = getCloseOrExactMatchURIs(doc, getExactMatchTag(relationshipHash));
exactMatchURIList = removeConceptURIFromList(exactMatchURIList, conceptURI);
List<String> exactMatchURIList = getExactMatchURIsFromModel(
conceptURI, model);
// exactMatchURIList = removeConceptURIFromList(exactMatchURIList,
// conceptURI);
concept.setExactMatchURIList(exactMatchURIList);
List<String> closeMatchURIList = getCloseOrExactMatchURIs(doc, getCloseMatchTag(relationshipHash));
closeMatchURIList = removeConceptURIFromList(closeMatchURIList, conceptURI);
List<String> closeMatchURIList = getCloseMatchURIsFromModel(
conceptURI, model);
// closeMatchURIList = removeConceptURIFromList(closeMatchURIList,
// conceptURI);
concept.setCloseMatchURIList(closeMatchURIList);
} catch (IOException e) {
log.error("error occurred in parsing " + results, e);
} catch (SAXException e) {
log.error("error occurred in parsing " + results, e);
} catch (ParserConfigurationException e) {
// Notes may exist, in which case they should be employed
if (addNotes) {
List<String> notes = getNotesFromModel(conceptURI, model,
langTagValue);
if (notes.size() > 0) {
concept.setDefinition(notes.get(0));
}
}
} catch (Exception e) {
log.error("error occurred in parsing " + results, e);
}
return concept;
}
//Because of the fact the xml returns matches by tag name, and the XML may look like <skos:narrower><skos:Concept ..><skos:broader rdf:resource:"conceptURI">
//where conceptURI is the concept that is the subject of skos:narrower, we need to ensure we are not returning the same uri as that of the main concept
public static List<String> removeConceptURIFromList(List<String> uris, String conceptURI) {
//remove will return a boolean if the value exists in the list and is removed
//if/when it returns false, the URI is not in the list
while(uris.remove(conceptURI)) {};
return uris;
/**
 * Reads the skos:prefLabel values of a concept out of an RDF model.
 *
 * @param conceptURI URI of the concept whose preferred labels are wanted
 * @param model RDF model to read from
 * @param langTagValue language tag handed on to getLabelsFromModel
 * @return the list produced by getLabelsFromModel for the prefLabel property
 */
private static List<String> getPrefLabelsFromModel(String conceptURI,
        Model model, String langTagValue) {
    // Pure delegation: only the SKOS property URI is fixed here.
    return getLabelsFromModel(conceptURI,
            "http://www.w3.org/2004/02/skos/core#prefLabel", model,
            langTagValue);
}
// Default to English for search results but this should be made
// configurable
public static List<String> getValuesFromXMLNodes(Document doc,
String tagName, String attributeName) {
return getValuesFromXMLNodes(doc, tagName, attributeName, null);
/**
 * Reads the skos:altLabel values of a concept out of an RDF model.
 *
 * @param conceptURI URI of the concept whose alternate labels are wanted
 * @param model RDF model to read from
 * @param langTagValue language tag handed on to getLabelsFromModel
 * @return the list produced by getLabelsFromModel for the altLabel property
 */
private static List<String> getAltLabelsFromModel(String conceptURI,
        Model model, String langTagValue) {
    // Pure delegation: only the SKOS property URI is fixed here.
    return getLabelsFromModel(conceptURI,
            "http://www.w3.org/2004/02/skos/core#altLabel", model,
            langTagValue);
}
/**
 * Finds every element called tagName in the document and extracts values
 * from those elements via getValuesFromXML.
 *
 * @param doc parsed XML document to search
 * @param tagName element name passed to Document.getElementsByTagName
 * @param attributeName attribute name forwarded to getValuesFromXML
 * @param matchAttributeValue attribute value forwarded to getValuesFromXML
 * @return the list returned by getValuesFromXML for the matching elements
 */
public static List<String> getValuesFromXMLNodes(Document doc,
        String tagName, String attributeName, String matchAttributeValue) {
    return getValuesFromXML(doc.getElementsByTagName(tagName),
            attributeName, matchAttributeValue);
}
/**
 * Builds a list of values from the given nodes, one getNodeValue call per
 * node, keeping only the non-empty results (in node order).
 *
 * How a value is picked is decided by getNodeValue: either the node's text
 * content, the value of the attribute called attributeName, or the text
 * content of nodes whose attributeName attribute equals
 * matchAttributeValue.
 *
 * Background on the SKOSMos search results for Agrovoc: matches are
 * sometimes returned as <skos:closeMatch rdf:resource="x"> and other times
 * as <skos:closeMatch><rdf:Description rdf:about="matchURI">. closeMatch
 * and exactMatch use these patterns; broader and narrower may use either
 * the rdf:resource pattern or <skos:broader><skos:Concept rdf:about="...">.
 *
 * @param nodes nodes to read values from
 * @param attributeName attribute name forwarded to getNodeValue
 * @param matchAttributeValue attribute value forwarded to getNodeValue
 * @return the non-empty values, possibly an empty list
 */
public static List<String> getValuesFromXML(NodeList nodes,
        String attributeName, String matchAttributeValue) {
    List<String> collected = new ArrayList<String>();
    final int total = nodes.getLength();
    for (int idx = 0; idx < total; idx++) {
        String candidate = getNodeValue(nodes.item(idx), attributeName,
                matchAttributeValue);
        // Null and "" results carry no value, so they are left out.
        if (StringUtils.isEmpty(candidate)) {
            continue;
        }
        collected.add(candidate);
    }
    return collected;
}
public static String getNodeValue(Node node, String attributeName, String matchAttributeValue) {
String value = null;
if (StringUtils.isEmpty(attributeName)) {
value = node.getTextContent();
} else {
// Attribute name is specified
// Get the value for the attribute itself
String attributeValue = getAttributeValue(attributeName, node);
// If no matching value for attribute specified, return the
// value of the attribute itself
// e.g. value of "lang" attribute which is "en"
if (StringUtils.isEmpty(matchAttributeValue)) {
value = attributeValue;
} else {
// match attribute and match value are both specified, so
// return NODE value that matches attribute value for given
// attribute name
// e.g. preferred label node value where lang = "en"
if (attributeValue.equals(matchAttributeValue)) {
value = node.getTextContent();
private static List<String> getLabelsFromModel(String conceptURI,
String propertyURI, Model model, String langTagValue) {
List<String> labels = new ArrayList<String>();
StmtIterator statements = model.listStatements(
ResourceFactory.createResource(conceptURI),
ResourceFactory.createProperty(propertyURI), (RDFNode) null);
while (statements.hasNext()) {
Statement statement = statements.nextStatement();
RDFNode node = statement.getObject();
if (node != null && node.isLiteral()) {
String label = node.asLiteral().getString();
if (StringUtils.isNotEmpty(langTagValue)) {
String language = node.asLiteral().getLanguage();
if (language != null && language.equals(langTagValue)) {
labels.add(label);
}
} else {
labels.add(label);
}
}
}
return value;
}
public static String getAttributeValue(String attributeName, Node node) {
NamedNodeMap attrs = node.getAttributes();
Attr a = (Attr) attrs.getNamedItem(attributeName);
if (a != null) {
return a.getTextContent();
}
return null;
return labels;
}
// The Hash will depend on the particular RDF results
// TODO: Refactor this in a better method
public static HashMap<String, String> getRelationshipHash() {
HashMap<String, String> relationshipHash = new HashMap<String, String>();
String[] tagsArray = { "prefLabel", "altLabel", "broader", "narrower",
"exactMatch", "closeMatch" };
List<String> tags = Arrays.asList(tagsArray);
/**
 * Reads the skos:note values of a concept out of an RDF model.
 *
 * @param conceptURI URI of the concept whose notes are wanted
 * @param model RDF model to read from
 * @param langTagValue language tag handed on to getLabelsFromModel
 * @return the list produced by getLabelsFromModel for the note property
 */
private static List<String> getNotesFromModel(String conceptURI,
        Model model, String langTagValue) {
    // Notes are literals, so the label reader is reused for them.
    return getLabelsFromModel(conceptURI,
            "http://www.w3.org/2004/02/skos/core#note", model,
            langTagValue);
}
for (String tag : tags) {
relationshipHash.put(tag, "skos:" + tag);
}
return relationshipHash;
/**
 * Reads the skos:closeMatch targets of a concept out of an RDF model.
 *
 * @param conceptURI URI of the concept whose close matches are wanted
 * @param model RDF model to read from
 * @return the list produced by getRelatedURIsFromModel for closeMatch
 */
private static List<String> getCloseMatchURIsFromModel(String conceptURI,
        Model model) {
    return getRelatedURIsFromModel(conceptURI,
            "http://www.w3.org/2004/02/skos/core#closeMatch", model);
}
public static String getPrefLabelTag(
HashMap<String, String> relationshipHash) {
return relationshipHash.get("prefLabel");
/**
 * Reads the skos:exactMatch targets of a concept out of an RDF model.
 *
 * @param conceptURI URI of the concept whose exact matches are wanted
 * @param model RDF model to read from
 * @return the list produced by getRelatedURIsFromModel for exactMatch
 */
private static List<String> getExactMatchURIsFromModel(String conceptURI,
        Model model) {
    return getRelatedURIsFromModel(conceptURI,
            "http://www.w3.org/2004/02/skos/core#exactMatch", model);
}
public static String getAltLabelTag(HashMap<String, String> relationshipHash) {
return relationshipHash.get("altLabel");
/**
 * Reads the skos:narrower targets of a concept out of an RDF model.
 *
 * @param conceptURI URI of the concept whose narrower concepts are wanted
 * @param model RDF model to read from
 * @return the list produced by getRelatedURIsFromModel for narrower
 */
private static List<String> getNarrowerURIsFromModel(String conceptURI,
        Model model) {
    return getRelatedURIsFromModel(conceptURI,
            "http://www.w3.org/2004/02/skos/core#narrower", model);
}
public static String getCloseMatchTag(
HashMap<String, String> relationshipHash) {
return relationshipHash.get("closeMatch");
/**
 * Reads the skos:broader targets of a concept out of an RDF model.
 *
 * @param conceptURI URI of the concept whose broader concepts are wanted
 * @param model RDF model to read from
 * @return the list produced by getRelatedURIsFromModel for broader
 */
private static List<String> getBroaderURIsFromModel(String conceptURI,
        Model model) {
    return getRelatedURIsFromModel(conceptURI,
            "http://www.w3.org/2004/02/skos/core#broader", model);
}
/**
 * Looks up the entry stored under the "exactMatch" key.
 *
 * @param relationshipHash map from relationship key to tag name
 * @return the mapped value, or null when the map holds no such key
 */
public static String getExactMatchTag(
        HashMap<String, String> relationshipHash) {
    final String exactMatchTag = relationshipHash.get("exactMatch");
    return exactMatchTag;
}
private static List<String> getRelatedURIsFromModel(String conceptURI,
String propertyURI, Model model) {
List<String> URIs = new ArrayList<String>();
NodeIterator nodeIterator = model.listObjectsOfProperty(
ResourceFactory.createResource(conceptURI),
ResourceFactory.createProperty(propertyURI));
/**
 * Looks up the entry stored under the "broader" key.
 *
 * @param relationshipHash map from relationship key to tag name
 * @return the mapped value, or null when the map holds no such key
 */
public static String getBroaderTag(HashMap<String, String> relationshipHash) {
    final String broaderTag = relationshipHash.get("broader");
    return broaderTag;
}
/**
 * Looks up the entry stored under the "narrower" key.
 *
 * @param relationshipHash map from relationship key to tag name
 * @return the mapped value, or null when the map holds no such key
 */
public static String getNarrowerTag(HashMap<String, String> relationshipHash) {
    final String narrowerTag = relationshipHash.get("narrower");
    return narrowerTag;
}
/**
 * Collects the URIs referenced by broader or narrower elements.
 *
 * Broader, narrower, close match, and exact match may be nested values -
 * e.g. Agrovoc. Even with Agrovoc, they may be nested sometimes and not be
 * nested other times. Both situations are handed to
 * getPossiblyNestedValuesFromXML: the rdf:resource attribute on the element
 * itself, or the rdf:about attribute of a nested skos:Concept child.
 *
 * @param doc parsed XML document to search
 * @param tagName element name passed to Document.getElementsByTagName
 * @return the list returned by getPossiblyNestedValuesFromXML
 */
public static List<String> getBroaderOrNarrowerURIs(Document doc,
        String tagName) {
    return getPossiblyNestedValuesFromXML(
            doc.getElementsByTagName(tagName), "rdf:resource",
            "skos:Concept", "rdf:about");
}
/**
 * Collects the URIs referenced by close match or exact match elements.
 *
 * The lookup is handed to getPossiblyNestedValuesFromXML with the
 * rdf:resource attribute for the element itself and the rdf:about
 * attribute of a nested rdf:Description child as the alternative.
 *
 * @param doc parsed XML document to search
 * @param tagName element name passed to Document.getElementsByTagName
 * @return the list returned by getPossiblyNestedValuesFromXML
 */
public static List<String> getCloseOrExactMatchURIs(Document doc,
        String tagName) {
    return getPossiblyNestedValuesFromXML(
            doc.getElementsByTagName(tagName), "rdf:resource",
            "rdf:Description", "rdf:about");
}
public static List<String> getPossiblyNestedValuesFromXML(NodeList nodes, String nodeAttributeName, String childNodeTagName, String childNodeAttributeName ) {
int len = nodes.getLength();
int i;
List<String> values = new ArrayList<String>();
for (i = 0; i < len; i++) {
Node node = nodes.item(i);
//String nodeValue = getNodeValue(node, attributeName, matchAttributeValue);
String nodeValue = getPossiblyNestedNodeValue(node, nodeAttributeName, childNodeTagName, childNodeAttributeName);
if(StringUtils.isNotEmpty(nodeValue)) {
values.add(nodeValue);
while (nodeIterator.hasNext()) {
RDFNode node = nodeIterator.nextNode();
if (node.isResource() && node.asResource().getURI() != null) {
String URI = node.asResource().getURI();
URIs.add(URI);
}
}
return values;
}
/**
 * Reads a value either from an attribute of the node itself or, failing
 * that, from an attribute of its first child with a given tag name.
 *
 * For example, for <skos:closeMatch rdf:resource="x"> the result is x, and
 * for <skos:closeMatch><rdf:Description rdf:about="x"> the result is also x
 * (given the matching attribute and tag names).
 *
 * @param node node to inspect
 * @param nodeAttributeName attribute looked up on the node itself first
 * @param childNodeTagName node name a child must have to be considered
 * @param childNodeAttributeName attribute read from the matching child
 * @return the node's own attribute value when it is non-empty; otherwise
 *         the attribute value of the first child named childNodeTagName
 *         (which may be null); null when no child has that name
 */
public static String getPossiblyNestedNodeValue(Node node, String nodeAttributeName, String childNodeTagName,
        String childNodeAttributeName) {
    String direct = getAttributeValue(nodeAttributeName, node);
    if (StringUtils.isNotEmpty(direct)) {
        return direct;
    }
    // Nothing usable on the node itself: fall back to the nested form.
    NodeList children = node.getChildNodes();
    for (int idx = 0, count = children.getLength(); idx < count; idx++) {
        Node child = children.item(idx);
        if (child.getNodeName().equals(childNodeTagName)) {
            // Only the first child with the requested name is consulted.
            return getAttributeValue(childNodeAttributeName, child);
        }
    }
    return null;
}
//Custom cases for Agrovoc and/or similar patterns if they exist
/**
 * Gets the about URI from <tag><rdf:Description about="x">, i.e. returns
 * "x".
 *
 * The previous implementation computed the attribute value for every child
 * and then discarded it, so it always returned null. It also asked every
 * child (including text nodes, which have no attribute map) for attributes.
 *
 * @param n node whose direct children are searched; may be null
 * @return the non-empty "rdf:about" (or, failing that, "about") attribute
 *         value of the first rdf:Description child element carrying one,
 *         or null when there is none
 */
public static String getTagNestedAbout(Node n) {
    if (n == null) {
        return null;
    }
    NodeList childNodes = n.getChildNodes();
    int numberNodes = childNodes.getLength();
    for (int i = 0; i < numberNodes; i++) {
        Node childNode = childNodes.item(i);
        // Whitespace/text/comment children have no attributes at all.
        if (childNode.getNodeType() != Node.ELEMENT_NODE
                || !"rdf:Description".equals(childNode.getNodeName())) {
            continue;
        }
        NamedNodeMap attrs = childNode.getAttributes();
        // The qualified name is what the other helpers in this file look
        // up; the bare name is kept because the original code asked for it.
        for (String attributeName : new String[] { "rdf:about", "about" }) {
            Node attr = attrs.getNamedItem(attributeName);
            if (attr != null && attr.getNodeValue() != null
                    && !attr.getNodeValue().isEmpty()) {
                return attr.getNodeValue();
            }
        }
    }
    return null;
}
/**
 * Gets the about URI from <tag><skos:Concept about="x">, i.e. returns "x".
 *
 * The previous implementation computed the attribute value for every child
 * and then discarded it, so it always returned null. It also asked every
 * child (including text nodes, which have no attribute map) for attributes.
 *
 * @param n node whose direct children are searched; may be null
 * @return the non-empty "rdf:about" (or, failing that, "about") attribute
 *         value of the first skos:Concept child element carrying one, or
 *         null when there is none
 */
public static String getTagNestedSKOSConceptAbout(Node n) {
    if (n == null) {
        return null;
    }
    NodeList childNodes = n.getChildNodes();
    int numberNodes = childNodes.getLength();
    for (int i = 0; i < numberNodes; i++) {
        Node childNode = childNodes.item(i);
        // Whitespace/text/comment children have no attributes at all.
        if (childNode.getNodeType() != Node.ELEMENT_NODE
                || !"skos:Concept".equals(childNode.getNodeName())) {
            continue;
        }
        NamedNodeMap attrs = childNode.getAttributes();
        // The qualified name is what the other helpers in this file look
        // up; the bare name is kept because the original code asked for it.
        for (String attributeName : new String[] { "rdf:about", "about" }) {
            Node attr = attrs.getNamedItem(attributeName);
            if (attr != null && attr.getNodeValue() != null
                    && !attr.getNodeValue().isEmpty()) {
                return attr.getNodeValue();
            }
        }
    }
    return null;
}
return URIs;
}
}