diff --git a/src/edu/cornell/mannlib/semservices/service/impl/AgrovocService.java b/src/edu/cornell/mannlib/semservices/service/impl/AgrovocService.java index 46baeac8..8f636c07 100644 --- a/src/edu/cornell/mannlib/semservices/service/impl/AgrovocService.java +++ b/src/edu/cornell/mannlib/semservices/service/impl/AgrovocService.java @@ -2,7 +2,6 @@ package edu.cornell.mannlib.semservices.service.impl; - import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; @@ -14,12 +13,18 @@ import java.net.URL; import java.rmi.RemoteException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.LinkedHashSet; import java.util.List; import javax.xml.parsers.ParserConfigurationException; import javax.xml.rpc.ServiceException; +import net.sf.json.JSONArray; +import net.sf.json.JSONObject; +import net.sf.json.JSONSerializer; + import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -48,142 +53,183 @@ import edu.cornell.mannlib.semservices.util.SKOSUtils; import edu.cornell.mannlib.semservices.util.XMLUtils; import edu.cornell.mannlib.vitro.webapp.web.URLEncoder; -public class AgrovocService implements ExternalConceptService { +public class AgrovocService implements ExternalConceptService { + + protected final Log logger = LogFactory.getLog(getClass()); + private java.lang.String AgrovocWS_address = "http://agrovoc.fao.org/axis/services/SKOSWS"; + private final String schemeUri = "http://aims.fao.org/aos/agrovoc/agrovocScheme"; + private final String ontologyName = "agrovoc"; + private final String format = "SKOS"; + private final String lang = "en"; + private final String searchMode = "starts with";//Used to be Exact Match, or exact word or starts with + protected final String dbpedia_endpoint = " http://dbpedia.org/sparql"; + // URL to get all the information for a concept - protected final Log logger = LogFactory.getLog(getClass()); - private java.lang.String AgrovocWS_address = "http://agrovoc.fao.org/axis/services/SKOSWS"; - private final String schemeUri = "http://aims.fao.org/aos/agrovoc/agrovocScheme"; - private final String ontologyName = "agrovoc"; - private final String format = "SKOS"; - private final String lang = "en"; - private final String searchMode = "Exact Match"; - protected final String dbpedia_endpoint = " http://dbpedia.org/sparql"; - //URL to get all the information for a concept - protected final String conceptSkosMosURL = "http://aims.fao.org/skosmos/rest/v1/agrovoc/data?"; - + protected final String conceptSkosMosBase = "http://aims.fao.org/skosmos/rest/v1/"; + protected final String conceptsSkosMosSearch = conceptSkosMosBase + "search?"; + protected final String conceptSkosMosURL = conceptSkosMosBase + "/agrovoc/data?"; @Override public List getConcepts(String term) throws Exception { List conceptList = new ArrayList(); - String result = getURIByTermAndLangXML(this.ontologyName, term, this.searchMode, this.format, this.lang); - // return empty conceptList if conceptUri is empty - if (StringUtils.isEmpty(result)) { - return conceptList; - } - // Get the concept URI - String conceptUri = getConceptURIFromRDF(result); - - - // return empty conceptList if conceptUri is empty - if (StringUtils.isEmpty(conceptUri)) { - return conceptList; - } - URI uri = null; - try { - uri = new URI(conceptUri); - } catch (URISyntaxException e) { - logger.error("Error occurred with creating the URI ", e); + //For the RDF webservices mechanism, utilize the following + /* + String result = getTermExpansion(this.ontologyName, term, + this.searchMode, this.format, this.lang); + + // return empty conceptList if conceptUri is empty + if (StringUtils.isEmpty(result)) { return conceptList; } + + // Get the list of the concept URIs in the RDF + List conceptUris = getConceptURIsListFromRDF(result); + */ - //Returns concept information in the format specified, which is currently XML - //Utilizing Agrovoc's getConceptInfo returns alternate and preferred labels but - //none of the exact match or close match descriptions - - Concept c = this.createConcept("true", conceptUri); - if(c != null) { - //Get definition from dbpedia references stored in the close Match list - List closeMatches = c.getCloseMatchURIList(); - for(String closeMatch: closeMatches) { - - if (closeMatch.startsWith("http://dbpedia.org")) { - String description = getDbpediaDescription(closeMatch); - //System.out.println("description: "+ description); - c.setDefinition(description); - } + //For the SKOSMos search mechanism, utilize this instead + String result = getSKOSMosSearchResults(term, this.lang); + List conceptUris = getConceptURIsListFromSkosMosResult(result); + if (conceptUris.size() == 0) + return conceptList; + int conceptCounter = 0; + + HashSet encounteredURI = new HashSet(); + + // Loop through each of these URIs and load using the SKOSManager + for (String conceptUri : conceptUris) { + conceptCounter++; + if (StringUtils.isEmpty(conceptUri)) { + // If the conceptURI is empty, keep going + continue; + } + if(encounteredURI.contains(conceptUri)) { + //If we have already encountered this concept URI, do not redisplay or reprocess + continue; + } + encounteredURI.add(conceptUri); + + // Test and see if the URI is valid + URI uri = null; + try { + uri = new URI(conceptUri); + } catch (URISyntaxException e) { + logger.error("Error occurred with creating the URI ", e); + continue; + } + // Returns concept information in the format specified, which is + // currently XML + // Utilizing Agrovoc's getConceptInfo returns alternate and + // preferred labels but + // none of the exact match or close match descriptions + String bestMatch = "false"; + //Assume the first result is considered the 'best match' + //Although that is not something we are actually retrieving from the service itself explicitly + if(conceptCounter == 1) { + bestMatch = "true"; + } + Concept c = this.createConcept(bestMatch, conceptUri); + if (c != null) { + // Get definition from dbpedia references stored in the close + // Match list + List closeMatches = c.getCloseMatchURIList(); + for (String closeMatch : closeMatches) { + + if (closeMatch.startsWith("http://dbpedia.org")) { + try { + String description = getDbpediaDescription(closeMatch); + // System.out.println("description: "+ description); + c.setDefinition(description); + } catch (Exception ex) { + logger.error("An error occurred in the process of retrieving dbpedia description", ex); + } + } + } + conceptList.add(c); } - conceptList.add(c); } - + return conceptList; } - public List processResults(String term) throws Exception { - return getConcepts(term); - } - - - public Concept createConcept(String bestMatch, String skosConceptURI) { - Concept concept = new Concept(); - concept.setUri(skosConceptURI); - concept.setConceptId(stripConceptId(skosConceptURI)); - concept.setBestMatch(bestMatch); - concept.setDefinedBy(schemeUri); - concept.setSchemeURI(this.schemeUri); - concept.setType(""); - - String encodedURI = URLEncoder.encode(skosConceptURI); - String encodedFormat = URLEncoder.encode("application/rdf+xml"); - String url = conceptSkosMosURL + "uri=" + encodedURI + "&format="+ encodedFormat; - - //Utilize the XML directly instead of the SKOS API + + + + + public List processResults(String term) throws Exception { + return getConcepts(term); + } + + public Concept createConcept(String bestMatch, String skosConceptURI) { + + Concept concept = new Concept(); + concept.setUri(skosConceptURI); + concept.setConceptId(stripConceptId(skosConceptURI)); + concept.setBestMatch(bestMatch); + concept.setDefinedBy(schemeUri); + concept.setSchemeURI(this.schemeUri); + concept.setType(""); + + String encodedURI = URLEncoder.encode(skosConceptURI); + String encodedFormat = URLEncoder.encode("application/rdf+xml"); + String url = conceptSkosMosURL + "uri=" + encodedURI + "&format=" + + encodedFormat; + + // Utilize the XML directly instead of the SKOS API try { - - concept = SKOSUtils.createConceptUsingXMLFromURI(concept, url, "en"); - - } catch(Exception ex) { - logger.debug("Error occurred for creating concept " + skosConceptURI, ex); + + concept = SKOSUtils + .createConceptUsingXMLFromURL(concept, url, "en", false); + + } catch (Exception ex) { + logger.debug("Error occurred for creating concept " + + skosConceptURI, ex); return null; } - - + return concept; } - - - @Deprecated - protected String getTermcodeByTerm(String term) throws Exception { - String result = new String(); - ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); - try { - URL url = new URL(AgrovocWS_address); - ACSWWebService agrovoc_service = locator.getACSWWebService(url); - result = agrovoc_service.getTermcodeByTerm(term); - } catch (ServiceException e) { - logger.error("service exception", e); - throw e; - } catch (RemoteException e) { - logger.error("remote exception", e); - throw e; - } catch (MalformedURLException e) { - logger.error("malformed URL exception", e); - throw e; - } - return result; - } - - - protected String getTermCodeByTermAndLangXML(String ontologyName, String searchString, String lang, String codeName, String format) { - String result = new String(); - ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); - try { - URL url = new URL(AgrovocWS_address); - ACSWWebService agrovoc_service = locator.getACSWWebService(url); - result = agrovoc_service.getTermCodeByTermAndLangXML(ontologyName, searchString, lang, codeName, format); - } catch (ServiceException e) { - logger.error("service exception", e); - e.printStackTrace(); - } catch (RemoteException e) { - e.printStackTrace(); - } catch (MalformedURLException e) { - e.printStackTrace(); - } - return result; - } + @Deprecated + protected String getTermcodeByTerm(String term) throws Exception { + String result = new String(); + ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); + try { + URL url = new URL(AgrovocWS_address); + ACSWWebService agrovoc_service = locator.getACSWWebService(url); + result = agrovoc_service.getTermcodeByTerm(term); + } catch (ServiceException e) { + logger.error("service exception", e); + throw e; + } catch (RemoteException e) { + logger.error("remote exception", e); + throw e; + } catch (MalformedURLException e) { + logger.error("malformed URL exception", e); + throw e; + } + return result; + } - + protected String getTermCodeByTermAndLangXML(String ontologyName, + String searchString, String lang, String codeName, String format) { + String result = new String(); + ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); + try { + URL url = new URL(AgrovocWS_address); + ACSWWebService agrovoc_service = locator.getACSWWebService(url); + result = agrovoc_service.getTermCodeByTermAndLangXML(ontologyName, + searchString, lang, codeName, format); + } catch (ServiceException e) { + logger.error("service exception", e); + e.printStackTrace(); + } catch (RemoteException e) { + e.printStackTrace(); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + return result; + } protected String getURIByTermAndLangXML(String ontologyName, String term, String searchMode, String format, String lang) { @@ -205,102 +251,122 @@ public class AgrovocService implements ExternalConceptService { return result; } - - - + // Creating method for term expansion + protected String getTermExpansion(String ontologyName, String term, + String searchMode, String format, String lang) { + String result = new String(); + ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); + try { + URL url = new URL(AgrovocWS_address); + ACSWWebService agrovoc_service = locator.getACSWWebService(url); + result = agrovoc_service.getTermExpansion(ontologyName, term, + format, searchMode, lang); // the actual call has this order + // for parameters + } catch (ServiceException e) { + e.printStackTrace(); + } catch (RemoteException e) { + e.printStackTrace(); + } catch (MalformedURLException e) { + e.printStackTrace(); + } - protected String getConceptInfoByTermcodeXML(String termcode, String format) { - String result = new String(); - ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); - try { - URL url = new URL(AgrovocWS_address); - ACSWWebService agrovoc_service = locator.getACSWWebService(url); - result = agrovoc_service.getConceptInfoByTermcodeXML(termcode, format); - } catch (ServiceException e) { - logger.error("service exception", e); - e.printStackTrace(); - } catch (RemoteException e) { - e.printStackTrace(); - } catch (MalformedURLException e) { - e.printStackTrace(); - } + return result; + } - return result; - } - - protected String getConceptByKeyword(String ontologyName, String searchString, String format, String searchMode, String lang) { - String result = new String(); - ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); - try { - URL url = new URL(AgrovocWS_address); - ACSWWebService agrovoc_service = locator.getACSWWebService(url); - result = agrovoc_service.getConceptByKeyword(ontologyName, searchString, format, searchMode, lang); - } catch (ServiceException e) { - logger.error("service exception", e); - e.printStackTrace(); - } catch (RemoteException e) { - e.printStackTrace(); - } catch (MalformedURLException e) { - e.printStackTrace(); - } + protected String getConceptInfoByTermcodeXML(String termcode, String format) { + String result = new String(); + ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); + try { + URL url = new URL(AgrovocWS_address); + ACSWWebService agrovoc_service = locator.getACSWWebService(url); + result = agrovoc_service.getConceptInfoByTermcodeXML(termcode, + format); + } catch (ServiceException e) { + logger.error("service exception", e); + e.printStackTrace(); + } catch (RemoteException e) { + e.printStackTrace(); + } catch (MalformedURLException e) { + e.printStackTrace(); + } - return result; - } - - + return result; + } - protected String getWsdl() { - String result = new String(); - try { + protected String getConceptByKeyword(String ontologyName, + String searchString, String format, String searchMode, String lang) { + String result = new String(); + ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); + try { + URL url = new URL(AgrovocWS_address); + ACSWWebService agrovoc_service = locator.getACSWWebService(url); + result = agrovoc_service.getConceptByKeyword(ontologyName, + searchString, format, searchMode, lang); + } catch (ServiceException e) { + logger.error("service exception", e); + e.printStackTrace(); + } catch (RemoteException e) { + e.printStackTrace(); + } catch (MalformedURLException e) { + e.printStackTrace(); + } - StringWriter sw = new StringWriter(); - URL rss = new URL(this.AgrovocWS_address + "?wsdl"); + return result; + } - BufferedReader in = new BufferedReader(new InputStreamReader(rss.openStream())); - String inputLine; - while ((inputLine = in.readLine()) != null) { - sw.write(inputLine); - } - in.close(); + protected String getWsdl() { + String result = new String(); + try { - result = sw.toString(); + StringWriter sw = new StringWriter(); + URL rss = new URL(this.AgrovocWS_address + "?wsdl"); - } catch (Exception ex) { - logger.error("error occurred in servlet", ex); - } - return result; - } + BufferedReader in = new BufferedReader(new InputStreamReader( + rss.openStream())); + String inputLine; + while ((inputLine = in.readLine()) != null) { + sw.write(inputLine); + } + in.close(); + result = sw.toString(); + + } catch (Exception ex) { + logger.error("error occurred in servlet", ex); + } + return result; + } + + public List getConceptsByURIWithSparql(String uri) + throws Exception { + // deprecating this method...just return an empty list + List conceptList = new ArrayList(); + return conceptList; + } + + protected String getAgrovocTermCode(String rdf) throws Exception { + String termcode = new String(); + try { + Document doc = XMLUtils.parse(rdf); + NodeList nodes = doc.getElementsByTagName("hasCodeAgrovoc"); + if (nodes.item(0) != null) { + Node node = nodes.item(0); + termcode = node.getTextContent(); + } + + } catch (SAXException e) { + // e.printStackTrace(); + throw e; + } catch (ParserConfigurationException e) { + // e.printStackTrace(); + throw e; + } catch (IOException e) { + // e.printStackTrace(); + throw e; + } + return termcode; + } - public List getConceptsByURIWithSparql(String uri) throws Exception { - // deprecating this method...just return an empty list - List conceptList = new ArrayList(); - return conceptList; - } - - protected String getAgrovocTermCode(String rdf) throws Exception { - String termcode = new String(); - try { - Document doc = XMLUtils.parse(rdf); - NodeList nodes = doc.getElementsByTagName("hasCodeAgrovoc"); - if (nodes.item(0) != null) { - Node node = nodes.item(0); - termcode = node.getTextContent(); - } - - } catch (SAXException e) { - //e.printStackTrace(); - throw e; - } catch (ParserConfigurationException e) { - //e.printStackTrace(); - throw e; - } catch (IOException e) { - //e.printStackTrace(); - throw e; - } - return termcode; - } - protected String getConceptURIFromRDF(String rdf) { String conceptUri = new String(); try { @@ -311,21 +377,53 @@ public class AgrovocService implements ExternalConceptService { NamedNodeMap attrs = node.getAttributes(); Attr idAttr = (Attr) attrs.getNamedItem("rdf:about"); conceptUri = idAttr.getTextContent(); - } catch (IOException e) { + } catch (IOException e) { e.printStackTrace(); - System.err.println("rdf: "+rdf); - } catch (SAXException e) { + System.err.println("rdf: " + rdf); + } catch (SAXException e) { e.printStackTrace(); - System.err.println("rdf: "+rdf); - } catch (ParserConfigurationException e) { + System.err.println("rdf: " + rdf); + } catch (ParserConfigurationException e) { e.printStackTrace(); - System.err.println("rdf: "+rdf); + System.err.println("rdf: " + rdf); } return conceptUri; } - - protected String getDbpediaDescription(String uri) throws Exception{ + + // When utilizing the getTermExpansion method, will get a list of URIs back + // and not just one URI + protected List getConceptURIsListFromRDF(String rdf) { + List conceptUris = new ArrayList(); + try { + Document doc = XMLUtils.parse(rdf); + NodeList nodes = doc.getElementsByTagName("skos:Concept"); + int numberNodes = nodes.getLength(); + int n; + for (n = 0; n < numberNodes; n++) { + Node node = nodes.item(n); + NamedNodeMap attrs = node.getAttributes(); + Attr idAttr = (Attr) attrs.getNamedItem("rdf:about"); + String conceptUri = idAttr.getTextContent(); + conceptUris.add(conceptUri); + } + + + } catch (IOException e) { + e.printStackTrace(); + System.err.println("rdf: " + rdf); + } catch (SAXException e) { + e.printStackTrace(); + System.err.println("rdf: " + rdf); + } catch (ParserConfigurationException e) { + e.printStackTrace(); + System.err.println("rdf: " + rdf); + } + return conceptUris; + + } + + protected String getDbpediaDescription(String uri) throws Exception { String descriptionSource = " (Source: DBpedia)"; String description = new String(); String qs = "" @@ -336,7 +434,7 @@ public class AgrovocService implements ExternalConceptService { + "SELECT DISTINCT ?description WHERE { \n" + "<" + uri + "> rdfs:comment ?description . \n" + "FILTER (LANG(?description)='en' ) \n" + "}"; - //System.out.println(qs); + // System.out.println(qs); List resultList = new ArrayList(); QueryExecution qexec = null; try { @@ -378,52 +476,119 @@ public class AgrovocService implements ExternalConceptService { } catch (Exception ex) { throw ex; } - //Adding source so it is clear that this description comes from DBPedia + // Adding source so it is clear that this description comes from DBPedia return description + descriptionSource; } - - /** - * @param uri - * @return - */ - protected String stripConceptId(String uri) { - String conceptId = new String(); - int lastslash = uri.lastIndexOf('/'); - conceptId = uri.substring(lastslash + 1, uri.length()); - return conceptId; - } - /** - * @param str - * @return - */ - protected String extractConceptId(String str) { - try { - return str.substring(1, str.length() - 1); - } catch (Exception ex) { - return ""; - } - } - - - //Get concept using agrovoc service - protected String getConceptInfoByURI(String ontologyName, String conceptURI, String format) { - String result = new String(); - ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); - try { - URL url = new URL(AgrovocWS_address); - ACSWWebService agrovoc_service = locator.getACSWWebService(url); - result = agrovoc_service.getConceptByURI(ontologyName, conceptURI, format); - } catch (ServiceException e) { - logger.error("service exception", e); - e.printStackTrace(); - } catch (RemoteException e) { - e.printStackTrace(); - } catch (MalformedURLException e) { - e.printStackTrace(); - } + /** + * @param uri + * @return + */ + protected String stripConceptId(String uri) { + String conceptId = new String(); + int lastslash = uri.lastIndexOf('/'); + conceptId = uri.substring(lastslash + 1, uri.length()); + return conceptId; + } - return result; - } + /** + * @param str + * @return + */ + protected String extractConceptId(String str) { + try { + return str.substring(1, str.length() - 1); + } catch (Exception ex) { + return ""; + } + } + + // Get concept using agrovoc service + protected String getConceptInfoByURI(String ontologyName, + String conceptURI, String format) { + String result = new String(); + ACSWWebServiceServiceLocator locator = new ACSWWebServiceServiceLocator(); + try { + URL url = new URL(AgrovocWS_address); + ACSWWebService agrovoc_service = locator.getACSWWebService(url); + result = agrovoc_service.getConceptByURI(ontologyName, conceptURI, + format); + } catch (ServiceException e) { + logger.error("service exception", e); + e.printStackTrace(); + } catch (RemoteException e) { + e.printStackTrace(); + } catch (MalformedURLException e) { + e.printStackTrace(); + } + + return result; + } + + /** + * The code here utilizes the SKOSMOS REST API for Agrovoc + * This returns JSON LD so we would parse JSON instead of RDF + * The code above can still be utilized if we need to employ the web services directly + */ + //Get search results for a particular term and language code + private String getSKOSMosSearchResults(String term, String lang) { + String urlEncodedTerm = URLEncoder.encode(term); + //Utilize 'starts with' using the * operator at the end + String searchUrlString = this.conceptsSkosMosSearch + "query=" + urlEncodedTerm + "*" + "&lang=" + lang; + URL searchURL = null; + try { + searchURL = new URL(searchUrlString); + } catch (Exception e) { + logger.error("Exception occurred in instantiating URL for " + + searchUrlString, e); + // If the url is having trouble, just return null for the concept + return null; + } + + String results = null; + try { + + StringWriter sw = new StringWriter(); + + BufferedReader in = new BufferedReader(new InputStreamReader( + searchURL.openStream())); + String inputLine; + while ((inputLine = in.readLine()) != null) { + sw.write(inputLine); + } + in.close(); + + results = sw.toString(); + logger.debug(results); + } catch (Exception ex) { + logger.error("Error occurred in getting concept from the URL " + + searchUrlString, ex); + return null; + } + return results; + + } + + //JSON-LD array + private List getConceptURIsListFromSkosMosResult(String results) { + List conceptURIs = new ArrayList(); + JSONObject json = (JSONObject) JSONSerializer.toJSON(results); + //Format should be: { ..."results":["uri":uri...] + if (json.containsKey("results")) { + JSONArray jsonArray = json.getJSONArray("results"); + int numberResults = jsonArray.size(); + int i; + for(i = 0; i < numberResults; i++) { + JSONObject jsonObject = jsonArray.getJSONObject(i); + if(jsonObject.containsKey("uri")) { + conceptURIs.add(jsonObject.getString("uri")); + } + } + } + return conceptURIs; + } + + + } diff --git a/src/edu/cornell/mannlib/semservices/service/impl/LCSHService.java b/src/edu/cornell/mannlib/semservices/service/impl/LCSHService.java index 10b1b285..582977aa 100644 --- a/src/edu/cornell/mannlib/semservices/service/impl/LCSHService.java +++ b/src/edu/cornell/mannlib/semservices/service/impl/LCSHService.java @@ -41,16 +41,8 @@ public class LCSHService implements ExternalConceptService { private final String schemeUri = hostUri + "/authorities/subjects"; private final String baseUri = hostUri + "/search/"; - protected final String dbpedia_endpoint = " http://dbpedia.org/sparql"; - //Property uris used for SKOS - protected final String SKOSNotePropertyURI = "http://www.w3.org/2004/02/skos/core#note"; - protected final String SKOSPrefLabelURI = "http://www.w3.org/2004/02/skos/core#prefLabel"; - protected final String SKOSAltLabelURI = "http://www.w3.org/2008/05/skos-xl#altLabel"; - protected final String SKOSBroaderURI = "http://www.w3.org/2004/02/skos/core#broader"; - protected final String SKOSNarrowerURI = "http://www.w3.org/2004/02/skos/core#narrower"; - protected final String SKOSExactMatchURI = "http://www.w3.org/2004/02/skos/core#exactMatch"; - protected final String SKOSCloseMatchURI = "http://www.w3.org/2004/02/skos/core#closeMatch"; - + + @Override public List getConcepts(String term) throws Exception { List conceptList = new ArrayList(); @@ -95,12 +87,7 @@ public class LCSHService implements ExternalConceptService { // that might exist private List processOutput(String results) throws Exception { List conceptList = new ArrayList(); - //SKOSManager manager = new SKOSManager(); // Get uris from the results - // Properties we will be querying for - //SKOSDataFactory sdf = manager.getSKOSDataFactory(); - - List uris = getConceptURIFromXML(results); String bestMatch = "true"; int i = 0; @@ -109,17 +96,19 @@ public class LCSHService implements ExternalConceptService { bestMatch = "false"; } log.debug("-" + uri + "-"); - String conceptUriString = getSKOSURI(uri); + //This is the URL for retrieving the concept - the pattern is http://id.loc.gov/authorities/subjects/sh85014203.skos.rdf + //This is not the URI itself which would be http://id.loc.gov/authorities/subjects/sh85014203 + String conceptURLString = getSKOSURL(uri); String baseConceptURI = getConceptURI(uri); - URI conceptURI = null; + URL conceptURL = null; try { - conceptURI = new URI(conceptUriString); - } catch (URISyntaxException e) { - log.error("URI syntax exception in trying to get concept uri " + conceptUriString, e); + conceptURL = new URL(conceptURLString); + } catch (Exception e) { + log.error("Error in trying to retrieve concept " + conceptURLString, e); return conceptList; } - log.debug("loading concept uri " + conceptUriString); - Concept c = this.createConcept(bestMatch, conceptUriString); + log.debug("loading concept uri " + conceptURLString); + Concept c = this.createConcept(bestMatch, conceptURLString, baseConceptURI); if(c != null) { conceptList.add(c); } @@ -133,7 +122,7 @@ public class LCSHService implements ExternalConceptService { //Load individual concept using a request //private - public Concept createConcept(String bestMatch, String skosConceptURI) { + public Concept createConcept(String bestMatch, String conceptURLString, String skosConceptURI) { Concept concept = new Concept(); @@ -150,7 +139,8 @@ public class LCSHService implements ExternalConceptService { //Utilize the XML directly instead of the SKOS API try { //LCSH doesn't need a language tag right now as results in english - concept = SKOSUtils.createConceptUsingXMLFromURI(concept, skosConceptURI, null); + //Also want to add skos notes as definition + concept = SKOSUtils.createConceptUsingXMLFromURL(concept, conceptURLString, null, true); } catch(Exception ex) { log.debug("Error occurred for annotation retrieval for skos concept " + skosConceptURI, ex); @@ -163,7 +153,7 @@ public class LCSHService implements ExternalConceptService { - private String getSKOSURI(String uri) { + private String getSKOSURL(String uri) { // Strip .xml at the end and replace with .skos.rdf String skosURI = uri; if (uri.endsWith(".xml")) { diff --git a/src/edu/cornell/mannlib/semservices/util/SKOSUtils.java b/src/edu/cornell/mannlib/semservices/util/SKOSUtils.java index 63998f23..4998212f 100644 --- a/src/edu/cornell/mannlib/semservices/util/SKOSUtils.java +++ b/src/edu/cornell/mannlib/semservices/util/SKOSUtils.java @@ -7,26 +7,24 @@ package edu.cornell.mannlib.semservices.util; import java.io.BufferedReader; -import java.io.IOException; import java.io.InputStreamReader; +import java.io.StringReader; import java.io.StringWriter; import java.net.URL; import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; import java.util.List; -import javax.xml.parsers.ParserConfigurationException; - import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.w3c.dom.Attr; -import org.w3c.dom.Document; -import org.w3c.dom.NamedNodeMap; -import org.w3c.dom.Node; -import org.w3c.dom.NodeList; -import org.xml.sax.SAXException; + +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.NodeIterator; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; import edu.cornell.mannlib.semservices.bo.Concept; @@ -69,37 +67,61 @@ public class SKOSUtils { } // Downloading the XML from the URI itself - //No language tag support here but can be specified if need be at this level as well - public static Concept createConceptUsingXMLFromURI(Concept concept, - String conceptUriString, String langTagValue) { - String results = getConceptXML(conceptUriString); + // No language tag support here but can be specified if need be at this + // level as well + public static Concept createConceptUsingXMLFromURL(Concept concept, + String conceptURLString, String langTagValue, boolean addNotes) { + String results = getConceptXML(conceptURLString); if (StringUtils.isEmpty(results)) { return null; } - return createConceptUsingXML(concept, results, langTagValue); + + // return createConceptUsingXML(concept, results, langTagValue); + return createConceptUsingXMLModel(concept, results, langTagValue, + addNotes); + } + // Because of the fact the xml returns matches by tag name, and the XML may + // look like + // where conceptURI is the concept that is the subject of skos:narrower, we + // need to ensure we are not returning the same uri as that of the main + // concept + public static List removeConceptURIFromList(List uris, + String conceptURI) { + // remove will return a boolean if the value exists in the list and is + // removed + // if/when it returns false, the URI is not in the list + while (uris.remove(conceptURI)) { + } + ; + return uris; + } + /** + * The above code, although functional, does not take advantage of the fact + * that we can actually read and query the RDF in precisely the manner we + * wish. + */ - // Create concept given the actual XML (results_ - // Lang tag value, if populated, will return pref label and alt label which - // match that language tag value - public static Concept createConceptUsingXML(Concept concept, - String results, String langTagValue) { + public static Concept createConceptUsingXMLModel(Concept concept, + String results, String langTagValue, boolean addNotes) { - HashMap relationshipHash = getRelationshipHash(); try { - Document doc = XMLUtils.parse(results); - // Preferred label - List labelLiterals = new ArrayList(); - if (StringUtils.isNotEmpty(langTagValue)) { - labelLiterals = getValuesFromXMLNodes(doc, - getPrefLabelTag(relationshipHash), "xml:lang", langTagValue); - } else { - labelLiterals = getValuesFromXMLNodes(doc, - getPrefLabelTag(relationshipHash), null); - } + String conceptURI = concept.getUri(); + // Load Model from RDF + StringReader reader = new StringReader(results); + Model model = ModelFactory.createDefaultModel(); + model.read(reader, null, "RDF/XML"); + + // Execute the following query to get the information we want for + // this resource + + // Preferred label + List labelLiterals = getPrefLabelsFromModel(conceptURI, + model, langTagValue); if (labelLiterals.size() > 0) { concept.setLabel(labelLiterals.get(0)); } else { @@ -110,281 +132,135 @@ public class SKOSUtils { // Alternate label - List altLabelList = new ArrayList(); - //if language tag is specified, get node values matching that language tag - if (StringUtils.isNotEmpty(langTagValue)) { - altLabelList = getValuesFromXMLNodes(doc, - //TODO: Check if xml:lang or a different version should be used - getAltLabelTag(relationshipHash), "xml:lang", langTagValue); - } else { - altLabelList = getValuesFromXMLNodes(doc, - getAltLabelTag(relationshipHash), null); - } + List altLabelList = getAltLabelsFromModel(conceptURI, + model, langTagValue); concept.setAltLabelList(altLabelList); - - //Broder, narrower, exact match, and close match properties - String conceptURI = concept.getUri(); - List broaderURIList = getBroaderOrNarrowerURIs(doc, getBroaderTag(relationshipHash)); - broaderURIList = removeConceptURIFromList(broaderURIList, conceptURI); + // Broder, narrower, exact match, and close match properties + + List broaderURIList = getBroaderURIsFromModel(conceptURI, + model); + // broaderURIList = removeConceptURIFromList(broaderURIList, + // conceptURI); concept.setBroaderURIList(broaderURIList); - List narrowerURIList = getBroaderOrNarrowerURIs(doc, getNarrowerTag(relationshipHash)); - narrowerURIList = removeConceptURIFromList(narrowerURIList, conceptURI); + List narrowerURIList = getNarrowerURIsFromModel(conceptURI, + model); + // narrowerURIList = removeConceptURIFromList(narrowerURIList, + // conceptURI); concept.setNarrowerURIList(narrowerURIList); - List exactMatchURIList = getCloseOrExactMatchURIs(doc, getExactMatchTag(relationshipHash)); - exactMatchURIList = removeConceptURIFromList(exactMatchURIList, conceptURI); + List exactMatchURIList = getExactMatchURIsFromModel( + conceptURI, model); + // exactMatchURIList = removeConceptURIFromList(exactMatchURIList, + // conceptURI); concept.setExactMatchURIList(exactMatchURIList); - List closeMatchURIList = getCloseOrExactMatchURIs(doc, getCloseMatchTag(relationshipHash)); - closeMatchURIList = removeConceptURIFromList(closeMatchURIList, conceptURI); + List closeMatchURIList = getCloseMatchURIsFromModel( + conceptURI, model); + // closeMatchURIList = removeConceptURIFromList(closeMatchURIList, + // conceptURI); concept.setCloseMatchURIList(closeMatchURIList); - } catch (IOException e) { - log.error("error occurred in parsing " + results, e); - } catch (SAXException e) { - log.error("error occurred in parsing " + results, e); - } catch (ParserConfigurationException e) { + // Notes may exist, in which case they should be employed + if (addNotes) { + List notes = getNotesFromModel(conceptURI, model, + langTagValue); + if (notes.size() > 0) { + concept.setDefinition(notes.get(0)); + } + } + + } catch (Exception e) { log.error("error occurred in parsing " + results, e); } return concept; } - - //Because of the fact the xml returns matches by tag name, and the XML may look like - //where conceptURI is the concept that is the subject of skos:narrower, we need to ensure we are not returning the same uri as that of the main concept - public static List removeConceptURIFromList(List uris, String conceptURI) { - //remove will return a boolean if the value exists in the list and is removed - //if/when it returns false, the URI is not in the list - while(uris.remove(conceptURI)) {}; - return uris; + + private static List getPrefLabelsFromModel(String conceptURI, + Model model, String langTagValue) { + String propertyURI = "http://www.w3.org/2004/02/skos/core#prefLabel"; + return getLabelsFromModel(conceptURI, propertyURI, model, langTagValue); } - // Default to English for search results but this should be made - // configurable - public static List getValuesFromXMLNodes(Document doc, - String tagName, String attributeName) { - return getValuesFromXMLNodes(doc, tagName, attributeName, null); + private static List getAltLabelsFromModel(String conceptURI, + Model model, String langTagValue) { + String propertyURI = "http://www.w3.org/2004/02/skos/core#altLabel"; + return getLabelsFromModel(conceptURI, propertyURI, model, langTagValue); } - public static List getValuesFromXMLNodes(Document doc, - String tagName, String attributeName, String matchAttributeValue) { - NodeList nodes = doc.getElementsByTagName(tagName); - - return getValuesFromXML(nodes, attributeName, matchAttributeValue); - } - - // Returns list of values based on nodes and whether or not a specific - // attribute name should be used or just the text content - // Attribute name returns the value for the attribute on the node - // MatchAttributeValue: returns NODE values that MATCH this value for - // attributeName - //Extending this based on specific SKOSMos search for Agrovoc, sometimes - //results in format , other times in format .. - //closeMatch and exactMatch use these patterns - //broader and narrower may be either rdf:resource pattern above or ...etc. - public static List getValuesFromXML(NodeList nodes, - String attributeName, String matchAttributeValue) { - int len = nodes.getLength(); - int i; - - List values = new ArrayList(); - for (i = 0; i < len; i++) { - Node node = nodes.item(i); - String nodeValue = getNodeValue(node, attributeName, matchAttributeValue); - if(StringUtils.isNotEmpty(nodeValue)) { - values.add(nodeValue); - } - } - return values; - } - - public static String getNodeValue(Node node, String attributeName, String matchAttributeValue) { - String value = null; - if (StringUtils.isEmpty(attributeName)) { - value = node.getTextContent(); - } else { - // Attribute name is specified - // Get the value for the attribute itself - String attributeValue = getAttributeValue(attributeName, node); - // If no matching value for attribute specified, return the - // value of the attribute itself - // e.g. value of "lang" attribute which is "en" - if (StringUtils.isEmpty(matchAttributeValue)) { - value = attributeValue; - } else { - // match attribute and match value are both specified, so - // return NODE value that matches attribute value for given - // attribute name - // e.g. preferred label node value where lang = "en" - if (attributeValue.equals(matchAttributeValue)) { - value = node.getTextContent(); + private static List getLabelsFromModel(String conceptURI, + String propertyURI, Model model, String langTagValue) { + List labels = new ArrayList(); + StmtIterator statements = model.listStatements( + ResourceFactory.createResource(conceptURI), + ResourceFactory.createProperty(propertyURI), (RDFNode) null); + while (statements.hasNext()) { + Statement statement = statements.nextStatement(); + RDFNode node = statement.getObject(); + if (node != null && node.isLiteral()) { + String label = node.asLiteral().getString(); + if (StringUtils.isNotEmpty(langTagValue)) { + String language = node.asLiteral().getLanguage(); + if (language != null && language.equals(langTagValue)) { + labels.add(label); + } + } else { + labels.add(label); } } + } - return value; - } - - - public static String getAttributeValue(String attributeName, Node node) { - NamedNodeMap attrs = node.getAttributes(); - Attr a = (Attr) attrs.getNamedItem(attributeName); - if (a != null) { - return a.getTextContent(); - } - return null; + return labels; } - // The Hash will depend on the particular RDF results - // TODO: Refactor this in a better method - public static HashMap getRelationshipHash() { - HashMap relationshipHash = new HashMap(); - String[] tagsArray = { "prefLabel", "altLabel", "broader", "narrower", - "exactMatch", "closeMatch" }; - List tags = Arrays.asList(tagsArray); + private static List getNotesFromModel(String conceptURI, + Model model, String langTagValue) { + String propertyURI = "http://www.w3.org/2004/02/skos/core#note"; + return getLabelsFromModel(conceptURI, propertyURI, model, langTagValue); + } - - - for (String tag : tags) { - relationshipHash.put(tag, "skos:" + tag); - } - - return relationshipHash; + private static List getCloseMatchURIsFromModel(String conceptURI, + Model model) { + String propertyURI = "http://www.w3.org/2004/02/skos/core#closeMatch"; + return getRelatedURIsFromModel(conceptURI, propertyURI, model); } - public static String getPrefLabelTag( - HashMap relationshipHash) { - return relationshipHash.get("prefLabel"); + private static List getExactMatchURIsFromModel(String conceptURI, + Model model) { + String propertyURI = "http://www.w3.org/2004/02/skos/core#exactMatch"; + return getRelatedURIsFromModel(conceptURI, propertyURI, model); } - public static String getAltLabelTag(HashMap relationshipHash) { - return relationshipHash.get("altLabel"); + private static List getNarrowerURIsFromModel(String conceptURI, + Model model) { + String propertyURI = "http://www.w3.org/2004/02/skos/core#narrower"; + return getRelatedURIsFromModel(conceptURI, propertyURI, model); } - public static String getCloseMatchTag( - HashMap relationshipHash) { - return relationshipHash.get("closeMatch"); + private static List getBroaderURIsFromModel(String conceptURI, + Model model) { + String propertyURI = "http://www.w3.org/2004/02/skos/core#broader"; + return getRelatedURIsFromModel(conceptURI, propertyURI, model); } - public static String getExactMatchTag( - HashMap relationshipHash) { - return relationshipHash.get("exactMatch"); - } + private static List getRelatedURIsFromModel(String conceptURI, + String propertyURI, Model model) { + List URIs = new ArrayList(); + NodeIterator nodeIterator = model.listObjectsOfProperty( + ResourceFactory.createResource(conceptURI), + ResourceFactory.createProperty(propertyURI)); - public static String getBroaderTag(HashMap relationshipHash) { - return relationshipHash.get("broader"); - } - - public static String getNarrowerTag(HashMap relationshipHash) { - return relationshipHash.get("narrower"); - } - - - /** - * - * Broader, narrower, close match, and exact match may be nested values - e.g. Agrovoc - * Even with Agrovoc, they may be nested sometimes and not be nested other times - * The code below handles both situations so the URIs can be returned - */ - - //Broader and narrower values - //Attribute name will be language tag - - - public static List getBroaderOrNarrowerURIs(Document doc, - String tagName) { - NodeList nodes = doc.getElementsByTagName(tagName); - List uris = getPossiblyNestedValuesFromXML(nodes, "rdf:resource", "skos:Concept", "rdf:about"); - return uris; - } - - //Close and exact match - public static List getCloseOrExactMatchURIs(Document doc, - String tagName) { - NodeList nodes = doc.getElementsByTagName(tagName); - List uris = getPossiblyNestedValuesFromXML(nodes, "rdf:resource", "rdf:Description", "rdf:about"); - return uris; - } - - - - public static List getPossiblyNestedValuesFromXML(NodeList nodes, String nodeAttributeName, String childNodeTagName, String childNodeAttributeName ) { - int len = nodes.getLength(); - int i; - - List values = new ArrayList(); - for (i = 0; i < len; i++) { - Node node = nodes.item(i); - //String nodeValue = getNodeValue(node, attributeName, matchAttributeValue); - String nodeValue = getPossiblyNestedNodeValue(node, nodeAttributeName, childNodeTagName, childNodeAttributeName); - if(StringUtils.isNotEmpty(nodeValue)) { - values.add(nodeValue); + while (nodeIterator.hasNext()) { + RDFNode node = nodeIterator.nextNode(); + if (node.isResource() && node.asResource().getURI() != null) { + String URI = node.asResource().getURI(); + URIs.add(URI); } } - return values; - } - //Given node = - //If tag has no attribute that matches attributeName with attributevalue - //and tag has nested children with a given tag name, i.e. - //then retrieve the nested attribute value - //For example: - //if the node looks like then get x - //but if the node looks like then get x - public static String getPossiblyNestedNodeValue(Node node, String nodeAttributeName, String childNodeTagName, - String childNodeAttributeName) { - String value = null; - String attributeValue = getAttributeValue(nodeAttributeName, node); - if(StringUtils.isNotEmpty(attributeValue)) { - value = attributeValue; - } else { - //Check child nodes and see if any of those have the same name as childNodeTagName - NodeList childNodes = node.getChildNodes(); - int numberNodes = childNodes.getLength(); - int i; - for(i = 0; i < numberNodes; i++) { - Node childNode = childNodes.item(i); - String nodeName = childNode.getNodeName(); - if(nodeName.equals(childNodeTagName)) { - value = getAttributeValue(childNodeAttributeName, childNode); - break; //will only get the first one - } - - } - - - } - - - return value; - } - - //Custom cases for Agrovoc and/or similar patterns if they exist - //get about URI from - returns "x" - public static String getTagNestedAbout(Node n) { - NodeList childNodes = n.getChildNodes(); - int numberNodes = childNodes.getLength(); - int i; - for(i = 0; i < numberNodes; i++) { - Node childNode = childNodes.item(i); - String nodeName = childNode.getNodeName(); - String aboutValue = getAttributeValue("about", childNode); - } - return null; - } - - //get about URI from , returns "x" - public static String getTagNestedSKOSConceptAbout(Node n) { - NodeList childNodes = n.getChildNodes(); - int numberNodes = childNodes.getLength(); - int i; - for(i = 0; i < numberNodes; i++) { - Node childNode = childNodes.item(i); - String nodeName = childNode.getNodeName(); - String aboutValue = getAttributeValue("about", childNode); - } - return null; - } - + return URIs; + } + } \ No newline at end of file