updates for concept addition and integration of umls/agrovoc search services

This commit is contained in:
hjkhjk54 2011-11-30 18:41:57 +00:00
parent 943315a9c6
commit ce84967d01
28 changed files with 3472 additions and 71 deletions

View file

@ -0,0 +1,11 @@
package edu.cornell.mannlib.semservices.service;
import java.util.List;
import edu.cornell.mannlib.semservices.bo.Concept;
/**
 * Contract shared by the concept lookup services that query an external
 * vocabulary for a search term.
 */
public interface ExternalConceptService {

    /**
     * The single operation an implementation has to expose: look up
     * {@code term} and hand back the concepts found for it.
     *
     * @param term the text to look up
     * @return the concepts found for the term; NOTE(review): implementations
     *         may return {@code null} instead of a list when the lookup
     *         itself fails, so callers should check for it
     */
    List<Concept> processResults(String term);
}

View file

@ -0,0 +1,494 @@
package edu.cornell.mannlib.semservices.service.impl;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.net.MalformedURLException;
import java.net.URL;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.rpc.ServiceException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.fao.gilw.aims.webservices.AgrovocWS;
import org.fao.gilw.aims.webservices.AgrovocWSServiceLocator;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import edu.cornell.mannlib.semservices.bo.Concept;
import edu.cornell.mannlib.semservices.service.ExternalConceptService;
import edu.cornell.mannlib.semservices.util.XMLUtils;
/**
 * {@link ExternalConceptService} implementation that looks terms up through
 * the Agrovoc SOAP web service.
 *
 * <p>Apart from {@link #processResults(String)} every method is a thin
 * wrapper around the remote operation of the same name. Unless stated
 * otherwise a wrapper never throws a checked exception: a failure is logged
 * and the method's default value (an empty string, or {@code null} where
 * noted) is returned.
 */
public class AgrovocService implements ExternalConceptService {

    protected final Log logger = LogFactory.getLog(getClass());

    private java.lang.String AgrovocWS_address = "http://www.fao.org/webservices/AgrovocWS";

    /** Format requested from the service when fetching concept information. */
    private static final String CONCEPT_FORMAT = "SKOS";

    /** XPath selecting the child element of a concept whose xml:lang is EN. */
    private static final String PREF_LABEL_QUERY = "child::*[@xml:lang='EN']";

    /**
     * Resolves {@code term} to a termcode, fetches the concept information for
     * that termcode and converts every {@code skos:Concept} element of the
     * response into a {@link Concept}.
     *
     * @param term the text to look up
     * @return the concepts found; an empty list when there is no termcode, no
     *         response, or the response cannot be parsed; {@code null} when
     *         the termcode lookup itself fails
     */
    public List<Concept> processResults(String term) {
        List<Concept> conceptList = new ArrayList<Concept>();

        String termcode;
        try {
            termcode = getTermcodeByTerm(term);
        } catch (Exception e1) {
            logger.error("Could not get termcode from service", e1);
            return null;
        }

        // No termcode means the service did not answer or found no match, so
        // there is nothing to ask concept information for.
        if (termcode == null || termcode.length() == 0) {
            return conceptList;
        }

        String results = getConceptInfoByTermcodeXML(termcode, CONCEPT_FORMAT);
        // The wrapper yields an empty string after a failed call (already
        // logged there); parsing it would only produce a second error.
        if (results == null || results.length() == 0) {
            return conceptList;
        }

        try {
            Document doc = XMLUtils.parse(results);
            NodeList nodes = doc.getElementsByTagName("skos:Concept");
            for (int i = 0; i < nodes.getLength(); i++) {
                Node node = nodes.item(i);

                Concept concept = new Concept();
                concept.setDefinedBy("Agrovoc");
                concept.setConceptId(termcode);

                // The attribute may be absent; dereferencing it blindly used
                // to abort the whole lookup with a NullPointerException.
                NamedNodeMap attrs = node.getAttributes();
                Attr idAttr = (attrs == null) ? null : (Attr) attrs.getNamedItem("rdf:about");
                if (idAttr != null) {
                    concept.setUri(idAttr.getTextContent());
                } else {
                    logger.warn("skos:Concept element without rdf:about attribute for termcode " + termcode);
                }

                Node prefLabelNode = XMLUtils.getNodeWithXpath(node, PREF_LABEL_QUERY);
                if (prefLabelNode != null) {
                    concept.setLabel(prefLabelNode.getTextContent());
                }

                conceptList.add(concept);
            }
        } catch (IOException e) {
            logger.error("Could not read concept info for termcode " + termcode, e);
        } catch (SAXException e) {
            logger.error("Could not parse concept info for termcode " + termcode, e);
        } catch (ParserConfigurationException e) {
            logger.error("Could not configure XML parser for termcode " + termcode, e);
        }
        return conceptList;
    }

    /**
     * Obtains a port for the web service at the configured address.
     *
     * @return the service port
     * @throws ServiceException if the locator cannot supply the port
     * @throws MalformedURLException if the configured address is not a valid URL
     */
    private AgrovocWS connect() throws ServiceException, MalformedURLException {
        AgrovocWSServiceLocator locator = new AgrovocWSServiceLocator();
        URL url = new URL(AgrovocWS_address);
        return locator.getAgrovocWS(url);
    }

    /**
     * Calls the remote {@code getAgrovocLanguages} operation.
     *
     * @return the service response, or an empty string on failure
     */
    protected String getAgrovocLanguages() {
        String result = "";
        try {
            result = connect().getAgrovocLanguages();
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getTermcodeByTerm} operation. Unlike the other
     * wrappers this one logs a failure and then rethrows it.
     *
     * @param term passed through to the remote operation
     * @return the service response
     * @throws Exception the {@link ServiceException}, {@link RemoteException}
     *         or {@link MalformedURLException} that made the call fail
     */
    protected String getTermcodeByTerm(String term) throws Exception {
        String result = "";
        try {
            result = connect().getTermcodeByTerm(term);
        } catch (ServiceException e) {
            logger.error("service exception", e);
            throw e;
        } catch (RemoteException e) {
            logger.error("remote exception", e);
            throw e;
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
            throw e;
        }
        return result;
    }

    /**
     * Calls the remote {@code getTermcodeByTermXML} operation.
     *
     * @param term passed through to the remote operation
     * @param format passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String getTermcodeByTermXML(String term, String format) {
        String result = "";
        try {
            result = connect().getTermcodeByTermXML(term, format);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getTermByLanguage} operation.
     *
     * @param termcode passed through to the remote operation
     * @param language passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String getTermByLanguage(int termcode, String language) {
        String result = "";
        try {
            result = connect().getTermByLanguage(termcode, language);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getTermByLanguageXML} operation.
     *
     * @param termcode passed through to the remote operation
     * @param language passed through to the remote operation
     * @param format passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String getTermByLanguageXML(int termcode, String language, String format) {
        String result = "";
        try {
            result = connect().getTermByLanguageXML(termcode, language, format);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getTermsListByLanguage2} operation.
     *
     * @param termcodes passed through to the remote operation
     * @param language passed through to the remote operation
     * @param sep passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String getTermsListByLanguage2(String termcodes, String language, String sep) {
        String result = "";
        try {
            result = connect().getTermsListByLanguage2(termcodes, language, sep);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getTermsListByLanguageXML} operation.
     *
     * @param termcodes passed through to the remote operation
     * @param language passed through to the remote operation
     * @param format passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String getTermsListByLanguageXML(String termcodes, String language, String format) {
        String result = "";
        try {
            result = connect().getTermsListByLanguageXML(termcodes, language, format);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getAllLabelsByTermcode2} operation.
     *
     * @param termcode passed through to the remote operation
     * @param sep passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String getAllLabelsByTermcode2(int termcode, String sep) {
        String result = "";
        try {
            result = connect().getAllLabelsByTermcode2(termcode, sep);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getAllLabelsByTermcodeXML} operation.
     *
     * @param termcode passed through to the remote operation
     * @param format passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String getAllLabelsByTermcodeXML(int termcode, String format) {
        String result = "";
        try {
            result = connect().getAllLabelsByTermcodeXML(termcode, format);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code simpleSearchByMode2} operation.
     *
     * @param term passed through to the remote operation
     * @param mode passed through to the remote operation
     * @param sep passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String simpleSearchByMode2(String term, String mode, String sep) {
        String result = "";
        try {
            result = connect().simpleSearchByMode2(term, mode, sep);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code simpleSearchByModeXML} operation.
     *
     * @param term passed through to the remote operation
     * @param mode passed through to the remote operation
     * @param format passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String simpleSearchByModeXML(String term, String mode, String format) {
        String result = "";
        try {
            result = connect().simpleSearchByModeXML(term, mode, format);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code searchByTerm2} operation.
     *
     * @param term passed through to the remote operation
     * @param sep passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String searchByTerm2(String term, String sep) {
        String result = "";
        try {
            result = connect().searchByTerm2(term, sep);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code searchByTermXML} operation.
     *
     * @param term passed through to the remote operation
     * @param format passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String searchByTermXML(String term, String format) {
        String result = "";
        try {
            result = connect().searchByTermXML(term, format);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code searchCategoryByMode} operation.
     *
     * @param term passed through to the remote operation
     * @param lang passed through to the remote operation
     * @param schemeid passed through to the remote operation
     * @param mode passed through to the remote operation
     * @param sep passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String searchCategoryByMode(String term, String lang, String schemeid, String mode, String sep) {
        String result = "";
        try {
            result = connect().searchCategoryByMode(term, lang, schemeid, mode, sep);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code searchCategoryByModeXML} operation. Note that
     * the argument order differs from {@link #searchCategoryByMode}.
     *
     * @param term passed through to the remote operation
     * @param mode passed through to the remote operation
     * @param schemeid passed through to the remote operation
     * @param lang passed through to the remote operation
     * @param format passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String searchCategoryByModeXML(String term, String mode, String schemeid, String lang, String format) {
        String result = "";
        try {
            result = connect().searchCategoryByModeXML(term, mode, schemeid, lang, format);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getConceptInfoByTermcode} operation.
     *
     * @param termcode passed through to the remote operation
     * @return the service response, or {@code null} on failure
     */
    protected String[] getConceptInfoByTermcode(String termcode) {
        String[] result = null;
        try {
            result = connect().getConceptInfoByTermcode(termcode);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getConceptInfoByTermcodeXML} operation.
     *
     * @param termcode passed through to the remote operation
     * @param format passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String getConceptInfoByTermcodeXML(String termcode, String format) {
        String result = "";
        try {
            result = connect().getConceptInfoByTermcodeXML(termcode, format);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getDefinitions} operation.
     *
     * @param termcode passed through to the remote operation
     * @param lang passed through to the remote operation
     * @return the service response, or {@code null} on failure
     */
    protected String[] getDefinitions(int termcode, String lang) {
        String[] result = null;
        try {
            result = connect().getDefinitions(termcode, lang);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getDefinitionsXML} operation.
     *
     * @param termcode passed through to the remote operation
     * @param lang passed through to the remote operation
     * @param format passed through to the remote operation
     * @return the service response, or {@code null} on failure
     */
    protected String getDefinitionsXML(int termcode, String lang, String format) {
        String result = null;
        try {
            result = connect().getDefinitionsXML(termcode, lang, format);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Calls the remote {@code getTermExpansion} operation.
     *
     * @param aQuery passed through to the remote operation
     * @param langugage passed through to the remote operation
     * @return the service response, or an empty string on failure
     */
    protected String getTermExpansion(String aQuery, String langugage) {
        String result = "";
        try {
            result = connect().getTermExpansion(aQuery, langugage);
        } catch (ServiceException e) {
            logger.error("service exception", e);
        } catch (RemoteException e) {
            logger.error("remote exception", e);
        } catch (MalformedURLException e) {
            logger.error("malformed URL exception", e);
        }
        return result;
    }

    /**
     * Downloads the document served at the service address with
     * {@code ?wsdl} appended.
     *
     * @return the downloaded text with its lines joined without separators,
     *         or an empty string on failure
     */
    protected String getWsdl() {
        String result = "";
        BufferedReader in = null;
        try {
            URL rss = new URL(AgrovocWS_address + "?wsdl");
            // NOTE(review): decodes with the platform default charset, as
            // before — confirm whether the endpoint's encoding should be used.
            in = new BufferedReader(new InputStreamReader(rss.openStream()));
            StringWriter sw = new StringWriter();
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                sw.write(inputLine);
            }
            result = sw.toString();
        } catch (Exception ex) {
            logger.error("error occurred in servlet", ex);
        } finally {
            // Close on every path; previously a failed read leaked the stream.
            if (in != null) {
                try {
                    in.close();
                } catch (IOException closeFailure) {
                    logger.warn("could not close WSDL stream", closeFailure);
                }
            }
        }
        return result;
    }
}

View file

@ -0,0 +1,152 @@
package edu.cornell.mannlib.semservices.service.impl;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.net.URL;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import net.sf.json.JSONSerializer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.semservices.bo.Concept;
import edu.cornell.mannlib.semservices.service.ExternalConceptService;
/**
 * {@link ExternalConceptService} implementation that sends the search term
 * to the MeaningLookup servlet and converts its JSON answer into
 * {@link Concept} objects.
 *
 * @author jaf30
 *
 */
public class UMLSService implements ExternalConceptService {

    protected final Log logger = LogFactory.getLog(getClass());

    private static final String submissionUrl = "http://link.informatics.stonybrook.edu/MeaningLookup/MlServiceServlet?";
    private static final String baseUri = "http://link.informatics.stonybrook.edu/umls/CUI/";

    /** Charset used to encode the query string and to decode the response. */
    private static final String CHARSET = "UTF-8";

    /**
     * Queries the lookup servlet for {@code term} and converts the answer.
     *
     * @param term the text to look up
     * @return the concepts found (possibly empty), or {@code null} when the
     *         request cannot be built or the response cannot be read
     */
    public List<Concept> processResults(String term) {
        String results;
        BufferedReader in = null;
        try {
            // Encode explicitly as UTF-8: the one-argument encode() is
            // deprecated because it depends on the platform default charset.
            String dataUrl = submissionUrl + "textToProcess="
                    + URLEncoder.encode(term, CHARSET) + "&format=json";
            URL rss = new URL(dataUrl);
            // NOTE(review): assumes the servlet answers in UTF-8 (the usual
            // encoding for JSON) — confirm against the service.
            in = new BufferedReader(new InputStreamReader(rss.openStream(), CHARSET));
            StringWriter sw = new StringWriter();
            String inputLine;
            while ((inputLine = in.readLine()) != null) {
                sw.write(inputLine);
            }
            results = sw.toString();
        } catch (Exception ex) {
            logger.error("error occurred in servlet", ex);
            return null;
        } finally {
            // Close on every path; previously a failed read leaked the stream.
            closeQuietly(in);
        }
        return processOutput(results);
    }

    /**
     * Closes {@code reader} if it was opened, logging instead of propagating
     * a failure to close.
     */
    private void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        } catch (java.io.IOException closeFailure) {
            logger.warn("could not close response stream", closeFailure);
        }
    }

    /**
     * Converts the JSON answer into concepts. Entries of the "Best Match"
     * array come first and are flagged as best matches; entries of the "All"
     * array follow, skipping any whose CUI already appeared as a best match.
     *
     * @param results the JSON text returned by the servlet
     * @return the concepts extracted so far; parsing problems are logged and
     *         end the conversion early rather than being thrown
     */
    private List<Concept> processOutput(String results) {
        List<Concept> conceptList = new ArrayList<Concept>();
        List<String> bestMatchIdList = new ArrayList<String>();
        try {
            JSONObject json = (JSONObject) JSONSerializer.toJSON(results);

            if (json.has("Best Match")) {
                JSONArray bestMatchArray = json.getJSONArray("Best Match");
                int len = bestMatchArray.size();
                if (len > 1) {
                    logger.debug("Found this many best matches: " + len);
                }
                for (int i = 0; i < len; i++) {
                    JSONObject o = bestMatchArray.getJSONObject(i);
                    String cui = getJsonValue(o, "CUI");
                    bestMatchIdList.add(cui);
                    conceptList.add(buildConcept(o, cui, true));
                }
            }

            if (json.has("All")) {
                JSONArray allArray = json.getJSONArray("All");
                int len = allArray.size();
                for (int i = 0; i < len; i++) {
                    JSONObject o = allArray.getJSONObject(i);
                    String cui = getJsonValue(o, "CUI");
                    // prevent duplicate concepts in list
                    if (!bestMatchIdList.contains(cui)) {
                        conceptList.add(buildConcept(o, cui, false));
                    }
                }
            }
        } catch (Exception ex) {
            logger.error("Could not get concepts", ex);
        }
        return conceptList;
    }

    /**
     * Builds one UMLS concept from a JSON entry.
     *
     * @param o the JSON entry supplying label, type and definition
     * @param cui the entry's CUI, used as concept id and appended to the base URI
     * @param bestMatch whether to flag the concept as a best match
     * @return the populated concept
     */
    private Concept buildConcept(JSONObject o, String cui, boolean bestMatch) {
        Concept concept = new Concept();
        concept.setDefinedBy("UMLS");
        concept.setBestMatch(bestMatch ? "true" : "false");
        concept.setConceptId(cui);
        concept.setLabel(getJsonValue(o, "label"));
        concept.setType(getJsonValue(o, "type"));
        concept.setDefinition(getJsonValue(o, "definition"));
        concept.setUri(baseUri + cui);
        return concept;
    }

    /**
     * Get a string from a json object or an empty string if there is no value for the given key
     *
     * @param obj the JSON object to read from
     * @param key the key to look up
     * @return the value stored under {@code key}, or {@code ""} when the key is absent
     */
    protected String getJsonValue(JSONObject obj, String key) {
        if (obj.has(key)) {
            return obj.getString(key);
        }
        return "";
    }
}