From 01351d2bd95d9eec61f0287dc396fb145b260007 Mon Sep 17 00:00:00 2001 From: j2blake Date: Wed, 16 Oct 2013 12:47:46 -0400 Subject: [PATCH] VIVO-350 Get email and preferred title from VCards --- .../search/solr/VIVOPreferredTitleField.java | 87 ----------- .../search/solr/VIVOValuesFromVcards.java | 140 ++++++++++++++++++ .../search/solr/VivoDocumentModifiers.java | 2 +- 3 files changed, 141 insertions(+), 88 deletions(-) delete mode 100644 src/edu/cornell/mannlib/vitro/webapp/search/solr/VIVOPreferredTitleField.java create mode 100644 src/edu/cornell/mannlib/vitro/webapp/search/solr/VIVOValuesFromVcards.java diff --git a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VIVOPreferredTitleField.java b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VIVOPreferredTitleField.java deleted file mode 100644 index 04cd894a..00000000 --- a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VIVOPreferredTitleField.java +++ /dev/null @@ -1,87 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.search.solr; - -import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.ALLTEXT; -import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.ALLTEXTUNSTEMMED; -import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.PREFERRED_TITLE; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.solr.common.SolrInputDocument; - -import com.hp.hpl.jena.query.ResultSet; -import com.hp.hpl.jena.rdf.model.RDFNode; - -import edu.cornell.mannlib.vitro.webapp.beans.Individual; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; -import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; -import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.DocumentModifier; - -/** - * If there are any VCards on this 
Individual with Title objects, store the text - * in the Preferred Title search field. - */ -public class VIVOPreferredTitleField implements DocumentModifier { - private static final Log log = LogFactory - .getLog(VIVOPreferredTitleField.class); - - private static final String QUERY_TEMPLATE = "" - + "prefix vcard: \n" - + "prefix obo: \n\n" - + "SELECT ?title WHERE { \n" // - + " ?uri obo:ARG_2000028 ?card . \n" - + " ?card a vcard:Individual . \n" - + " ?card vcard:hasTitle ?titleHolder . \n" - + " ?titleHolder vcard:title ?title . \n" // - + "}"; - - private RDFServiceFactory rdfServiceFactory; - private boolean shutdown = false; - - public VIVOPreferredTitleField(RDFServiceFactory rdfServiceFactory) { - this.rdfServiceFactory = rdfServiceFactory; - } - - @Override - public void modifyDocument(Individual individual, SolrInputDocument doc, - StringBuffer addUri) { - if (individual == null) - return; - - String uri = "<" + individual.getURI() + "> "; - String query = QUERY_TEMPLATE.replaceAll("\\?uri", uri); - log.debug("Get preferred title(s) for " + uri); - - try { - RDFService rdfService = rdfServiceFactory.getRDFService(); - ResultSet results = RDFServiceUtils.sparqlSelectQuery(query, - rdfService); - if (results != null) { - while (results.hasNext()) { - log.debug("Next solution"); - RDFNode node = results.nextSolution().get("title"); - if ((node != null) && (node.isLiteral())) { - String title = node.asLiteral().getString(); - doc.addField(PREFERRED_TITLE, title); - doc.addField(ALLTEXT, title); - doc.addField(ALLTEXTUNSTEMMED, title); - log.debug("Preferred Title for " + uri + ": '" + title - + "', '" + node.toString() + "'"); - } - } - } - } catch (Exception e) { - if (!shutdown) { - log.error("problem while running query '" + query + "'", e); - } - } - } - - @Override - public void shutdown() { - shutdown = true; - } - -} diff --git a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VIVOValuesFromVcards.java 
b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VIVOValuesFromVcards.java
new file mode 100644
index 00000000..b4141ca6
--- /dev/null
+++ b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VIVOValuesFromVcards.java
@@ -0,0 +1,173 @@
+/* $This file is distributed under the terms of the license in /doc/license.txt$ */
+
+package edu.cornell.mannlib.vitro.webapp.search.solr;
+
+import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.ALLTEXT;
+import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.ALLTEXTUNSTEMMED;
+import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.PREFERRED_TITLE;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.solr.common.SolrInputDocument;
+
+import com.hp.hpl.jena.query.QuerySolution;
+import com.hp.hpl.jena.query.ResultSet;
+import com.hp.hpl.jena.rdf.model.RDFNode;
+
+import edu.cornell.mannlib.vitro.webapp.beans.Individual;
+import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
+import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
+import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
+import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.DocumentModifier;
+
+/**
+ * If there are any VCards on this Individual with Title objects, store the text
+ * in the PREFERRED_TITLE, ALLTEXT and ALLTEXTUNSTEMMED search fields.
+ *
+ * If there are any VCards on this Individual with EMail objects, store the text
+ * in the ALLTEXT and ALLTEXTUNSTEMMED search fields.
+ *
+ * A failed query is logged (unless shutdown() has been called) and is not
+ * rethrown to the caller.
+ */
+public class VIVOValuesFromVcards implements DocumentModifier {
+    private static final Log log = LogFactory
+            .getLog(VIVOValuesFromVcards.class);
+
+    /** Placeholder in the query templates; replaced by the bracketed URI. */
+    private static final String URI_PLACEHOLDER = "?uri";
+
+    /** Selects the title literals of the VCards attached to ?uri. */
+    private static final String PREFERRED_TITLE_QUERY = ""
+            + "prefix vcard: <http://www.w3.org/2006/vcard/ns#> \n"
+            + "prefix obo: <http://purl.obolibrary.org/obo/> \n\n"
+            + "SELECT ?title WHERE { \n" //
+            + " ?uri obo:ARG_2000028 ?card . \n"
+            + " ?card a vcard:Individual . \n"
+            + " ?card vcard:hasTitle ?titleHolder . \n"
+            + " ?titleHolder vcard:title ?title . \n" //
+            + "}";
+
+    /** Adds each non-blank title to the preferred-title and all-text fields. */
+    private static final ResultParser PREFERRED_TITLE_PARSER = new ResultParser() {
+        @Override
+        public void parse(String uri, QuerySolution solution,
+                SolrInputDocument doc) {
+            String title = getLiteralValue(solution, "title");
+            if (StringUtils.isNotBlank(title)) {
+                doc.addField(PREFERRED_TITLE, title);
+                doc.addField(ALLTEXT, title);
+                doc.addField(ALLTEXTUNSTEMMED, title);
+                log.debug("Preferred Title for " + uri + ": '" + title + "'");
+            }
+        }
+    };
+
+    /** Selects the email literals of the VCards attached to ?uri. */
+    private static final String EMAIL_QUERY = ""
+            + "prefix vcard: <http://www.w3.org/2006/vcard/ns#> \n"
+            + "prefix obo: <http://purl.obolibrary.org/obo/> \n\n"
+            + "SELECT ?email WHERE { \n" //
+            + " ?uri obo:ARG_2000028 ?card . \n"
+            + " ?card a vcard:Individual . \n"
+            + " ?card vcard:hasEmail ?emailHolder . \n"
+            + " ?emailHolder vcard:email ?email . \n" //
+            + "}";
+
+    /** Adds each non-blank email address to the all-text fields. */
+    private static final ResultParser EMAIL_PARSER = new ResultParser() {
+        @Override
+        public void parse(String uri, QuerySolution solution,
+                SolrInputDocument doc) {
+            String email = getLiteralValue(solution, "email");
+            if (StringUtils.isNotBlank(email)) {
+                doc.addField(ALLTEXT, email);
+                doc.addField(ALLTEXTUNSTEMMED, email);
+                log.debug("Email for " + uri + ": '" + email + "'");
+            }
+        }
+    };
+
+    private final RDFServiceFactory rdfServiceFactory;
+
+    // Set by shutdown(); only consulted to suppress error logging afterwards.
+    // NOTE(review): volatile in case shutdown() runs on another thread - confirm.
+    private volatile boolean shutdown = false;
+
+    public VIVOValuesFromVcards(RDFServiceFactory rdfServiceFactory) {
+        this.rdfServiceFactory = rdfServiceFactory;
+    }
+
+    /**
+     * Run the title query and the email query for this individual and add
+     * what they find to the document. Does nothing if the individual is null.
+     * The addUri argument is not used.
+     */
+    @Override
+    public void modifyDocument(Individual individual, SolrInputDocument doc,
+            StringBuffer addUri) {
+        if (individual == null)
+            return;
+
+        processQuery(individual, PREFERRED_TITLE_QUERY, PREFERRED_TITLE_PARSER,
+                doc);
+        processQuery(individual, EMAIL_QUERY, EMAIL_PARSER, doc);
+    }
+
+    /**
+     * Substitute the individual's URI into the template, run the query, and
+     * hand each solution to the parser.
+     */
+    private void processQuery(Individual individual, String queryTemplate,
+            ResultParser resultParser, SolrInputDocument doc) {
+        String uri = "<" + individual.getURI() + "> ";
+        // Literal replacement: replaceAll() would interpret '$' and '\' in
+        // the URI as regex group references and escapes.
+        String query = queryTemplate.replace(URI_PLACEHOLDER, uri);
+
+        try {
+            RDFService rdfService = rdfServiceFactory.getRDFService();
+            ResultSet results = RDFServiceUtils.sparqlSelectQuery(query,
+                    rdfService);
+            if (results != null) {
+                while (results.hasNext()) {
+                    log.debug("Next solution");
+                    QuerySolution solution = results.nextSolution();
+                    resultParser.parse(uri, solution, doc);
+                }
+            }
+        } catch (Exception e) {
+            // Once shutdown() has been called, errors are no longer logged.
+            if (!shutdown) {
+                log.error("problem while running query '" + query + "'", e);
+            }
+        }
+    }
+
+    @Override
+    public void shutdown() {
+        shutdown = true;
+    }
+
+    /** Copies values from one query solution into the search document. */
+    private abstract static class ResultParser {
+        public abstract void parse(String uri, QuerySolution solution,
+                SolrInputDocument doc);
+
+        /**
+         * Return the string value of the named variable, or an empty string
+         * if it is unbound, not a literal, or blank.
+         */
+        String getLiteralValue(QuerySolution solution, String name) {
+            RDFNode node = solution.get(name);
+            if ((node != null) && (node.isLiteral())) {
+                String value = node.asLiteral().getString();
+                if (StringUtils.isNotBlank(value)) {
+                    return value;
+                }
+            }
+            return "";
+        }
+    }
+}
diff --git a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java
index f7ada816..927ae69d 100644
--- a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java
+++ b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java
@@ -47,7 +47,7 @@ public class VivoDocumentModifiers implements javax.servlet.ServletContextListen
         }
 
         modifiers.add(new CalculateParameters(dataset));
-        modifiers.add( new VIVOPreferredTitleField( rdfServiceFactory ));
+        modifiers.add( new VIVOValuesFromVcards( rdfServiceFactory ));
         modifiers.add( new VivoISFBasicFields( rdfServiceFactory ));
         modifiers.add( new VivoISFAdvisingFields( rdfServiceFactory ));
         modifiers.add( new VivoISFEducationFields( rdfServiceFactory ));