Removing rdfs:label from search index document field ALLTEXT. Moving vivo specific DocumentModifiers out of vitro. NIHVIVO-2975

This commit is contained in:
briancaruso 2011-11-01 16:17:26 +00:00
parent 3717da7fea
commit bc9c3d215c
2 changed files with 114 additions and 331 deletions

View file

@ -1,14 +1,10 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -25,346 +21,127 @@ import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.shared.Lock;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
/**
* DocumentModifier that will run SPARQL queries for an
* Individual and add all the columns from all the rows
* in the solution set to the ALLTEXT field.
*
* @author bdc34
*
*/
public class ContextNodeFields implements DocumentModifier{
// NOTE(review): 'shutdown' and 'log' are each declared twice in this span;
// the declarations are kept exactly as found.

// Model that the SPARQL queries of this modifier are executed against.
protected Model model;
// SPARQL queries handed to executeQueryForValues by getValues; empty unless
// the two-argument constructor replaces the list.
protected List<String> queries = new ArrayList<String>();
// Set to true by shutdown(); while set, query failures are not logged as errors.
protected boolean shutdown = false;
// NOTE(review): threadPool and THREAD_POOL_SIZE are not referenced by any
// code visible in this span of the file - confirm whether they are still needed.
protected static ExecutorService threadPool = null;
protected static final int THREAD_POOL_SIZE = 10;
// Query collections filled in by the static initializer blocks further down.
protected static final List<String> singleValuedQueriesForAgent = new ArrayList<String>();
protected static final List<String> singleValuedQueriesForInformationResource = new ArrayList<String>();
protected static final List<String> multiValuedQueriesForAgent = new ArrayList<String>();
// Single query string, assigned once in its own static initializer block.
protected static final String multiValuedQueryForInformationResource;
protected Log log = LogFactory.getLog(ContextNodeFields.class);
protected boolean shutdown = false;
protected Log log = LogFactory.getLog(ContextNodeFields.class);
/**
 * Creates a modifier bound to the given model. The query list keeps the
 * value from its field initializer (an empty list).
 *
 * @param model the model the SPARQL queries are executed against
 */
public ContextNodeFields(Model model){
    super();
    this.model = model;
}
/**
 * Creates a modifier that queries {@code model} with the supplied SPARQL
 * queries when Solr documents are built.
 *
 * @param model   the model to query
 * @param queries the SPARQL queries to run; the list is stored as given,
 *                not copied
 */
protected ContextNodeFields(Model model, List<String> queries){
    super();
    this.queries = queries;
    this.model = model;
}
/**
 * Collects the text that is added to the ALLTEXT field of the Solr
 * document for one individual. This implementation runs the configured
 * queries through executeQueryForValues; being protected, it can be
 * overridden to supply the values differently.
 *
 * @param individual the individual being indexed
 * @return the text values to add to the ALLTEXT field of the Solr document
 */
protected StringBuffer getValues( Individual individual ){
    final StringBuffer allText = executeQueryForValues( individual, queries );
    return allText;
}
// NOTE(review): two versions of modifyDocument are interleaved in this span
// (the signature appears twice and the braces do not balance), so the code
// is left untouched and only annotated. Neither version reads 'addUri'.
@Override
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
log.debug("retrieving context node values..");
public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) {
// Nothing to add when there is no individual.
if( individual == null )
return;
// First variant: run every multi-valued Agent query, then combine that text
// with the result of the multi-valued InformationResource query.
StringBuffer objectProperties = singleThreadExecute( individual, multiValuedQueriesForAgent);
SolrInputField field = doc.getField(VitroSearchTermNames.ALLTEXT);
if( field == null ){
// No ALLTEXT field yet: create it with the combined text.
doc.addField(VitroSearchTermNames.ALLTEXT,
objectProperties + " " +
runQuery(individual, multiValuedQueryForInformationResource));
}else{
// ALLTEXT already exists: append the combined text with the field's current boost.
field.addValue(objectProperties + " " +
runQuery(individual, multiValuedQueryForInformationResource),
field.getBoost());
}
log.debug("context node values are retrieved");
// Second variant: obtain the text from getValues() and add it to ALLTEXT.
log.debug( "doing context nodes for: " + individual.getURI());
/* get text from the context nodes and add them to ALLTEXT */
StringBuffer values = getValues( individual );
SolrInputField field = doc.getField(VitroSearchTermNames.ALLTEXT);
if( field == null ){
// No ALLTEXT field yet: create it from the collected values.
doc.addField(VitroSearchTermNames.ALLTEXT, values);
}else{
// ALLTEXT already exists: append the values with the field's current boost.
field.addValue(values, field.getBoost());
}
}
// NOTE(review): the start of executeQueryForValues is interleaved here with
// the body of singleThreadExecute (a second method signature opens before the
// first one closes), so the code is left untouched and only annotated.
protected StringBuffer executeQueryForValues( Individual individual, Collection<String> queries){
/* execute all the queries on the list and concat the values to add to all text */
StringBuffer allValues = new StringBuffer("");
// Pre-bind ?uri to the individual's URI; the same binding is used for every query.
QuerySolutionMap initialBinding = new QuerySolutionMap();
initialBinding.add("uri", ResourceFactory.createResource(individual.getURI()));
// singleThreadExecute: runs each query in turn through runQuery and
// concatenates the results after a single leading space.
protected StringBuffer singleThreadExecute(Individual individual, List<String> queries ){
StringBuffer propertyValues = new StringBuffer(" ");
for(String query : queries ){
propertyValues.append(runQuery(individual, query));
}
return propertyValues;
// Remainder of executeQueryForValues: one pass per query.
for(String query : queries ){
StringBuffer valuesForQuery = new StringBuffer();
Query sparqlQuery = QueryFactory.create( query, Syntax.syntaxARQ);
// Hold the model's read lock while this query executes.
model.getLock().enterCriticalSection(Lock.READ);
try{
QueryExecution qExec =
QueryExecutionFactory.create(sparqlQuery, model, initialBinding);
try{
ResultSet results = qExec.execSelect();
// Append the text of every result row (see getTextForRow).
while(results.hasNext()){
valuesForQuery.append(
getTextForRow( results.nextSolution() ) ) ;
}
}catch(Throwable t){
// Failures are logged unless shutdown is set; the loop then moves on to the next query.
if( ! shutdown )
log.error(t,t);
} finally{
qExec.close();
}
}finally{
model.getLock().leaveCriticalSection();
}
if(log.isDebugEnabled()){
log.debug("query: '" + query + "'");
log.debug("text for query: '" + valuesForQuery.toString() + "'");
}
allValues.append(valuesForQuery);
}
return allValues;
}
/**
 * Runs one SPARQL SELECT query with {@code ?uri} pre-bound to the
 * individual's URI and concatenates every non-null value of every result
 * row, each preceded by a single space.
 *
 * The model's read lock is held while the query executes. Anything thrown
 * during execution is logged (unless shutdown has been requested) and the
 * text gathered so far is returned.
 *
 * @param individual the individual whose URI is bound to {@code ?uri}
 * @param query      the SPARQL query text, parsed with ARQ syntax
 * @return the concatenated values; empty when the query yields nothing
 */
public StringBuffer runQuery( Individual individual, String query ){
    final StringBuffer collected = new StringBuffer();

    final QuerySolutionMap binding = new QuerySolutionMap();
    binding.add("uri", ResourceFactory.createResource(individual.getURI()));

    final Query parsed = QueryFactory.create( query, Syntax.syntaxARQ);

    model.getLock().enterCriticalSection(Lock.READ);
    try{
        final QueryExecution execution = QueryExecutionFactory.create(parsed, model, binding);
        try{
            for( ResultSet rows = execution.execSelect(); rows.hasNext(); ){
                final QuerySolution row = rows.nextSolution();
                for( Iterator<String> names = row.varNames(); names.hasNext(); ){
                    final String varName = names.next();
                    final RDFNode value = row.get( varName );
                    if( value == null ){
                        log.debug(varName + " is null");
                        continue;
                    }
                    collected.append(" ").append(value.toString());
                }
            }
        }catch(Throwable t){
            // Errors are only reported while the modifier is still active.
            if( ! shutdown )
                log.error(t,t);
        } finally{
            execution.close();
        }
    }finally{
        model.getLock().leaveCriticalSection();
    }
    return collected;
}
/**
 * Builds the text contribution of a single result row: the string form of
 * every non-null variable value, each preceded by one space.
 *
 * @param row a query solution; may be null
 * @return the concatenated values, or the empty string for a null row
 */
protected String getTextForRow( QuerySolution row){
    if( row == null )
        return "";

    final StringBuilder rowText = new StringBuilder();
    for( Iterator<String> names = row.varNames(); names.hasNext(); ){
        final String varName = names.next();
        final RDFNode value = row.get( varName );
        if( value == null ){
            log.debug(varName + " is null");
        }else{
            rowText.append(" ").append( value.toString() );
        }
    }
    return rowText.toString();
}
// SPARQL prefix declarations; every query string built in the static
// initializer blocks of this class starts with this value.
protected static final String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
+ " prefix core: <http://vivoweb.org/ontology/core#> "
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
// Single-valued queries for foaf:Agent. Each query selects one string column
// (?contextNodeProperty) reached from ?uri through a context node ?c of a
// particular type.
// NOTE(review): no code visible in this span of the file reads
// singleValuedQueriesForAgent - confirm whether it is still used.
static {
// Context nodes of type core:Position.
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:hrJobTitle ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:involvedOrganizationName ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:positionForPerson ?f . ?f rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:positionInOrganization ?i . ?i rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Position . " +
" ?c core:titleOrRole ?ContextNodeProperty . }");
// Context nodes of type core:Relationship.
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:advisee ?d . ?d rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:degreeCandidacy ?e . ?e rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:linkedAuthor ?f . ?f rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:Relationship . " +
" ?c core:linkedInformationResource ?h . ?h rdfs:label ?ContextNodeProperty . }");
// Context nodes of type core:AwardReceipt.
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:AwardReceipt . " +
" ?c core:awardConferredBy ?d . ?d rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:AwardReceipt . " +
" ?c core:awardOrHonorFor ?e . ?e rdfs:label ?ContextNodeProperty . }");
singleValuedQueriesForAgent.add(prefix + "SELECT " +
"(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" +
"?uri rdf:type foaf:Agent ; ?b ?c . " +
" ?c rdf:type core:AwardReceipt . " +
" ?c core:description ?ContextNodeProperty . }");
// Context nodes of type core:Role; the only query here using DISTINCT and ORDER BY.
singleValuedQueriesForAgent.add(prefix + "SELECT DISTINCT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
+ " ?Organization rdfs:label ?ContextNodeProperty . "
+ " } ORDER BY ?ContextNodeProperty ");
// Context nodes of type core:EducationalTraining.
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:degreeEarned ?d . ?d rdfs:label ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:degreeEarned ?d . ?d core:abbreviation ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:majorField ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:departmentOrSchool ?ContextNodeProperty ."
+ " }");
singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ " ?c core:trainingAtOrganization ?e . ?e rdfs:label ?ContextNodeProperty . "
+ " }");
}
// Single-valued queries for core:InformationResource. Each query selects one
// string column (?contextNodeProperty): the rdfs:label of a resource related
// to ?uri.
// NOTE(review): no code visible in this span of the file reads
// singleValuedQueriesForInformationResource - confirm whether it is still used.
static {
// Label of each author linked through an authorship node.
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ " ?uri rdf:type core:InformationResource . "
+ "?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ."
+ "?b rdfs:label ?ContextNodeProperty .}");
// Label of each linked information resource.
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ " ?uri rdf:type core:InformationResource . "
+ " ?uri core:linkedInformationResource ?d ."
+ " ?d rdfs:label ?ContextNodeProperty . }");
// Label of each core:features value.
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri core:features ?i . ?i rdfs:label ?ContextNodeProperty ."
+ "}");
// Label of each bibo:editor value.
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri bibo:editor ?e . ?e rdfs:label ?ContextNodeProperty ."
+ "}");
// Label of each subject area.
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri core:hasSubjectArea ?f . ?f rdfs:label ?ContextNodeProperty ."
+ "}");
// Label of whatever each subject area is a core:researchAreaOf.
singleValuedQueriesForInformationResource.add(prefix +
"SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {"
+ "?uri rdf:type core:InformationResource . "
+ "?uri core:hasSubjectArea ?f . ?f core:researchAreaOf ?h . ?h rdfs:label ?ContextNodeProperty ."
+ "}");
}
// Multi-valued queries for foaf:Agent: one query per context-node type, each
// selecting several string columns in a single row. Most columns come from
// OPTIONAL patterns, so they may be unbound in a given row.
static{
// core:Position context nodes.
multiValuedQueriesForAgent.add(prefix +
"SELECT " +
"(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " +
" (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " +
" (str(?TitleOrRole) as ?titleOrRole) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Position . "
+ " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . "
+ " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ."
+ " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . "
+ " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
+ " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
+ " }");
// core:Relationship context nodes.
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " +
" (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Relationship . "
+ " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . "
+ " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ."
+ " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . "
+ " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . "
+ " } ");
// core:AwardReceipt context nodes.
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " +
" (str(?Description) as ?description) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:AwardReceipt . "
+ " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy . } . "
+ " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor . } ."
+ " OPTIONAL { ?c core:description ?Description . } . "
+ " }");
// core:Role context nodes: label of the organization the role is in (no OPTIONAL parts).
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
+ " ?Organization rdfs:label ?OrganizationLabel . "
+ " }");
// core:EducationalTraining context nodes.
multiValuedQueriesForAgent.add(prefix +
"SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
+ "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " +
"(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {"
+ " ?uri rdf:type foaf:Agent ; ?b ?c . "
+ " ?c rdf:type core:EducationalTraining . "
+ "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . "
+ "OPTIONAL { ?c core:majorField ?MajorField .} ."
+ " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }"
+ " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . "
+"}");
}
// Multi-valued query for core:InformationResource: a single query whose
// OPTIONAL parts select the labels of linked authors and linked information
// resources (both through an authorship node), editors, subject areas and
// features. This block performs the one assignment of the static final field.
static {
multiValuedQueryForInformationResource = prefix +
"SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) "
+ "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) " +
"(str(?Features) as ?features) WHERE {"
+ " ?uri rdf:type core:InformationResource . "
+ "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." +
"?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource .} . "
+ "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ."
+ " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea . } "
+ " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . "
+"}" ;
}
/**
 * Flags this modifier as shut down. While the flag is set, failures during
 * query execution in this class are no longer logged as errors.
 */
public void shutdown(){
    this.shutdown = true;
}
/**
 * Flags this modifier as shut down. While the flag is set, failures during
 * query execution in this class are no longer logged as errors.
 */
public void shutdown(){
    this.shutdown = true;
}
}

View file

@ -80,9 +80,15 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
OntModel jenaOntModel = ModelContext.getJenaOntModel(context);
List<DocumentModifier> modifiers = new ArrayList<DocumentModifier>();
modifiers.add(new CalculateParameters(dataset));
modifiers.add(new ContextNodeFields(jenaOntModel));
/* try to get context attribute DocumentModifiers
* and use that as the start of the list of DocumentModifier
* objects. This allows other listeners to add to the basic set of
* DocumentModifiers. */
List<DocumentModifier> modifiers = (List<DocumentModifier>)context.getAttribute("DocumentModifiers");
if( modifiers == null )
modifiers = new ArrayList<DocumentModifier>();
modifiers.add(new NameBoost());
modifiers.add(new ThumbnailImageURL(jenaOntModel));