From bc9c3d215cfc4d4f022dbb8b0a2e177cefbb0752 Mon Sep 17 00:00:00 2001 From: briancaruso Date: Tue, 1 Nov 2011 16:17:26 +0000 Subject: [PATCH] Removing rdfs:label from search index document field ALLTEXT. Moving vivo specific DocumentModifiers out of vitro. NIHVIVO-2975 --- .../webapp/search/solr/ContextNodeFields.java | 433 +++++------------- .../vitro/webapp/search/solr/SolrSetup.java | 12 +- 2 files changed, 114 insertions(+), 331 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ContextNodeFields.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ContextNodeFields.java index 1b7fdf1e5..dddb0b6d6 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ContextNodeFields.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ContextNodeFields.java @@ -1,14 +1,10 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ - package edu.cornell.mannlib.vitro.webapp.search.solr; import java.util.ArrayList; +import java.util.Collection; import java.util.Iterator; import java.util.List; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Future; -import java.util.concurrent.ThreadFactory; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -25,346 +21,127 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.RDFNode; -import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.ResourceFactory; import com.hp.hpl.jena.shared.Lock; import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames; +/** + * DocumentModifier that will run SPARQL queries for an + * Individual and add all the columns from all the rows + * in the solution set to the ALLTEXT field. + * + * @author bdc34 + * + */ public class ContextNodeFields implements DocumentModifier{ protected Model model; + protected List queries = new ArrayList(); - protected boolean shutdown = false; - protected static ExecutorService threadPool = null; - protected static final int THREAD_POOL_SIZE = 10; - - protected static final List singleValuedQueriesForAgent = new ArrayList(); - protected static final List singleValuedQueriesForInformationResource = new ArrayList(); - protected static final List multiValuedQueriesForAgent = new ArrayList(); - protected static final String multiValuedQueryForInformationResource; - - protected Log log = LogFactory.getLog(ContextNodeFields.class); + protected boolean shutdown = false; + protected Log log = LogFactory.getLog(ContextNodeFields.class); + - public ContextNodeFields(Model model){ - this.model = model; - } - + /** + * Construct this with a model to query when building Solr Documents and + * a list of the SPARQL queries to run. + */ + protected ContextNodeFields(Model model, List queries){ + this.model = model; + this.queries = queries; + } + + /** + * Implement this method to get values that will be added to ALLTEXT + * field of solr Document for each individual. + * + * @param individual + * @return StringBuffer with text values to add to ALLTEXT field of solr Document. + */ + protected StringBuffer getValues( Individual individual ){ + return executeQueryForValues( individual, queries ); + } + @Override - public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) { - log.debug("retrieving context node values.."); + public void modifyDocument(Individual individual, SolrInputDocument doc, StringBuffer addUri) { + if( individual == null ) + return; - StringBuffer objectProperties = singleThreadExecute( individual, multiValuedQueriesForAgent); - - SolrInputField field = doc.getField(VitroSearchTermNames.ALLTEXT); - if( field == null ){ - doc.addField(VitroSearchTermNames.ALLTEXT, - objectProperties + " " + - runQuery(individual, multiValuedQueryForInformationResource)); - }else{ - field.addValue(objectProperties + " " + - runQuery(individual, multiValuedQueryForInformationResource), - field.getBoost()); - } - log.debug("context node values are retrieved"); + log.debug( "doing context nodes for: " + individual.getURI()); + + /* get text from the context nodes and add the to ALLTEXT */ + StringBuffer values = getValues( individual ); + + SolrInputField field = doc.getField(VitroSearchTermNames.ALLTEXT); + if( field == null ){ + doc.addField(VitroSearchTermNames.ALLTEXT, values); + }else{ + field.addValue(values, field.getBoost()); + } } + + + protected StringBuffer executeQueryForValues( Individual individual, Collection queries){ + /* execute all the queries on the list and concat the values to add to all text */ + + StringBuffer allValues = new StringBuffer(""); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + initialBinding.add("uri", ResourceFactory.createResource(individual.getURI())); - protected StringBuffer singleThreadExecute(Individual individual, List queries ){ - StringBuffer propertyValues = new StringBuffer(" "); - for(String query : queries ){ - propertyValues.append(runQuery(individual, query)); - } - return propertyValues; + for(String query : queries ){ + StringBuffer valuesForQuery = new StringBuffer(); + + Query sparqlQuery = QueryFactory.create( query, Syntax.syntaxARQ); + model.getLock().enterCriticalSection(Lock.READ); + try{ + QueryExecution qExec = + QueryExecutionFactory.create(sparqlQuery, model, initialBinding); + try{ + ResultSet results = qExec.execSelect(); + while(results.hasNext()){ + valuesForQuery.append( + getTextForRow( results.nextSolution() ) ) ; + } + }catch(Throwable t){ + if( ! shutdown ) + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + model.getLock().leaveCriticalSection(); + } + if(log.isDebugEnabled()){ + log.debug("query: '" + query + "'"); + log.debug("text for query: '" + valuesForQuery.toString() + "'"); + } + allValues.append(valuesForQuery); + } + return allValues; } - public StringBuffer runQuery( Individual individual, String query ){ - StringBuffer propertyValues = new StringBuffer(); - - QuerySolutionMap initialBinding = new QuerySolutionMap(); - Resource uriResource = ResourceFactory.createResource(individual.getURI()); - initialBinding.add("uri", uriResource); - - Query sparqlQuery = QueryFactory.create( query, Syntax.syntaxARQ); - model.getLock().enterCriticalSection(Lock.READ); - try{ - QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, model, initialBinding); - try{ - ResultSet results = qExec.execSelect(); - while(results.hasNext()){ - QuerySolution soln = results.nextSolution(); - Iterator iter = soln.varNames() ; - while( iter.hasNext()){ - String name = iter.next(); - RDFNode node = soln.get( name ); - if( node != null ){ - propertyValues.append(" " + node.toString()); - }else{ - log.debug(name + " is null"); - } - } - } - }catch(Throwable t){ - if( ! shutdown ) - log.error(t,t); - } finally{ - qExec.close(); - } - }finally{ - model.getLock().leaveCriticalSection(); - } - - return propertyValues; - } + protected String getTextForRow( QuerySolution row){ + if( row == null ) + return ""; + StringBuffer text = new StringBuffer(); + Iterator iter = row.varNames() ; + while( iter.hasNext()){ + String name = iter.next(); + RDFNode node = row.get( name ); + if( node != null ){ + text.append(" ").append( node.toString() ); + }else{ + log.debug(name + " is null"); + } + } + return text.toString(); + } - protected static final String prefix = "prefix owl: " - + " prefix vitroDisplay: " - + " prefix rdf: " - + " prefix core: " - + " prefix foaf: " - + " prefix rdfs: " - + " prefix localNav: " - + " prefix bibo: "; - //single valued queries for foaf:Agent - static { - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:Position . " + - " ?c core:hrJobTitle ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:Position . " + - " ?c core:involvedOrganizationName ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:Position . " + - " ?c core:positionForPerson ?f . ?f rdfs:label ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:Position . " + - " ?c core:positionInOrganization ?i . ?i rdfs:label ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:Position . " + - " ?c core:titleOrRole ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:Relationship . " + - " ?c core:advisee ?d . ?d rdfs:label ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:Relationship . " + - " ?c core:degreeCandidacy ?e . ?e rdfs:label ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:Relationship . " + - " ?c core:linkedAuthor ?f . ?f rdfs:label ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:Relationship . " + - " ?c core:linkedInformationResource ?h . ?h rdfs:label ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:AwardReceipt . " + - " ?c core:awardConferredBy ?d . ?d rdfs:label ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:AwardReceipt . " + - " ?c core:awardOrHonorFor ?e . ?e rdfs:label ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT " + - "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + - "?uri rdf:type foaf:Agent ; ?b ?c . " + - " ?c rdf:type core:AwardReceipt . " + - " ?c core:description ?ContextNodeProperty . }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT DISTINCT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:Role ; core:roleIn ?Organization ." - + " ?Organization rdfs:label ?ContextNodeProperty . " - + " } ORDER BY ?ContextNodeProperty "); - - singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:EducationalTraining . " - + " ?c core:degreeEarned ?d . ?d rdfs:label ?ContextNodeProperty ." - + " }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:EducationalTraining . " - + " ?c core:degreeEarned ?d . ?d core:abbreviation ?ContextNodeProperty ." - + " }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:EducationalTraining . " - + " ?c core:majorField ?ContextNodeProperty ." - + " }"); - singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:EducationalTraining . " - + " ?c core:departmentOrSchool ?ContextNodeProperty ." - + " }"); - - singleValuedQueriesForAgent.add(prefix + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:EducationalTraining . " - + " ?c core:trainingAtOrganization ?e . ?e rdfs:label ?ContextNodeProperty . " - + " }"); - - - - } - - //single valued queries for core:InformationResource - static { - singleValuedQueriesForInformationResource.add(prefix + - "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + " ?uri rdf:type core:InformationResource . " - + "?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ." - + "?b rdfs:label ?ContextNodeProperty .}"); - - singleValuedQueriesForInformationResource.add(prefix + - "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + " ?uri rdf:type core:InformationResource . " - + " ?uri core:linkedInformationResource ?d ." - + " ?d rdfs:label ?ContextNodeProperty . }"); - - singleValuedQueriesForInformationResource.add(prefix + - "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type core:InformationResource . " - + "?uri core:features ?i . ?i rdfs:label ?ContextNodeProperty ." - + "}"); - - singleValuedQueriesForInformationResource.add(prefix + - "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type core:InformationResource . " - + "?uri bibo:editor ?e . ?e rdfs:label ?ContextNodeProperty ." - + "}"); - - singleValuedQueriesForInformationResource.add(prefix + - "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type core:InformationResource . " - + "?uri core:hasSubjectArea ?f . ?f rdfs:label ?ContextNodeProperty ." - + "}"); - - singleValuedQueriesForInformationResource.add(prefix + - "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" - + "?uri rdf:type core:InformationResource . " - + "?uri core:hasSubjectArea ?f . ?f core:researchAreaOf ?h . ?h rdfs:label ?ContextNodeProperty ." - + "}"); - } - - //multi valued queries - - static{ - multiValuedQueriesForAgent.add(prefix + - "SELECT " + - "(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " + - " (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " + - " (str(?TitleOrRole) as ?titleOrRole) WHERE {" - - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:Position . " - - + " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . " - + " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ." - + " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . " - + " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . " - + " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . " - + " }"); - - multiValuedQueriesForAgent.add(prefix + - "SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " + - " (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {" - - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:Relationship . " - - + " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . " - + " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ." - + " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . " - + " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . " - + " } "); - - multiValuedQueriesForAgent.add(prefix + - "SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " + - " (str(?Description) as ?description) WHERE {" - - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:AwardReceipt . " - - + " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy . } . " - + " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor . } ." - + " OPTIONAL { ?c core:description ?Description . } . " - + " }"); - - multiValuedQueriesForAgent.add(prefix + - "SELECT (str(?OrganizationLabel) as ?organizationLabel) WHERE {" - + "?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:Role ; core:roleIn ?Organization ." - + " ?Organization rdfs:label ?OrganizationLabel . " - + " }"); - - multiValuedQueriesForAgent.add(prefix + - "SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) " - + "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " + - "(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {" - - + " ?uri rdf:type foaf:Agent ; ?b ?c . " - + " ?c rdf:type core:EducationalTraining . " - - + "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . " - + "OPTIONAL { ?c core:majorField ?MajorField .} ." - + " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }" - + " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . " - +"}"); - - } - - //multivalued query for core:InformationResource - static { - - multiValuedQueryForInformationResource = prefix + - "SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) " - + "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) " + - "(str(?Features) as ?features) WHERE {" - - + " ?uri rdf:type core:InformationResource . " - - + "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." + - "?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource .} . " - + "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ." - + " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea . } " - + " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . " - +"}" ; - - } - - public void shutdown(){ - shutdown=true; - } + public void shutdown(){ + shutdown=true; + } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java index bf3bbcec5..b0825762f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java @@ -80,9 +80,15 @@ public class SolrSetup implements javax.servlet.ServletContextListener{ OntModel jenaOntModel = ModelContext.getJenaOntModel(context); - List modifiers = new ArrayList(); - modifiers.add(new CalculateParameters(dataset)); - modifiers.add(new ContextNodeFields(jenaOntModel)); + + /* try to get context attribute DocumentModifiers + * and use that as the start of the list of DocumentModifier + * objects. This allows other listeners to add to the basic set of + * DocumentModifiers. */ + List modifiers = (List)context.getAttribute("DocumentModifiers"); + if( modifiers == null ) + modifiers = new ArrayList(); + modifiers.add(new NameBoost()); modifiers.add(new ThumbnailImageURL(jenaOntModel));