From 1e6b66a12c57fbf98478959e5bf3af9abddac636 Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Thu, 13 Nov 2014 17:14:42 -0500 Subject: [PATCH] VIVO-906 Explicitly configure the SearchIndexExcluders and DocumentModifiers. --- .../everytime/SearchIndexerConfiguration.n3 | 57 +++++++ .../documentBuilding/ContextNodeFields.java | 27 +++- .../ExcludeBasedOnNamespace.java | 40 ++--- .../documentBuilding/ExcludeBasedOnType.java | 143 ++++++++++-------- .../ExcludeBasedOnTypeNamespace.java | 31 ++-- .../documentBuilding/ExcludeNonFlagVitro.java | 5 + .../IndividualToSearchDocument.java | 2 +- .../search/documentBuilding/NameBoost.java | 28 +++- .../search/documentBuilding/NameFields.java | 15 +- .../SyncingExcludeBasedOnType.java | 62 +++++--- .../documentBuilding/ThumbnailImageURL.java | 14 +- .../searchindex/SearchIndexerSetup.java | 122 ++++----------- .../documentBuilding/NameFieldsTest.java | 24 +-- .../ThumbnailImageURLTest.java | 13 +- .../modelaccess/ContextModelAccessStub.java | 29 ++-- 15 files changed, 342 insertions(+), 270 deletions(-) create mode 100644 webapp/rdf/display/everytime/SearchIndexerConfiguration.n3 diff --git a/webapp/rdf/display/everytime/SearchIndexerConfiguration.n3 b/webapp/rdf/display/everytime/SearchIndexerConfiguration.n3 new file mode 100644 index 000000000..fb82c5b74 --- /dev/null +++ b/webapp/rdf/display/everytime/SearchIndexerConfiguration.n3 @@ -0,0 +1,57 @@ +@prefix : . +@prefix xsd: . + +# +# Specify the SearchIndexExcluders and DocumentModifiers. +# + +# Exclude from the search index individuals whose URIs start with these namespaces. +# Note: OWL.NS belongs in this list; if you put it in the type-namespace excluder instead you will exclude all of owl:Thing. +:searchExcluder_namespaceExcluder + a , + ; + :excludes + "http://vitro.mannlib.cornell.edu/ns/vitro/0.7#" , + "http://vitro.mannlib.cornell.edu/ns/vitro/public#" , + "http://vitro.mannlib.cornell.edu/ns/bnode#" , + "http://www.w3.org/2002/07/owl#" . 
+ +# Individuals of these types will be excluded from the search index +:searchExcluder_typeExcluder + a , + ; + :excludes + "http://www.w3.org/2002/07/owl#AnnotationProperty" , + "http://www.w3.org/2002/07/owl#DatatypeProperty" , + "http://www.w3.org/2002/07/owl#ObjectProperty" . + +# Exclude from the search index Individuals with types from these namespaces. +:searchExcluder_typeNamespaceExcluder + a , + ; + :excludes + "http://vitro.mannlib.cornell.edu/ns/vitro/role#public" . + +:searchExcluder_vitroExcluder + a , + . + +:searchExcluder_syncingTypeExcluder + a , + . + +# ------------------------------------ + +:documentModifier_nameFields + a , + . + +:documentModifier_nameBoost + a , + ; + :hasBoost "1.2"^^xsd:float . + +:documentModifier_thumbnailImageUrl + a , + . + diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ContextNodeFields.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ContextNodeFields.java index 636a61e91..249d528bc 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ContextNodeFields.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ContextNodeFields.java @@ -15,12 +15,12 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.RDFNode; import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; -import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser; /** * DocumentModifier that will 
run SPARQL queries for an @@ -30,19 +30,24 @@ import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames; * @author bdc34 * */ -public class ContextNodeFields implements DocumentModifier{ +public class ContextNodeFields implements DocumentModifier, ContextModelsUser{ protected List queries = new ArrayList(); protected boolean shutdown = false; protected Log log = LogFactory.getLog(ContextNodeFields.class); - protected RDFServiceFactory rdfServiceFactory; - + + private RDFService rdfService; + + @Override + public void setContextModels(ContextModelAccess models) { + this.rdfService = models.getRDFService(); + } + /** * Construct this with a model to query when building search documents and * a list of the SPARQL queries to run. */ - protected ContextNodeFields(List queries, RDFServiceFactory rdfServiceFactory){ + protected ContextNodeFields(List queries){ this.queries = queries; - this.rdfServiceFactory = rdfServiceFactory; } @@ -74,7 +79,6 @@ public class ContextNodeFields implements DocumentModifier{ */ protected StringBuffer executeQueryForValues( Individual individual, Collection queries){ /* execute all the queries on the list and concat the values to add to all text */ - RDFService rdfService = rdfServiceFactory.getRDFService(); StringBuffer allValues = new StringBuffer(""); for(String query : queries ){ @@ -137,4 +141,11 @@ public class ContextNodeFields implements DocumentModifier{ public void shutdown(){ shutdown=true; } + + + @Override + public String toString() { + return this.getClass().getSimpleName() + "[]"; + } + } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnNamespace.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnNamespace.java index 615fdfffd..7e295313b 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnNamespace.java +++ 
b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnNamespace.java @@ -1,34 +1,36 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ package edu.cornell.mannlib.vitro.webapp.search.documentBuilding; -import java.util.Arrays; +import java.util.ArrayList; import java.util.List; import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property; /** - * Skip individual if its URI is from any of the excludeNamepsaces - * + * Skip individual if its URI is from any of the excludeNamespaces. */ public class ExcludeBasedOnNamespace implements SearchIndexExcluder { - List excludeNamepsaces; - - - public ExcludeBasedOnNamespace(String ... excludeNamepsaces) { - super(); - this.excludeNamepsaces = Arrays.asList(excludeNamepsaces); - } + private List excludeNamespaces = new ArrayList<>(); + @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#excludes") + public void addExcludedNamespace(String ns) { + excludeNamespaces.add(ns); + } - @Override - public String checkForExclusion(Individual ind) { - for( String ns: excludeNamepsaces){ - if( ns.equals( ind.getNamespace() ) ){ - return "skipping because of namespace " ; - } - } - return null; - } + @Override + public String checkForExclusion(Individual ind) { + for (String ns : excludeNamespaces) { + if (ns.equals(ind.getNamespace())) { + return "skipping because of namespace " + ns; + } + } + return null; + } + @Override + public String toString() { + return "ExcludeBasedOnNamespace[namespaces=" + excludeNamespaces + "]"; + } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnType.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnType.java index f39f567ea..1a9879e80 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnType.java +++ 
b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnType.java @@ -1,83 +1,96 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ package edu.cornell.mannlib.vitro.webapp.search.documentBuilding; +import static edu.cornell.mannlib.vitro.webapp.search.documentBuilding.IndividualToSearchDocument.DONT_EXCLUDE; + import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; +import java.util.HashSet; import java.util.List; +import java.util.Set; import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.VClass; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property; /** - * Exclude individual from search index if - * it is a member of any of the the types. + * Exclude individual from search index if it is a member of any of the + * types. + * * @author bdc34 - * + * */ public class ExcludeBasedOnType implements SearchIndexExcluder { private static final String SKIP_MSG = "skipping due to type."; - - /** The add, set and remove methods must keep this list sorted. */ - List typeURIs; - - public ExcludeBasedOnType(String ... 
typeURIs) { - setExcludedTypes( typeURIs ); - } - @Override - public String checkForExclusion(Individual ind) { - if( ind == null ) - return null; - - if( typeURIinExcludeList( ind.getVClass() )) - return SKIP_MSG; - - List vclasses = new ArrayList(); - vclasses.addAll( ind.getVClasses()!=null?ind.getVClasses():Collections.EMPTY_LIST ); - vclasses.addAll( ind.getVClasses(true)!=null?ind.getVClasses(true):Collections.EMPTY_LIST ); - - for( VClass vclz : vclasses){ - if( typeURIinExcludeList( vclz )) - return SKIP_MSG; - } - - return null; - } - - protected boolean typeURIinExcludeList( VClass vclz){ - if( vclz != null && vclz.getURI() != null && !vclz.isAnonymous() ){ - int pos = Collections.binarySearch(typeURIs, vclz.getURI()); - return pos >= 0; - }else{ - return false; - } - } - - public void setExcludedTypes(String ... typeURIs){ - setExcludedTypes(Arrays.asList(typeURIs)); - } - - public void setExcludedTypes(List typeURIs){ - synchronized(this){ - this.typeURIs = new ArrayList(typeURIs) ; - Collections.sort( this.typeURIs ); - } - } - - protected void addTypeToExclude(String typeURI){ - if( typeURI != null && !typeURI.isEmpty()){ - synchronized(this){ - typeURIs.add(typeURI); - Collections.sort( this.typeURIs ); - } - } - } - - protected void removeTypeToExclude(String typeURI){ - synchronized(this){ - typeURIs.remove(typeURI); - } - } + private final Set typeURIs = new HashSet<>(); + + @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#excludes") + public void addTypeURI(String uri) { + typeURIs.add(uri); + } + + @Override + public String checkForExclusion(Individual ind) { + if (ind == null) { + return DONT_EXCLUDE; + } + if (typeURIinExcludeList(ind.getVClass())) { + return SKIP_MSG; + } + + List vclasses = new ArrayList<>(); + addToList(vclasses, ind.getVClasses()); + addToList(vclasses, ind.getVClasses(true)); + + for (VClass vclz : vclasses) { + if (typeURIinExcludeList(vclz)) + return SKIP_MSG; + } + + return DONT_EXCLUDE; + } 
+ + private void addToList(List list, List additions) { + if (additions != null) { + list.addAll(additions); + } + } + + protected boolean typeURIinExcludeList(VClass vclz) { + if (vclz != null && vclz.getURI() != null && !vclz.isAnonymous()) { + synchronized (typeURIs) { + return typeURIs.contains(vclz.getURI()); + } + } else { + return false; + } + } + + protected void setExcludedTypes(List typeURIs) { + synchronized (typeURIs) { + this.typeURIs.clear(); + this.typeURIs.addAll(typeURIs); + } + } + + protected void addTypeToExclude(String typeURI) { + if (typeURI != null && !typeURI.isEmpty()) { + synchronized (typeURIs) { + typeURIs.add(typeURI); + } + } + } + + protected void removeTypeToExclude(String typeURI) { + synchronized (typeURIs) { + typeURIs.remove(typeURI); + } + } + + @Override + public String toString() { + return this.getClass().getSimpleName() + " [typeURIs=" + typeURIs + "]"; + } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnTypeNamespace.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnTypeNamespace.java index 60af4f0bc..671112f28 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnTypeNamespace.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ExcludeBasedOnTypeNamespace.java @@ -1,34 +1,39 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ package edu.cornell.mannlib.vitro.webapp.search.documentBuilding; -import java.util.Arrays; -import java.util.Collections; +import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.VClass; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation; /** * Exclude 
individuals based on the namespaces of their types. */ public class ExcludeBasedOnTypeNamespace implements SearchIndexExcluder { - final List namespaces; + private final List namespaces = new ArrayList<>(); Pattern nsRegexPattern; - public ExcludeBasedOnTypeNamespace(String ... namespaces) { - super(); - this.namespaces = Collections.unmodifiableList(Arrays.asList( namespaces )); + @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#excludes") + public void addExcludedNamespace(String uri) { + namespaces.add(uri); + } + + @Validation + public void compileRegexPattern() { String nsOrPattern = ""; - for( int i=0; i ?label }"; try { - RDFService rdfService = rsf.getRDFService(); BufferedReader stream = new BufferedReader(new InputStreamReader(rdfService.sparqlSelectQuery(query, ResultFormat.CSV))); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/SyncingExcludeBasedOnType.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/SyncingExcludeBasedOnType.java index dfdf2a28d..c6152126c 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/SyncingExcludeBasedOnType.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/SyncingExcludeBasedOnType.java @@ -1,12 +1,16 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ package edu.cornell.mannlib.vitro.webapp.search.documentBuilding; +import static edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary.EXCLUDE_CLASS; +import static edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary.SEARCH_INDEX_URI; + import java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.query.Query; import com.hp.hpl.jena.query.QueryExecution; import com.hp.hpl.jena.query.QueryExecutionFactory; @@ -23,25 +27,35 @@ import 
com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.shared.Lock; -import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; +import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess; +import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation; /** * This excludes based on types defined as EXCLUDE_CLASS in the * configuration RDF model. */ -public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements ModelChangedListener{ +public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements ModelChangedListener, ContextModelsUser { static final Log log = LogFactory.getLog(SyncingExcludeBasedOnType.class); private static final String queryForProhibitedClasses = "SELECT ?prohibited WHERE{" + - "?searchConfig <" + DisplayVocabulary.EXCLUDE_CLASS + "> ?prohibited . " + + "?searchConfig <" + EXCLUDE_CLASS + "> ?prohibited . 
" + "}"; - String searchIndexURI = DisplayVocabulary.SEARCH_INDEX_URI; - - public SyncingExcludeBasedOnType( Model model){ - this.setExcludedTypes( buildProhibitedClassesList(searchIndexURI, model) ); - log.info("types excluded from search: " + typeURIs); + private ContextModelAccess models; + + @Override + public void setContextModels(ContextModelAccess models) { + this.models = models; + } + + @Validation + public void buildClassList( ){ + OntModel model = models.getOntModel(ModelNames.DISPLAY); + this.setExcludedTypes( buildProhibitedClassesList(SEARCH_INDEX_URI, model) ); + log.debug(this); } private List buildProhibitedClassesList( String URI, Model model){ @@ -81,10 +95,10 @@ public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements Mod public void addedStatement(Statement s) { try{ if( isExcludeClassPredicate( s ) && isAboutSearchIndex(s)){ - if( s.getObject() != null && s.getObject().canAs(Resource.class)){ - String classURI = ((Resource)s.getObject().as(Resource.class)).getURI(); + if( s.getObject() != null && s.getObject().isURIResource()){ + String classURI = s.getObject().asResource().getURI(); this.addTypeToExclude(classURI); - log.debug("prohibited classes: " + this.typeURIs); + log.debug("prohibited classes: " + this); } } }catch(Exception ex){ @@ -97,10 +111,10 @@ public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements Mod public void removedStatement(Statement s) { try{ if( isExcludeClassPredicate( s ) && isAboutSearchIndex(s)){ - if( s.getObject() != null && s.getObject().canAs(Resource.class)){ - String classURI = ((Resource)s.getObject().as(Resource.class)).getURI(); + if( s.getObject() != null && s.getObject().isURIResource()){ + String classURI = s.getObject().asResource().getURI(); this.removeTypeToExclude(classURI); - log.debug("prohibited classes: " + this.typeURIs); + log.debug("prohibited classes: " + this); } } }catch(Exception ex){ @@ -111,13 +125,13 @@ public class SyncingExcludeBasedOnType 
extends ExcludeBasedOnType implements Mod private boolean isExcludeClassPredicate(Statement s){ return s != null && s.getPredicate() != null - && DisplayVocabulary.EXCLUDE_CLASS.getURI().equals( s.getPredicate().getURI()); + && EXCLUDE_CLASS.getURI().equals( s.getPredicate().getURI()); } private boolean isAboutSearchIndex(Statement s){ if( s.getSubject() != null ){ - String subURI = ((Resource) s.getSubject()).getURI() ; - return this.searchIndexURI.equals(subURI); + String subURI = s.getSubject().getURI() ; + return SEARCH_INDEX_URI.equals(subURI); }else{ return false; } @@ -152,10 +166,9 @@ public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements Mod @Override public void addedStatements(Model model) { if( model != null){ - addedStatements(model.listStatements( - model.createResource(searchIndexURI), - DisplayVocabulary.EXCLUDE_CLASS, - (RDFNode)null)); + addedStatements(model.listStatements( + model.createResource(SEARCH_INDEX_URI), EXCLUDE_CLASS, + (RDFNode) null)); } } @@ -193,10 +206,9 @@ public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements Mod @Override public void removedStatements(Model model) { if( model != null){ - removedStatements(model.listStatements( - model.createResource(searchIndexURI), - DisplayVocabulary.EXCLUDE_CLASS, - (RDFNode)null)); + removedStatements(model.listStatements( + model.createResource(SEARCH_INDEX_URI), EXCLUDE_CLASS, + (RDFNode) null)); } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ThumbnailImageURL.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ThumbnailImageURL.java index a82b3a661..3e2e7c4d7 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ThumbnailImageURL.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ThumbnailImageURL.java @@ -15,12 +15,13 @@ import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.rdf.model.RDFNode; import 
edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser; -public class ThumbnailImageURL implements DocumentModifier { +public class ThumbnailImageURL implements DocumentModifier, ContextModelsUser { private static final String PREFIX = "prefix owl: " + " prefix vitroDisplay: " @@ -36,12 +37,12 @@ public class ThumbnailImageURL implements DocumentModifier { + " ?uri ?a . " + " ?a ?downloadLocation . } "; - private RDFServiceFactory rsf; + private RDFService rdf; private Log log = LogFactory.getLog(ThumbnailImageURL.class); - - public ThumbnailImageURL( RDFServiceFactory rsf ){ - this.rsf = rsf; + @Override + public void setContextModels(ContextModelAccess models) { + this.rdf = models.getRDFService(); } @Override @@ -71,7 +72,6 @@ public class ThumbnailImageURL implements DocumentModifier { String uri = "<" + individual.getURI() + "> "; String query = QUERY_TEMPLATE.replaceAll("\\?uri", uri); - RDFService rdf = rsf.getRDFService(); try{ ResultSet results = RDFServiceUtils.sparqlSelectQuery(query, rdf); while(results.hasNext()){ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java index 6e742499b..19738057d 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java @@ -6,17 +6,18 @@ import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames.DISPLAY; import java.util.ArrayList; import 
java.util.List; +import java.util.Set; import javax.servlet.ServletContext; import javax.servlet.ServletContextEvent; import javax.servlet.ServletContextListener; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.vocabulary.OWL; import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils; -import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; @@ -25,25 +26,18 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; import edu.cornell.mannlib.vitro.webapp.search.SearchIndexer; import edu.cornell.mannlib.vitro.webapp.search.beans.StatementToURIsToUpdate; import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.DocumentModifier; -import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeBasedOnNamespace; -import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeBasedOnType; -import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeBasedOnTypeNamespace; -import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeNonFlagVitro; import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.IndividualToSearchDocument; -import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.NameBoost; -import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.NameFields; import 
edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SearchIndexExcluder; -import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SyncingExcludeBasedOnType; -import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ThumbnailImageURL; import edu.cornell.mannlib.vitro.webapp.search.indexing.AdditionalUriFinders; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.search.indexing.SearchReindexingListener; import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoaderException; import edu.cornell.mannlib.vitro.webapp.utils.developer.Key; import edu.cornell.mannlib.vitro.webapp.utils.developer.listeners.DeveloperDisabledModelChangeListener; @@ -51,63 +45,26 @@ import edu.cornell.mannlib.vitro.webapp.utils.developer.listeners.DeveloperDisab * TODO */ public class SearchIndexerSetup implements ServletContextListener { + private static final Log log = LogFactory.getLog(SearchIndexerSetup.class); + public static final String PROHIBITED_FROM_SEARCH = "edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch"; - - /** - * Exclude from the search index Individuals with types from these - * namespaces - */ - private static final String[] TYPE_NS_EXCLUDES = { VitroVocabulary.PUBLIC - // if you do OWL.NS here you will exclude all of owl:Thing. - }; - - /** - * Exclude from the search index individuals who's URIs start with these - * namespaces. 
- */ - private static final String[] INDIVIDUAL_NS_EXCLUDES = { - VitroVocabulary.vitroURI, VitroVocabulary.VITRO_PUBLIC, - VitroVocabulary.PSEUDO_BNODE_NS, OWL.NS }; - - /** Individuals of these types will be excluded from the search index */ - private static final String[] OWL_TYPES_EXCLUDES = { - OWL.ObjectProperty.getURI(), OWL.DatatypeProperty.getURI(), - OWL.AnnotationProperty.getURI() }; + + private ServletContext ctx; + private OntModel displayModel; + private ConfigurationBeanLoader beanLoader; @Override public void contextInitialized(ServletContextEvent sce) { + this.ctx = sce.getServletContext(); + this.displayModel = ModelAccess.on(ctx).getOntModel(DISPLAY); + this.beanLoader = new ConfigurationBeanLoader(displayModel, ctx); + ServletContext context = sce.getServletContext(); StartupStatus ss = StartupStatus.getBean(context); SearchEngine searchEngine = ApplicationUtils.instance().getSearchEngine(); try { - /* set up the individual to search doc translation */ - OntModel jenaOntModel = ModelAccess.on(context).getOntModel(); - OntModel displayModel = ModelAccess.on(context).getOntModel(DISPLAY); - - /* - * try to get context attribute DocumentModifiers and use that as - * the start of the list of DocumentModifier objects. This allows - * other ContextListeners to add to the basic set of - * DocumentModifiers. - */ - @SuppressWarnings("unchecked") - List modifiersFromContext = (List) context - .getAttribute("DocumentModifiers"); - - /* - * try to get context attribute SearchIndexExcludes and use that as - * the start of the list of exclude objects. This allows other - * ContextListeners to add to the basic set of SearchIndexExcludes . 
- */ - @SuppressWarnings("unchecked") - List searchIndexExcludesFromContext = (List) context - .getAttribute("SearchIndexExcludes"); - - IndividualToSearchDocument indToSearchDoc = setupTranslation( - jenaOntModel, displayModel, - RDFServiceUtils.getRDFServiceFactory(context), - modifiersFromContext, searchIndexExcludesFromContext); + IndividualToSearchDocument indToSearchDoc = setupTranslation(); /* setup search indexer */ SearchIndexer searchIndexer = new SearchIndexer(searchEngine, indToSearchDoc); @@ -156,42 +113,17 @@ public class SearchIndexerSetup implements ServletContextListener { } - public static IndividualToSearchDocument setupTranslation( - OntModel jenaOntModel, Model displayModel, - RDFServiceFactory rdfServiceFactory, - List modifiersFromContext, - List searchIndexExcludesFromContext) { + private IndividualToSearchDocument setupTranslation() { + try { + Set excluders = beanLoader.loadAll(SearchIndexExcluder.class); + log.debug("Excludes: (" + excluders.size() + ") " + excluders); - /* - * try to get context attribute DocumentModifiers and use that as the - * start of the list of DocumentModifier objects. This allows other - * ContextListeners to add to the basic set of DocumentModifiers. - */ - List modifiers = new ArrayList(); - if (modifiersFromContext != null) { - modifiers.addAll(modifiersFromContext); + Set modifiers = beanLoader.loadAll(DocumentModifier.class); + log.debug("Modifiers: (" + modifiers.size() + ") " + modifiers); + + return new IndividualToSearchDocument(new ArrayList<>(excluders), new ArrayList<>(modifiers)); + } catch (ConfigurationBeanLoaderException e) { + throw new RuntimeException("Failed to configure the SearchIndexer", e); } - - modifiers.add(new NameFields(rdfServiceFactory)); - modifiers.add(new NameBoost(1.2f)); - modifiers.add(new ThumbnailImageURL(rdfServiceFactory)); - - /* - * try to get context attribute SearchIndexExcludes and use that as the - * start of the list of exclude objects. 
This allows other - * ContextListeners to add to the basic set of SearchIndexExcludes . - */ - List excludes = new ArrayList(); - if (searchIndexExcludesFromContext != null) { - excludes.addAll(searchIndexExcludesFromContext); - } - - excludes.add(new ExcludeBasedOnNamespace(INDIVIDUAL_NS_EXCLUDES)); - excludes.add(new ExcludeBasedOnTypeNamespace(TYPE_NS_EXCLUDES)); - excludes.add(new ExcludeBasedOnType(OWL_TYPES_EXCLUDES)); - excludes.add(new ExcludeNonFlagVitro()); - excludes.add(new SyncingExcludeBasedOnType(displayModel)); - - return new IndividualToSearchDocument(excludes, modifiers); } } diff --git a/webapp/test/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/NameFieldsTest.java b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/NameFieldsTest.java index d4e2767cf..1a7c163e1 100644 --- a/webapp/test/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/NameFieldsTest.java +++ b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/NameFieldsTest.java @@ -2,6 +2,7 @@ package edu.cornell.mannlib.vitro.webapp.search.documentBuilding; +import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService.CONTENT; import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW; import static org.junit.Assert.assertEquals; @@ -9,6 +10,7 @@ import org.junit.Before; import org.junit.Test; import stubs.edu.cornell.mannlib.vitro.webapp.beans.IndividualStub; +import stubs.edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccessStub; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; @@ -16,8 +18,6 @@ import com.hp.hpl.jena.rdf.model.Statement; import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; -import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceFactorySingle; import 
edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.model.RDFServiceModel; import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument; @@ -38,27 +38,29 @@ public class NameFieldsTest { doc = new BaseSearchInputDocument(); RDFServiceModel rdfService = new RDFServiceModel(baseModel); - RDFServiceFactory rdfServiceFactory = new RDFServiceFactorySingle( - rdfService); - nameFields = new NameFields(rdfServiceFactory); + ContextModelAccessStub models = new ContextModelAccessStub(); + models.setRDFService(CONTENT, rdfService); + + nameFields = new NameFields(); + nameFields.setContextModels(models); } @Test - public void nullIndividual() throws SkipIndividualException { + public void nullIndividual() { SearchInputDocument expected = new BaseSearchInputDocument(doc); assertResultingSearchDocument(null, expected); } @Test - public void nullUri() throws SkipIndividualException { + public void nullUri() { SearchInputDocument expected = new BaseSearchInputDocument(doc); assertResultingSearchDocument(new IndividualStub(null), expected); } @Test - public void foundNoLabels() throws SkipIndividualException { + public void foundNoLabels() { SearchInputDocument expected = new BaseSearchInputDocument(doc); expected.addField(NAME_RAW, ""); @@ -67,7 +69,7 @@ public class NameFieldsTest { } @Test - public void foundOneLabel() throws SkipIndividualException { + public void foundOneLabel() { baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label1")); SearchInputDocument expected = new BaseSearchInputDocument(doc); @@ -78,7 +80,7 @@ public class NameFieldsTest { } @Test - public void foundTwoLabels() throws SkipIndividualException { + public void foundTwoLabels() { baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label1")); baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label2")); @@ -100,7 +102,7 @@ public class NameFieldsTest { } private void assertResultingSearchDocument(Individual ind, - SearchInputDocument expected) throws 
SkipIndividualException { + SearchInputDocument expected) { nameFields.modifyDocument(ind, doc); assertEquals(expected, doc); } diff --git a/webapp/test/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ThumbnailImageURLTest.java b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ThumbnailImageURLTest.java index 285152e8d..b5c9149cf 100644 --- a/webapp/test/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ThumbnailImageURLTest.java +++ b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/documentBuilding/ThumbnailImageURLTest.java @@ -4,6 +4,8 @@ */ package edu.cornell.mannlib.vitro.webapp.search.documentBuilding; +import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService.CONTENT; + import java.io.InputStream; import org.apache.log4j.Level; @@ -11,6 +13,7 @@ import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import stubs.edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccessStub; import stubs.edu.cornell.mannlib.vitro.webapp.modules.ApplicationStub; import stubs.edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineStub; import stubs.javax.servlet.ServletContextStub; @@ -25,13 +28,11 @@ import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; -import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceFactorySingle; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.model.RDFServiceModel; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames; public class ThumbnailImageURLTest extends AbstractTestClass{ - RDFServiceFactory testRDF; + ContextModelAccessStub contextModels; String personsURI = 
"http://vivo.cornell.edu/individual/individual8803"; /** @@ -45,7 +46,8 @@ public class ThumbnailImageURLTest extends AbstractTestClass{ Model model = ModelFactory.createDefaultModel(); InputStream in = ThumbnailImageURLTest.class.getResourceAsStream("testPerson.n3"); model.read(in,"","N3"); - testRDF = new RDFServiceFactorySingle( new RDFServiceModel( model ) ); + contextModels = new ContextModelAccessStub(); + contextModels.setRDFService(CONTENT, new RDFServiceModel( model )); } /** @@ -55,7 +57,8 @@ public class ThumbnailImageURLTest extends AbstractTestClass{ @Test public void testThumbnailFieldCreatedInSearchDoc() { SearchInputDocument doc = ApplicationUtils.instance().getSearchEngine().createInputDocument(); - ThumbnailImageURL testMe = new ThumbnailImageURL( testRDF ); + ThumbnailImageURL testMe = new ThumbnailImageURL(); + testMe.setContextModels(contextModels); Individual ind = new IndividualImpl(); ind.setURI(personsURI); diff --git a/webapp/test/stubs/edu/cornell/mannlib/vitro/webapp/modelaccess/ContextModelAccessStub.java b/webapp/test/stubs/edu/cornell/mannlib/vitro/webapp/modelaccess/ContextModelAccessStub.java index 1ab169f23..566b6cb02 100644 --- a/webapp/test/stubs/edu/cornell/mannlib/vitro/webapp/modelaccess/ContextModelAccessStub.java +++ b/webapp/test/stubs/edu/cornell/mannlib/vitro/webapp/modelaccess/ContextModelAccessStub.java @@ -3,7 +3,9 @@ package stubs.edu.cornell.mannlib.vitro.webapp.modelaccess; import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.ReasoningOption.ASSERTIONS_AND_INFERENCES; +import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService.CONTENT; +import java.util.EnumMap; import java.util.HashMap; import java.util.Map; @@ -30,6 +32,7 @@ public class ContextModelAccessStub implements ContextModelAccess { // ---------------------------------------------------------------------- private final Map wadfMap = new HashMap<>(); + private final Map rdfServiceMap = new 
EnumMap<>(WhichService.class); public void setWebappDaoFactory(WebappDaoFactory wadf) { setWebappDaoFactory(wadf, ASSERTIONS_AND_INFERENCES); @@ -39,6 +42,10 @@ public class ContextModelAccessStub implements ContextModelAccess { ReasoningOption option) { wadfMap.put(option, wadf); } + + public void setRDFService(WhichService which, RDFService rdfService) { + rdfServiceMap.put(which, rdfService); + } // ---------------------------------------------------------------------- // Stub methods @@ -49,22 +56,20 @@ public class ContextModelAccessStub implements ContextModelAccess { return wadfMap.get(ASSERTIONS_AND_INFERENCES); } + @Override + public RDFService getRDFService() { + return getRDFService(CONTENT); + } + + @Override + public RDFService getRDFService(WhichService which) { + return rdfServiceMap.get(which); + } + // ---------------------------------------------------------------------- // Un-implemented methods // ---------------------------------------------------------------------- - @Override - public RDFService getRDFService() { - throw new RuntimeException( - "ContextModelAccessStub.getRDFService() not implemented."); - } - - @Override - public RDFService getRDFService(WhichService which) { - throw new RuntimeException( - "ContextModelAccessStub.getRDFService() not implemented."); - } - @Override public Dataset getDataset() { throw new RuntimeException(