VIVO-906 Explicitly configure the SearchIndexExcluders and DocumentModifiers.

This commit is contained in:
Jim Blake 2014-11-13 17:14:42 -05:00
parent ebb31eb081
commit 1e6b66a12c
15 changed files with 342 additions and 270 deletions

View file

@ -0,0 +1,57 @@
@prefix : <http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
#
# Specify the SearchIndexExcluders and DocumentModifiers.
#
# Exclude from the search index individuals whose URIs are in these namespaces.
# Note: do not add the OWL namespace to the type-namespace excluder
# (searchExcluder_typeNamespaceExcluder); that would exclude all of owl:Thing.
:searchExcluder_namespaceExcluder
a <java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeBasedOnNamespace> ,
<java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SearchIndexExcluder> ;
:excludes
"http://vitro.mannlib.cornell.edu/ns/vitro/0.7#" ,
"http://vitro.mannlib.cornell.edu/ns/vitro/public#" ,
"http://vitro.mannlib.cornell.edu/ns/bnode#" ,
"http://www.w3.org/2002/07/owl#" .
# Individuals of these types will be excluded from the search index
:searchExcluder_typeExcluder
a <java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeBasedOnType> ,
<java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SearchIndexExcluder> ;
:excludes
"http://www.w3.org/2002/07/owl#AnnotationProperty" ,
"http://www.w3.org/2002/07/owl#DatatypeProperty" ,
"http://www.w3.org/2002/07/owl#ObjectProperty" .
# Exclude from the search index individuals with types from these namespaces.
# Note: if you do OWL.NS here you will exclude all of owl:Thing.
:searchExcluder_typeNamespaceExcluder
a <java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeBasedOnTypeNamespace> ,
<java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SearchIndexExcluder> ;
:excludes
"http://vitro.mannlib.cornell.edu/ns/vitro/role#public" .
:searchExcluder_vitroExcluder
a <java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeNonFlagVitro> ,
<java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SearchIndexExcluder> .
:searchExcluder_syncingTypeExcluder
a <java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SyncingExcludeBasedOnType> ,
<java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SearchIndexExcluder> .
# ------------------------------------
:documentModifier_nameFields
a <java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.NameFields> ,
<java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.DocumentModifier> .
:documentModifier_nameBoost
a <java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.NameBoost> ,
<java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.DocumentModifier> ;
:hasBoost "1.2"^^xsd:float .
:documentModifier_thumbnailImageUrl
a <java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ThumbnailImageURL> ,
<java:edu.cornell.mannlib.vitro.webapp.search.documentBuilding.DocumentModifier> .

View file

@ -15,12 +15,12 @@ import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.RDFNode;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser;
/**
* DocumentModifier that will run SPARQL queries for an
@ -30,19 +30,24 @@ import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
* @author bdc34
*
*/
public class ContextNodeFields implements DocumentModifier{
public class ContextNodeFields implements DocumentModifier, ContextModelsUser{
protected List<String> queries = new ArrayList<String>();
protected boolean shutdown = false;
protected Log log = LogFactory.getLog(ContextNodeFields.class);
protected RDFServiceFactory rdfServiceFactory;
private RDFService rdfService;
/**
 * Receives the context models and keeps the RDFService they provide.
 *
 * @param models source of the RDFService stored in this instance
 */
@Override
public void setContextModels(ContextModelAccess models) {
this.rdfService = models.getRDFService();
}
/**
* Construct this with a model to query when building search documents and
* a list of the SPARQL queries to run.
*/
protected ContextNodeFields(List<String> queries, RDFServiceFactory rdfServiceFactory){
protected ContextNodeFields(List<String> queries){
this.queries = queries;
this.rdfServiceFactory = rdfServiceFactory;
}
@ -74,7 +79,6 @@ public class ContextNodeFields implements DocumentModifier{
*/
protected StringBuffer executeQueryForValues( Individual individual, Collection<String> queries){
/* execute all the queries on the list and concat the values to add to all text */
RDFService rdfService = rdfServiceFactory.getRDFService();
StringBuffer allValues = new StringBuffer("");
for(String query : queries ){
@ -137,4 +141,11 @@ public class ContextNodeFields implements DocumentModifier{
public void shutdown(){
shutdown=true;
}
/** Returns the simple name of the runtime class followed by "[]". */
@Override
public String toString() {
return this.getClass().getSimpleName() + "[]";
}
}

View file

@ -1,34 +1,36 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.documentBuilding;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
/**
* Skip individual if its URI is from any of the excludeNamepsaces
*
* Skip individual if its URI is from any of the excludeNamespaces.
*/
public class ExcludeBasedOnNamespace implements SearchIndexExcluder {
List<String> excludeNamepsaces;
private List<String> excludeNamespaces = new ArrayList<>();
public ExcludeBasedOnNamespace(String ... excludeNamepsaces) {
super();
this.excludeNamepsaces = Arrays.asList(excludeNamepsaces);
/**
 * Adds one namespace to the list of excluded namespaces. The method is
 * annotated with the ApplicationSetup#excludes property URI.
 *
 * @param ns the namespace to add to the exclusion list
 */
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#excludes")
public void addExcludedNamespace(String ns) {
excludeNamespaces.add(ns);
}
@Override
public String checkForExclusion(Individual ind) {
for( String ns: excludeNamepsaces){
for (String ns : excludeNamespaces) {
if (ns.equals(ind.getNamespace())) {
return "skipping because of namespace " ;
return "skipping because of namespace " + ns;
}
}
return null;
}
/** Returns the class name together with the list of excluded namespaces. */
@Override
public String toString() {
return "ExcludeBasedOnNamespace[namespaces=" + excludeNamespaces + "]";
}
}

View file

@ -1,17 +1,22 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.documentBuilding;
import static edu.cornell.mannlib.vitro.webapp.search.documentBuilding.IndividualToSearchDocument.DONT_EXCLUDE;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
/**
* Exclude individual from search index if
* it is a member of any of the the types.
* Exclude individual from search index if it is a member of any of the
* types.
*
* @author bdc34
*
*/
@ -19,65 +24,73 @@ public class ExcludeBasedOnType implements SearchIndexExcluder {
private static final String SKIP_MSG = "skipping due to type.";
/** The add, set and remove methods must keep this list sorted. */
List<String> typeURIs;
private final Set<String> typeURIs = new HashSet<>();
public ExcludeBasedOnType(String ... typeURIs) {
setExcludedTypes( typeURIs );
/**
 * Adds one type URI to the set of excluded types. The method is annotated
 * with the ApplicationSetup#excludes property URI.
 *
 * NOTE(review): unlike addTypeToExclude and removeTypeToExclude, this does
 * not synchronize on typeURIs and does not filter null or empty values --
 * presumably it is only called while the bean is being configured; confirm.
 *
 * @param uri the URI of a type whose members should be excluded
 */
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#excludes")
public void addTypeURI(String uri) {
typeURIs.add(uri);
}
@Override
public String checkForExclusion(Individual ind) {
if( ind == null )
return null;
if( typeURIinExcludeList( ind.getVClass() ))
if (ind == null) {
return DONT_EXCLUDE;
}
if (typeURIinExcludeList(ind.getVClass())) {
return SKIP_MSG;
}
List<VClass> vclasses = new ArrayList<VClass>();
vclasses.addAll( ind.getVClasses()!=null?ind.getVClasses():Collections.EMPTY_LIST );
vclasses.addAll( ind.getVClasses(true)!=null?ind.getVClasses(true):Collections.EMPTY_LIST );
List<VClass> vclasses = new ArrayList<>();
addToList(vclasses, ind.getVClasses());
addToList(vclasses, ind.getVClasses(true));
for (VClass vclz : vclasses) {
if (typeURIinExcludeList(vclz))
return SKIP_MSG;
}
return null;
return DONT_EXCLUDE;
}
/**
 * Appends every element of additions to list. A null additions argument is
 * ignored, leaving list unchanged.
 *
 * @param list the list that receives the elements
 * @param additions the elements to append; may be null
 */
private void addToList(List<VClass> list, List<VClass> additions) {
    if (additions == null) {
        return;
    }
    list.addAll(additions);
}
protected boolean typeURIinExcludeList(VClass vclz) {
if (vclz != null && vclz.getURI() != null && !vclz.isAnonymous()) {
int pos = Collections.binarySearch(typeURIs, vclz.getURI());
return pos >= 0;
synchronized (typeURIs) {
return typeURIs.contains(vclz.getURI());
}
} else {
return false;
}
}
public void setExcludedTypes(String ... typeURIs){
setExcludedTypes(Arrays.asList(typeURIs));
}
public void setExcludedTypes(List<String> typeURIs){
synchronized(this){
this.typeURIs = new ArrayList<String>(typeURIs) ;
Collections.sort( this.typeURIs );
/**
 * Replaces the current set of excluded type URIs with the supplied list.
 *
 * The update runs while holding the monitor of the typeURIs field, the same
 * lock taken by typeURIinExcludeList, addTypeToExclude and
 * removeTypeToExclude, so those methods never observe the set between the
 * clear and the re-fill.
 *
 * @param typeURIs the URIs of the types to exclude; must not be null
 * @throws NullPointerException if typeURIs is null (the set is left untouched)
 */
protected void setExcludedTypes(List<String> typeURIs) {
    // Reject null up front so a bad argument cannot empty the set.
    if (typeURIs == null) {
        throw new NullPointerException("typeURIs may not be null");
    }
    // The parameter shadows the field: a bare "typeURIs" here would lock the
    // caller's list instead of the set that the other methods guard.
    synchronized (this.typeURIs) {
        this.typeURIs.clear();
        this.typeURIs.addAll(typeURIs);
    }
}
protected void addTypeToExclude(String typeURI) {
if (typeURI != null && !typeURI.isEmpty()) {
synchronized(this){
synchronized (typeURIs) {
typeURIs.add(typeURI);
Collections.sort( this.typeURIs );
}
}
}
protected void removeTypeToExclude(String typeURI) {
synchronized(this){
synchronized (typeURIs) {
typeURIs.remove(typeURI);
}
}
/**
 * Returns the simple name of the runtime class followed by the current
 * contents of typeURIs.
 */
@Override
public String toString() {
return this.getClass().getSimpleName() + " [typeURIs=" + typeURIs + "]";
}
}

View file

@ -1,29 +1,34 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.documentBuilding;
import java.util.Arrays;
import java.util.Collections;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
/**
* Exclude individuals based on the namespaces of their types.
*/
public class ExcludeBasedOnTypeNamespace implements SearchIndexExcluder {
final List<String> namespaces;
private final List<String> namespaces = new ArrayList<>();
Pattern nsRegexPattern;
public ExcludeBasedOnTypeNamespace(String ... namespaces) {
super();
this.namespaces = Collections.unmodifiableList(Arrays.asList( namespaces ));
/**
 * Adds one namespace to the list of excluded type namespaces. The method is
 * annotated with the ApplicationSetup#excludes property URI.
 *
 * @param uri the namespace to add to the list
 */
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#excludes")
public void addExcludedNamespace(String uri) {
namespaces.add(uri);
}
@Validation
public void compileRegexPattern() {
String nsOrPattern = "";
for( int i=0; i<namespaces.length; i++){
String ns = namespaces[i];
for( int i=0; i<namespaces.size(); i++){
String ns = namespaces.get(i);
nsOrPattern = nsOrPattern + (i!=0?"|":"") + Pattern.quote(ns) + "[^/#]*$";
}
this.nsRegexPattern = Pattern.compile(nsOrPattern);
@ -53,4 +58,8 @@ public class ExcludeBasedOnTypeNamespace implements SearchIndexExcluder {
}
/** Returns the class name together with the configured namespaces. */
@Override
public String toString() {
return "ExcludeBasedOnTypeNamespace [namespaces=" + namespaces + "]";
}
}

View file

@ -54,4 +54,9 @@ public class ExcludeNonFlagVitro implements SearchIndexExcluder {
return DONT_EXCLUDE;
}
/** Returns a fixed label for this excluder; no state is included. */
@Override
public String toString() {
return "ExcludeNonFlagVitro []";
}
}

View file

@ -350,5 +350,5 @@ public class IndividualToSearchDocument {
}
}
protected static final String DONT_EXCLUDE =null;
public static final String DONT_EXCLUDE =null;
}

View file

@ -6,9 +6,14 @@ import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_STEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_UNSTEMMED;
import java.util.Arrays;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
public class NameBoost implements DocumentModifier {
@ -18,15 +23,24 @@ public class NameBoost implements DocumentModifier {
* please consider if you need to change this list
* of name fields to boost.
*/
String[] fieldsToBoost = {NAME_RAW,NAME_LOWERCASE,NAME_UNSTEMMED,NAME_STEMMED};
private String[] fieldsToBoost = {NAME_RAW,NAME_LOWERCASE,NAME_UNSTEMMED,NAME_STEMMED};
private Float boost;
final float boost;
public NameBoost(float boost){
/**
 * Stores the boost value. The method is annotated with the
 * ApplicationSetup#hasBoost property URI.
 *
 * @param boost the boost value to store
 */
@Property(uri="http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBoost")
public void setBoost(float boost) {
this.boost = boost;
}
/**
 * Configuration check: fails if no boost value has been set.
 *
 * @throws IllegalStateException if boost is still null
 */
@Validation
public void validate() {
    final boolean boostWasConfigured = (boost != null);
    if (boostWasConfigured) {
        return;
    }
    throw new IllegalStateException(
            "Configuration did not include a boost value.");
}
@Override
public void modifyDocument(Individual individual, SearchInputDocument doc) {
@ -43,4 +57,10 @@ public class NameBoost implements DocumentModifier {
// do nothing.
}
/**
 * Returns the class name with the boosted field names and the boost value.
 */
@Override
public String toString() {
    StringBuilder text = new StringBuilder("NameBoost[fieldsToBoost=");
    text.append(Arrays.toString(fieldsToBoost));
    text.append(", boost=").append(boost);
    text.append("]");
    return text.toString();
}
}

View file

@ -10,24 +10,26 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser;
/**
* Adds all labels to name fields, not just the one returned by Individual.getName().
*/
public class NameFields implements DocumentModifier {
RDFServiceFactory rsf;
public class NameFields implements DocumentModifier, ContextModelsUser {
private RDFService rdfService;
public static final VitroSearchTermNames term = new VitroSearchTermNames();
public static final Log log = LogFactory.getLog(NameFields.class.getName());
public NameFields( RDFServiceFactory rsf){
this.rsf = rsf;
/**
 * Receives the context models and keeps the RDFService they provide.
 *
 * @param models source of the RDFService stored in this instance
 */
@Override
public void setContextModels(ContextModelAccess models) {
this.rdfService = models.getRDFService();
}
@Override
@ -43,7 +45,6 @@ public class NameFields implements DocumentModifier {
"<http://www.w3.org/2000/01/rdf-schema#label> ?label }";
try {
RDFService rdfService = rsf.getRDFService();
BufferedReader stream =
new BufferedReader(new InputStreamReader(rdfService.sparqlSelectQuery(query, ResultFormat.CSV)));

View file

@ -1,12 +1,16 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.documentBuilding;
import static edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary.EXCLUDE_CLASS;
import static edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary.SEARCH_INDEX_URI;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
@ -23,25 +27,35 @@ import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.shared.Lock;
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
/**
* This excludes based on types defined as EXCLUDE_CLASS in the
* configuration RDF model.
*/
public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements ModelChangedListener{
public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements ModelChangedListener, ContextModelsUser {
static final Log log = LogFactory.getLog(SyncingExcludeBasedOnType.class);
private static final String queryForProhibitedClasses =
"SELECT ?prohibited WHERE{" +
"?searchConfig <" + DisplayVocabulary.EXCLUDE_CLASS + "> ?prohibited . " +
"?searchConfig <" + EXCLUDE_CLASS + "> ?prohibited . " +
"}";
String searchIndexURI = DisplayVocabulary.SEARCH_INDEX_URI;
private ContextModelAccess models;
public SyncingExcludeBasedOnType( Model model){
this.setExcludedTypes( buildProhibitedClassesList(searchIndexURI, model) );
log.info("types excluded from search: " + typeURIs);
/**
 * Keeps a reference to the supplied context models; buildClassList reads
 * the display model from it.
 *
 * @param models the context models to remember
 */
@Override
public void setContextModels(ContextModelAccess models) {
this.models = models;
}
/**
 * Validation step: builds the list of prohibited classes for the search
 * index from the display model, installs it as the set of excluded types,
 * and logs this object at debug level.
 */
@Validation
public void buildClassList() {
    List<String> prohibited = buildProhibitedClassesList(
            SEARCH_INDEX_URI, models.getOntModel(ModelNames.DISPLAY));
    setExcludedTypes(prohibited);
    log.debug(this);
}
private List<String> buildProhibitedClassesList( String URI, Model model){
@ -81,10 +95,10 @@ public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements Mod
public void addedStatement(Statement s) {
try{
if( isExcludeClassPredicate( s ) && isAboutSearchIndex(s)){
if( s.getObject() != null && s.getObject().canAs(Resource.class)){
String classURI = ((Resource)s.getObject().as(Resource.class)).getURI();
if( s.getObject() != null && s.getObject().isURIResource()){
String classURI = s.getObject().asResource().getURI();
this.addTypeToExclude(classURI);
log.debug("prohibited classes: " + this.typeURIs);
log.debug("prohibited classes: " + this);
}
}
}catch(Exception ex){
@ -97,10 +111,10 @@ public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements Mod
public void removedStatement(Statement s) {
try{
if( isExcludeClassPredicate( s ) && isAboutSearchIndex(s)){
if( s.getObject() != null && s.getObject().canAs(Resource.class)){
String classURI = ((Resource)s.getObject().as(Resource.class)).getURI();
if( s.getObject() != null && s.getObject().isURIResource()){
String classURI = s.getObject().asResource().getURI();
this.removeTypeToExclude(classURI);
log.debug("prohibited classes: " + this.typeURIs);
log.debug("prohibited classes: " + this);
}
}
}catch(Exception ex){
@ -111,13 +125,13 @@ public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements Mod
private boolean isExcludeClassPredicate(Statement s){
return s != null
&& s.getPredicate() != null
&& DisplayVocabulary.EXCLUDE_CLASS.getURI().equals( s.getPredicate().getURI());
&& EXCLUDE_CLASS.getURI().equals( s.getPredicate().getURI());
}
private boolean isAboutSearchIndex(Statement s){
if( s.getSubject() != null ){
String subURI = ((Resource) s.getSubject()).getURI() ;
return this.searchIndexURI.equals(subURI);
String subURI = s.getSubject().getURI() ;
return SEARCH_INDEX_URI.equals(subURI);
}else{
return false;
}
@ -153,8 +167,7 @@ public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements Mod
public void addedStatements(Model model) {
if( model != null){
addedStatements(model.listStatements(
model.createResource(searchIndexURI),
DisplayVocabulary.EXCLUDE_CLASS,
model.createResource(SEARCH_INDEX_URI), EXCLUDE_CLASS,
(RDFNode) null));
}
}
@ -194,8 +207,7 @@ public class SyncingExcludeBasedOnType extends ExcludeBasedOnType implements Mod
public void removedStatements(Model model) {
if( model != null){
removedStatements(model.listStatements(
model.createResource(searchIndexURI),
DisplayVocabulary.EXCLUDE_CLASS,
model.createResource(SEARCH_INDEX_URI), EXCLUDE_CLASS,
(RDFNode) null));
}
}

View file

@ -15,12 +15,13 @@ import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.RDFNode;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser;
public class ThumbnailImageURL implements DocumentModifier {
public class ThumbnailImageURL implements DocumentModifier, ContextModelsUser {
private static final String PREFIX = "prefix owl: <http://www.w3.org/2002/07/owl#> "
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
@ -36,12 +37,12 @@ public class ThumbnailImageURL implements DocumentModifier {
+ " ?uri <http://vitro.mannlib.cornell.edu/ns/vitro/public#mainImage> ?a . "
+ " ?a <http://vitro.mannlib.cornell.edu/ns/vitro/public#downloadLocation> ?downloadLocation . } ";
private RDFServiceFactory rsf;
private RDFService rdf;
private Log log = LogFactory.getLog(ThumbnailImageURL.class);
public ThumbnailImageURL( RDFServiceFactory rsf ){
this.rsf = rsf;
/**
 * Receives the context models and keeps the RDFService they provide.
 *
 * @param models source of the RDFService stored in this instance
 */
@Override
public void setContextModels(ContextModelAccess models) {
this.rdf = models.getRDFService();
}
@Override
@ -71,7 +72,6 @@ public class ThumbnailImageURL implements DocumentModifier {
String uri = "<" + individual.getURI() + "> ";
String query = QUERY_TEMPLATE.replaceAll("\\?uri", uri);
RDFService rdf = rsf.getRDFService();
try{
ResultSet results = RDFServiceUtils.sparqlSelectQuery(query, rdf);
while(results.hasNext()){

View file

@ -6,17 +6,18 @@ import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames.DISPLAY;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import javax.servlet.ServletContext;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.vocabulary.OWL;
import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
@ -25,25 +26,18 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
import edu.cornell.mannlib.vitro.webapp.search.SearchIndexer;
import edu.cornell.mannlib.vitro.webapp.search.beans.StatementToURIsToUpdate;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.DocumentModifier;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeBasedOnNamespace;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeBasedOnType;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeBasedOnTypeNamespace;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ExcludeNonFlagVitro;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.IndividualToSearchDocument;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.NameBoost;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.NameFields;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SearchIndexExcluder;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.SyncingExcludeBasedOnType;
import edu.cornell.mannlib.vitro.webapp.search.documentBuilding.ThumbnailImageURL;
import edu.cornell.mannlib.vitro.webapp.search.indexing.AdditionalUriFinders;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
import edu.cornell.mannlib.vitro.webapp.search.indexing.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoaderException;
import edu.cornell.mannlib.vitro.webapp.utils.developer.Key;
import edu.cornell.mannlib.vitro.webapp.utils.developer.listeners.DeveloperDisabledModelChangeListener;
@ -51,63 +45,26 @@ import edu.cornell.mannlib.vitro.webapp.utils.developer.listeners.DeveloperDisab
* TODO
*/
public class SearchIndexerSetup implements ServletContextListener {
private static final Log log = LogFactory.getLog(SearchIndexerSetup.class);
public static final String PROHIBITED_FROM_SEARCH = "edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch";
/**
* Exclude from the search index Individuals with types from these
* namespaces
*/
private static final String[] TYPE_NS_EXCLUDES = { VitroVocabulary.PUBLIC
// if you do OWL.NS here you will exclude all of owl:Thing.
};
/**
* Exclude from the search index individuals who's URIs start with these
* namespaces.
*/
private static final String[] INDIVIDUAL_NS_EXCLUDES = {
VitroVocabulary.vitroURI, VitroVocabulary.VITRO_PUBLIC,
VitroVocabulary.PSEUDO_BNODE_NS, OWL.NS };
/** Individuals of these types will be excluded from the search index */
private static final String[] OWL_TYPES_EXCLUDES = {
OWL.ObjectProperty.getURI(), OWL.DatatypeProperty.getURI(),
OWL.AnnotationProperty.getURI() };
private ServletContext ctx;
private OntModel displayModel;
private ConfigurationBeanLoader beanLoader;
@Override
public void contextInitialized(ServletContextEvent sce) {
this.ctx = sce.getServletContext();
this.displayModel = ModelAccess.on(ctx).getOntModel(DISPLAY);
this.beanLoader = new ConfigurationBeanLoader(displayModel, ctx);
ServletContext context = sce.getServletContext();
StartupStatus ss = StartupStatus.getBean(context);
SearchEngine searchEngine = ApplicationUtils.instance().getSearchEngine();
try {
/* set up the individual to search doc translation */
OntModel jenaOntModel = ModelAccess.on(context).getOntModel();
OntModel displayModel = ModelAccess.on(context).getOntModel(DISPLAY);
/*
* try to get context attribute DocumentModifiers and use that as
* the start of the list of DocumentModifier objects. This allows
* other ContextListeners to add to the basic set of
* DocumentModifiers.
*/
@SuppressWarnings("unchecked")
List<DocumentModifier> modifiersFromContext = (List<DocumentModifier>) context
.getAttribute("DocumentModifiers");
/*
* try to get context attribute SearchIndexExcludes and use that as
* the start of the list of exclude objects. This allows other
* ContextListeners to add to the basic set of SearchIndexExcludes .
*/
@SuppressWarnings("unchecked")
List<SearchIndexExcluder> searchIndexExcludesFromContext = (List<SearchIndexExcluder>) context
.getAttribute("SearchIndexExcludes");
IndividualToSearchDocument indToSearchDoc = setupTranslation(
jenaOntModel, displayModel,
RDFServiceUtils.getRDFServiceFactory(context),
modifiersFromContext, searchIndexExcludesFromContext);
IndividualToSearchDocument indToSearchDoc = setupTranslation();
/* setup search indexer */
SearchIndexer searchIndexer = new SearchIndexer(searchEngine, indToSearchDoc);
@ -156,42 +113,17 @@ public class SearchIndexerSetup implements ServletContextListener {
}
public static IndividualToSearchDocument setupTranslation(
OntModel jenaOntModel, Model displayModel,
RDFServiceFactory rdfServiceFactory,
List<DocumentModifier> modifiersFromContext,
List<SearchIndexExcluder> searchIndexExcludesFromContext) {
private IndividualToSearchDocument setupTranslation() {
try {
Set<SearchIndexExcluder> excluders = beanLoader.loadAll(SearchIndexExcluder.class);
log.debug("Excludes: (" + excluders.size() + ") " + excluders);
/*
* try to get context attribute DocumentModifiers and use that as the
* start of the list of DocumentModifier objects. This allows other
* ContextListeners to add to the basic set of DocumentModifiers.
*/
List<DocumentModifier> modifiers = new ArrayList<DocumentModifier>();
if (modifiersFromContext != null) {
modifiers.addAll(modifiersFromContext);
}
Set<DocumentModifier> modifiers = beanLoader.loadAll(DocumentModifier.class);
log.debug("Modifiers: (" + modifiers.size() + ") " + modifiers);
modifiers.add(new NameFields(rdfServiceFactory));
modifiers.add(new NameBoost(1.2f));
modifiers.add(new ThumbnailImageURL(rdfServiceFactory));
/*
* try to get context attribute SearchIndexExcludes and use that as the
* start of the list of exclude objects. This allows other
* ContextListeners to add to the basic set of SearchIndexExcludes .
*/
List<SearchIndexExcluder> excludes = new ArrayList<SearchIndexExcluder>();
if (searchIndexExcludesFromContext != null) {
excludes.addAll(searchIndexExcludesFromContext);
}
excludes.add(new ExcludeBasedOnNamespace(INDIVIDUAL_NS_EXCLUDES));
excludes.add(new ExcludeBasedOnTypeNamespace(TYPE_NS_EXCLUDES));
excludes.add(new ExcludeBasedOnType(OWL_TYPES_EXCLUDES));
excludes.add(new ExcludeNonFlagVitro());
excludes.add(new SyncingExcludeBasedOnType(displayModel));
return new IndividualToSearchDocument(excludes, modifiers);
return new IndividualToSearchDocument(new ArrayList<>(excluders), new ArrayList<>(modifiers));
} catch (ConfigurationBeanLoaderException e) {
throw new RuntimeException("Failed to configure the SearchIndexer", e);
}
}
}

View file

@ -2,6 +2,7 @@
package edu.cornell.mannlib.vitro.webapp.search.documentBuilding;
import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService.CONTENT;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
import static org.junit.Assert.assertEquals;
@ -9,6 +10,7 @@ import org.junit.Before;
import org.junit.Test;
import stubs.edu.cornell.mannlib.vitro.webapp.beans.IndividualStub;
import stubs.edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccessStub;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
@ -16,8 +18,6 @@ import com.hp.hpl.jena.rdf.model.Statement;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceFactorySingle;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.model.RDFServiceModel;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument;
@ -38,27 +38,29 @@ public class NameFieldsTest {
doc = new BaseSearchInputDocument();
RDFServiceModel rdfService = new RDFServiceModel(baseModel);
RDFServiceFactory rdfServiceFactory = new RDFServiceFactorySingle(
rdfService);
nameFields = new NameFields(rdfServiceFactory);
ContextModelAccessStub models = new ContextModelAccessStub();
models.setRDFService(CONTENT, rdfService);
nameFields = new NameFields();
nameFields.setContextModels(models);
}
@Test
public void nullIndividual() throws SkipIndividualException {
public void nullIndividual() {
SearchInputDocument expected = new BaseSearchInputDocument(doc);
assertResultingSearchDocument(null, expected);
}
/**
 * Hands an IndividualStub constructed with a null argument (presumably its
 * URI, going by the test name -- confirm) to assertResultingSearchDocument,
 * expecting the shared doc to equal a BaseSearchInputDocument that was built
 * from doc before the call.
 *
 * NOTE(review): two signature lines follow because this span comes from a
 * diff view (the line declaring "throws SkipIndividualException" and the
 * line without it) -- presumably only the second one is kept; confirm.
 */
@Test
public void nullUri() throws SkipIndividualException {
public void nullUri() {
SearchInputDocument expected = new BaseSearchInputDocument(doc);
assertResultingSearchDocument(new IndividualStub(null), expected);
}
@Test
public void foundNoLabels() throws SkipIndividualException {
public void foundNoLabels() {
SearchInputDocument expected = new BaseSearchInputDocument(doc);
expected.addField(NAME_RAW, "");
@ -67,7 +69,7 @@ public class NameFieldsTest {
}
@Test
public void foundOneLabel() throws SkipIndividualException {
public void foundOneLabel() {
baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label1"));
SearchInputDocument expected = new BaseSearchInputDocument(doc);
@ -78,7 +80,7 @@ public class NameFieldsTest {
}
@Test
public void foundTwoLabels() throws SkipIndividualException {
public void foundTwoLabels() {
baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label1"));
baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label2"));
@ -100,7 +102,7 @@ public class NameFieldsTest {
}
/**
 * Shared assertion helper: calls nameFields.modifyDocument(ind, doc) on the
 * fixture document and then asserts that doc equals the expected document.
 *
 * NOTE(review): the second signature line appears twice because this span
 * comes from a diff view (with and without "throws SkipIndividualException")
 * -- presumably only the version without the throws clause is kept; confirm.
 *
 * @param ind the individual passed to the modifier; callers in this test
 *            also pass null
 * @param expected the document that doc must equal after modification
 */
private void assertResultingSearchDocument(Individual ind,
SearchInputDocument expected) throws SkipIndividualException {
SearchInputDocument expected) {
nameFields.modifyDocument(ind, doc);
assertEquals(expected, doc);
}

View file

@ -4,6 +4,8 @@
*/
package edu.cornell.mannlib.vitro.webapp.search.documentBuilding;
import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService.CONTENT;
import java.io.InputStream;
import org.apache.log4j.Level;
@ -11,6 +13,7 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import stubs.edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccessStub;
import stubs.edu.cornell.mannlib.vitro.webapp.modules.ApplicationStub;
import stubs.edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineStub;
import stubs.javax.servlet.ServletContextStub;
@ -25,13 +28,11 @@ import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceFactorySingle;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.model.RDFServiceModel;
import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
public class ThumbnailImageURLTest extends AbstractTestClass{
RDFServiceFactory testRDF;
ContextModelAccessStub contextModels;
String personsURI = "http://vivo.cornell.edu/individual/individual8803";
/**
@ -45,7 +46,8 @@ public class ThumbnailImageURLTest extends AbstractTestClass{
Model model = ModelFactory.createDefaultModel();
InputStream in = ThumbnailImageURLTest.class.getResourceAsStream("testPerson.n3");
model.read(in,"","N3");
testRDF = new RDFServiceFactorySingle( new RDFServiceModel( model ) );
contextModels = new ContextModelAccessStub();
contextModels.setRDFService(CONTENT, new RDFServiceModel( model ));
}
/**
@ -55,7 +57,8 @@ public class ThumbnailImageURLTest extends AbstractTestClass{
@Test
public void testThumbnailFieldCreatedInSearchDoc() {
SearchInputDocument doc = ApplicationUtils.instance().getSearchEngine().createInputDocument();
ThumbnailImageURL testMe = new ThumbnailImageURL( testRDF );
ThumbnailImageURL testMe = new ThumbnailImageURL();
testMe.setContextModels(contextModels);
Individual ind = new IndividualImpl();
ind.setURI(personsURI);

View file

@ -3,7 +3,9 @@
package stubs.edu.cornell.mannlib.vitro.webapp.modelaccess;
import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.ReasoningOption.ASSERTIONS_AND_INFERENCES;
import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService.CONTENT;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Map;
@ -30,6 +32,7 @@ public class ContextModelAccessStub implements ContextModelAccess {
// ----------------------------------------------------------------------
private final Map<ReasoningOption, WebappDaoFactory> wadfMap = new HashMap<>();
private final Map<WhichService, RDFService> rdfServiceMap = new EnumMap<>(WhichService.class);
public void setWebappDaoFactory(WebappDaoFactory wadf) {
setWebappDaoFactory(wadf, ASSERTIONS_AND_INFERENCES);
@ -40,6 +43,10 @@ public class ContextModelAccessStub implements ContextModelAccess {
wadfMap.put(option, wadf);
}
/**
 * Registers the RDFService that this stub stores under the given service
 * selector. Registering again with the same selector replaces the earlier
 * entry.
 *
 * @param which the selector used as the key in the stub's service map
 * @param rdfService the service to store for that selector
 */
public void setRDFService(WhichService which, RDFService rdfService) {
    this.rdfServiceMap.put(which, rdfService);
}
// ----------------------------------------------------------------------
// Stub methods
// ----------------------------------------------------------------------
@ -49,22 +56,20 @@ public class ContextModelAccessStub implements ContextModelAccess {
return wadfMap.get(ASSERTIONS_AND_INFERENCES);
}
// ----------------------------------------------------------------------
// Un-implemented methods
// ----------------------------------------------------------------------
/**
 * No-argument accessor for the stub's RDFService.
 *
 * NOTE(review): this span comes from a diff view, so it shows two bodies:
 * the throw of a "not implemented" RuntimeException, and the delegation to
 * getRDFService(CONTENT). Presumably only the delegating return is kept
 * (a statement after an unconditional throw would not compile) -- confirm.
 */
@Override
public RDFService getRDFService() {
throw new RuntimeException(
"ContextModelAccessStub.getRDFService() not implemented.");
return getRDFService(CONTENT);
}
/**
 * Accessor for the RDFService stored under the given selector.
 *
 * NOTE(review): this span comes from a diff view, so it shows two bodies:
 * the throw of a "not implemented" RuntimeException, and the lookup in
 * rdfServiceMap. Presumably only the lookup is kept (a statement after an
 * unconditional throw would not compile) -- confirm. The lookup is a plain
 * Map.get, so it yields null when nothing is stored for the selector.
 */
@Override
public RDFService getRDFService(WhichService which) {
throw new RuntimeException(
"ContextModelAccessStub.getRDFService() not implemented.");
return rdfServiceMap.get(which);
}
// ----------------------------------------------------------------------
// Un-implemented methods
// ----------------------------------------------------------------------
@Override
public Dataset getDataset() {
throw new RuntimeException(