From 823848123fe29056ab821bd02d4ccf02b64a067a Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Tue, 22 Apr 2014 10:56:00 -0400 Subject: [PATCH] VIVO-742 Create SearchEngineSetup and SearchIndexerSetup --- .../webapp/application/ApplicationImpl.java | 41 ++++ .../searchengine/SearchEngineSetup.java | 32 +++ .../searchengine/SearchEngineWrapper.java | 178 ++++++++++++++++ .../searchindex/SearchIndexerSetup.java | 193 ++++++++++++++++++ .../startup/ComponentStartupStatusImpl.java | 53 +++++ .../WEB-INF/resources/startup_listeners.txt | 5 +- 6 files changed, 500 insertions(+), 2 deletions(-) create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/application/ApplicationImpl.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/SearchEngineSetup.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/SearchEngineWrapper.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/startup/ComponentStartupStatusImpl.java diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/application/ApplicationImpl.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/application/ApplicationImpl.java new file mode 100644 index 000000000..0d33ad0e9 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/application/ApplicationImpl.java @@ -0,0 +1,41 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.application; + +import javax.servlet.ServletContext; + +import edu.cornell.mannlib.vitro.webapp.modules.Application; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; +import edu.cornell.mannlib.vitro.webapp.searchengine.SearchEngineWrapper; +import edu.cornell.mannlib.vitro.webapp.searchengine.solr.SolrSearchEngine; + +/** + * The basic implementation of the Application interface. + */ +public class ApplicationImpl implements Application { + private final ServletContext ctx; + private SearchEngine searchEngine; + + public ApplicationImpl(ServletContext ctx) { + this.ctx = ctx; + setSearchEngine(new SolrSearchEngine()); + } + + @Override + public ServletContext getServletContext() { + return ctx; + } + + @Override + public SearchEngine getSearchEngine() { + return searchEngine; + } + + public void setSearchEngine(SearchEngine searchEngine) { + if (searchEngine instanceof SearchEngineWrapper) { + this.searchEngine = searchEngine; + } else { + this.searchEngine = new SearchEngineWrapper(searchEngine); + } + } +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/SearchEngineSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/SearchEngineSetup.java new file mode 100644 index 000000000..0a4748d90 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/SearchEngineSetup.java @@ -0,0 +1,32 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchengine; + +import javax.servlet.ServletContextEvent; +import javax.servlet.ServletContextListener; + +import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils; +import edu.cornell.mannlib.vitro.webapp.modules.Application; +import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus; +import edu.cornell.mannlib.vitro.webapp.startup.ComponentStartupStatusImpl; +import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus; + +/** + * Whatever search engine we have, start it up and shut it down. + */ +public class SearchEngineSetup implements ServletContextListener { + @Override + public void contextInitialized(ServletContextEvent sce) { + Application application = ApplicationUtils.instance(); + StartupStatus ss = StartupStatus.getBean(sce.getServletContext()); + ComponentStartupStatus css = new ComponentStartupStatusImpl(this, ss); + application.getSearchEngine().startup(application, css); + } + + @Override + public void contextDestroyed(ServletContextEvent sce) { + Application application = ApplicationUtils.instance(); + application.getSearchEngine().shutdown(application); + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/SearchEngineWrapper.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/SearchEngineWrapper.java new file mode 100644 index 000000000..6cbd9b52b --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/SearchEngineWrapper.java @@ -0,0 +1,178 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchengine; + +import static edu.cornell.mannlib.vitro.webapp.modules.Application.Component.LifecycleState.ACTIVE; +import static edu.cornell.mannlib.vitro.webapp.modules.Application.Component.LifecycleState.NEW; +import static edu.cornell.mannlib.vitro.webapp.modules.Application.Component.LifecycleState.STOPPED; + +import java.util.Collection; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import edu.cornell.mannlib.vitro.webapp.modules.Application; +import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse; + +/** + * TODO + */ +public class SearchEngineWrapper implements SearchEngine { + private static final Log log = LogFactory.getLog(SearchEngineWrapper.class); + + private final SearchEngine innerEngine; + + private volatile LifecycleState lifecycleState = NEW; + + public SearchEngineWrapper(SearchEngine innerEngine) { + if (innerEngine == null) { + throw new NullPointerException("innerEngine may not be null."); + } + this.innerEngine = innerEngine; + } + + /** + * Complain unless ACTIVE. + */ + private void confirmActive() { + if (lifecycleState == NEW) { + throw new IllegalStateException( + "Search engine has not been started."); + } else if (lifecycleState == STOPPED) { + throw new IllegalStateException("Search engine has stopped."); + } + } + + // ---------------------------------------------------------------------- + // Overridden methods. + // ---------------------------------------------------------------------- + + /** + * If NEW, do startup. If STOPPED, throw an exception. If ACTIVE, just + * complain. + */ + @Override + public void startup(Application application, ComponentStartupStatus css) { + if (application == null) { + throw new NullPointerException("application may not be null."); + } + switch (lifecycleState) { + case NEW: + innerEngine.startup(application, css); + lifecycleState = ACTIVE; + break; + case STOPPED: + throw new IllegalStateException( + "startup called when already STOPPED"); + default: // ACTIVE: + try { + throw new IllegalStateException(); + } catch (Exception e) { + log.warn("startup called when already ACTIVE", e); + } + break; + } + } + + /** + * If ACTIVE, do shutdown. Otherwise, complain and do nothing. + */ + @Override + public void shutdown(Application application) { + if (application == null) { + throw new NullPointerException("application may not be null."); + } + switch (lifecycleState) { + case ACTIVE: + innerEngine.shutdown(application); + lifecycleState = STOPPED; + break; + default: // NEW, STOPPED: + try { + throw new IllegalStateException(); + } catch (Exception e) { + log.warn("startup called when state was " + lifecycleState, e); + } + break; + } + } + + @Override + public void ping() throws SearchEngineException { + confirmActive(); + innerEngine.ping(); + } + + @Override + public SearchInputDocument createInputDocument() { + confirmActive(); + return innerEngine.createInputDocument(); + } + + @Override + public void add(SearchInputDocument... docs) throws SearchEngineException { + confirmActive(); + innerEngine.add(docs); + } + + @Override + public void add(Collection docs) + throws SearchEngineException { + confirmActive(); + innerEngine.add(docs); + } + + @Override + public void commit() throws SearchEngineException { + confirmActive(); + innerEngine.commit(); + } + + @Override + public void commit(boolean wait) throws SearchEngineException { + confirmActive(); + innerEngine.commit(wait); + } + + @Override + public void deleteById(String... ids) throws SearchEngineException { + confirmActive(); + innerEngine.deleteById(ids); + } + + @Override + public void deleteById(Collection ids) throws SearchEngineException { + confirmActive(); + innerEngine.deleteById(ids); + } + + @Override + public void deleteByQuery(String query) throws SearchEngineException { + confirmActive(); + innerEngine.deleteByQuery(query); + } + + @Override + public SearchQuery createQuery() { + confirmActive(); + return innerEngine.createQuery(); + } + + @Override + public SearchQuery createQuery(String queryText) { + confirmActive(); + return innerEngine.createQuery(queryText); + } + + @Override + public SearchResponse query(SearchQuery query) throws SearchEngineException { + confirmActive(); + return innerEngine.query(query); + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java new file mode 100644 index 000000000..a5fff0582 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java @@ -0,0 +1,193 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex; + +import java.util.ArrayList; +import java.util.List; + +import javax.servlet.ServletContext; +import javax.servlet.ServletContextEvent; +import javax.servlet.ServletContextListener; + +import org.apache.solr.client.solrj.impl.HttpSolrServer; + +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.vocabulary.OWL; + +import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils; +import edu.cornell.mannlib.vitro.webapp.dao.ModelAccess; +import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; +import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; +import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering; +import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; +import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; +import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; +import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; +import edu.cornell.mannlib.vitro.webapp.search.beans.StatementToURIsToUpdate; +import edu.cornell.mannlib.vitro.webapp.search.indexing.AdditionalUriFinders; +import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; +import edu.cornell.mannlib.vitro.webapp.search.indexing.SearchReindexingListener; +import edu.cornell.mannlib.vitro.webapp.search.solr.SolrIndexer; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.DocumentModifier; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnNamespace; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnType; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnTypeNamespace; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeNonFlagVitro; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.IndividualToSolrDocument; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.NameBoost; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.NameFields; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.SearchIndexExcluder; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.SyncingExcludeBasedOnType; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ThumbnailImageURL; +import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus; + +/** + * TODO + */ +public class SearchIndexerSetup implements ServletContextListener { + public static final String PROHIBITED_FROM_SEARCH = "edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch"; + + /** + * Exclude from the search index Individuals with types from these + * namespaces + */ + private static final String[] TYPE_NS_EXCLUDES = { VitroVocabulary.PUBLIC + // if you do OWL.NS here you will exclude all of owl:Thing. + }; + + /** + * Exclude from the search index individuals who's URIs start with these + * namespaces. + */ + private static final String[] INDIVIDUAL_NS_EXCLUDES = { + VitroVocabulary.vitroURI, VitroVocabulary.VITRO_PUBLIC, + VitroVocabulary.PSEUDO_BNODE_NS, OWL.NS }; + + /** Individuals of these types will be excluded from the search index */ + private static final String[] OWL_TYPES_EXCLUDES = { + OWL.ObjectProperty.getURI(), OWL.DatatypeProperty.getURI(), + OWL.AnnotationProperty.getURI() }; + + @Override + public void contextInitialized(ServletContextEvent sce) { + ServletContext context = sce.getServletContext(); + StartupStatus ss = StartupStatus.getBean(context); + HttpSolrServer server = (HttpSolrServer) ApplicationUtils.instance().getSearchEngine(); + + try { + /* set up the individual to solr doc translation */ + OntModel jenaOntModel = ModelAccess.on(context).getJenaOntModel(); + OntModel displayModel = ModelAccess.on(context).getDisplayModel(); + + /* + * try to get context attribute DocumentModifiers and use that as + * the start of the list of DocumentModifier objects. This allows + * other ContextListeners to add to the basic set of + * DocumentModifiers. + */ + @SuppressWarnings("unchecked") + List modifiersFromContext = (List) context + .getAttribute("DocumentModifiers"); + + /* + * try to get context attribute SearchIndexExcludes and use that as + * the start of the list of exclude objects. This allows other + * ContextListeners to add to the basic set of SearchIndexExcludes . + */ + @SuppressWarnings("unchecked") + List searchIndexExcludesFromContext = (List) context + .getAttribute("SearchIndexExcludes"); + + IndividualToSolrDocument indToSolrDoc = setupTransltion( + jenaOntModel, displayModel, + RDFServiceUtils.getRDFServiceFactory(context), + modifiersFromContext, searchIndexExcludesFromContext); + + /* setup solr indexer */ + SolrIndexer solrIndexer = new SolrIndexer(server, indToSolrDoc); + + // This is where the builder gets the list of places to try to + // get objects to index. It is filtered so that non-public text + // does not get into the search index. + WebappDaoFactory wadf = ModelAccess.on(context) + .getWebappDaoFactory(); + VitroFilters vf = VitroFilterUtils.getPublicFilter(context); + wadf = new WebappDaoFactoryFiltering(wadf, vf); + + // make objects that will find additional URIs for context nodes etc + RDFService rdfService = RDFServiceUtils.getRDFServiceFactory( + context).getRDFService(); + List uriFinders = AdditionalUriFinders + .getList(rdfService, wadf.getIndividualDao()); + + // Make the IndexBuilder + IndexBuilder builder = new IndexBuilder(solrIndexer, wadf, + uriFinders); + // Save it to the servlet context so we can access it later in the + // webapp. + context.setAttribute(IndexBuilder.class.getName(), builder); + + // set up listeners so search index builder is notified of changes + // to model + ServletContext ctx = sce.getServletContext(); + SearchReindexingListener srl = new SearchReindexingListener(builder); + ModelContext.registerListenerForChanges(ctx, srl); + + ss.info(this, "Setup of Solr index completed."); + } catch (Throwable e) { + ss.fatal(this, "could not setup local solr server", e); + } + + } + + @Override + public void contextDestroyed(ServletContextEvent sce) { + IndexBuilder builder = (IndexBuilder) sce.getServletContext() + .getAttribute(IndexBuilder.class.getName()); + if (builder != null) + builder.stopIndexingThread(); + + } + + public static IndividualToSolrDocument setupTransltion( + OntModel jenaOntModel, Model displayModel, + RDFServiceFactory rdfServiceFactory, + List modifiersFromContext, + List searchIndexExcludesFromContext) { + + /* + * try to get context attribute DocumentModifiers and use that as the + * start of the list of DocumentModifier objects. This allows other + * ContextListeners to add to the basic set of DocumentModifiers. + */ + List modifiers = new ArrayList(); + if (modifiersFromContext != null) { + modifiers.addAll(modifiersFromContext); + } + + modifiers.add(new NameFields(rdfServiceFactory)); + modifiers.add(new NameBoost(1.2f)); + modifiers.add(new ThumbnailImageURL(rdfServiceFactory)); + + /* + * try to get context attribute SearchIndexExcludes and use that as the + * start of the list of exclude objects. This allows other + * ContextListeners to add to the basic set of SearchIndexExcludes . + */ + List excludes = new ArrayList(); + if (searchIndexExcludesFromContext != null) { + excludes.addAll(searchIndexExcludesFromContext); + } + + excludes.add(new ExcludeBasedOnNamespace(INDIVIDUAL_NS_EXCLUDES)); + excludes.add(new ExcludeBasedOnTypeNamespace(TYPE_NS_EXCLUDES)); + excludes.add(new ExcludeBasedOnType(OWL_TYPES_EXCLUDES)); + excludes.add(new ExcludeNonFlagVitro()); + excludes.add(new SyncingExcludeBasedOnType(displayModel)); + + return new IndividualToSolrDocument(excludes, modifiers); + } +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/startup/ComponentStartupStatusImpl.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/startup/ComponentStartupStatusImpl.java new file mode 100644 index 000000000..65ad2e206 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/startup/ComponentStartupStatusImpl.java @@ -0,0 +1,53 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.startup; + +import javax.servlet.ServletContextListener; + +import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus; + +/** + * A temporary wrapper around the StartupStatus, with the ServletContextListener + * built in. + */ +public class ComponentStartupStatusImpl implements ComponentStartupStatus { + private final ServletContextListener listener; + private final StartupStatus ss; + + public ComponentStartupStatusImpl(ServletContextListener listener, + StartupStatus ss) { + this.listener = listener; + this.ss = ss; + } + + @Override + public void info(String message) { + ss.info(listener, message); + } + + @Override + public void info(String message, Throwable cause) { + ss.info(listener, message, cause); + } + + @Override + public void warning(String message) { + ss.warning(listener, message); + } + + @Override + public void warning(String message, Throwable cause) { + ss.warning(listener, message, cause); + } + + @Override + public void fatal(String message) { + ss.fatal(listener, message); + } + + @Override + public void fatal(String message, Throwable cause) { + ss.fatal(listener, message, cause); + } + +} diff --git a/webapp/web/WEB-INF/resources/startup_listeners.txt b/webapp/web/WEB-INF/resources/startup_listeners.txt index 1744c622f..8899f7187 100644 --- a/webapp/web/WEB-INF/resources/startup_listeners.txt +++ b/webapp/web/WEB-INF/resources/startup_listeners.txt @@ -62,9 +62,10 @@ edu.ucsf.vitro.opensocial.OpenSocialSmokeTests # For multiple language support edu.cornell.mannlib.vitro.webapp.i18n.selection.LocaleSelectionSetup -# The Solr index uses a "public" permission, so the PropertyRestrictionPolicyHelper +# The search indexer uses a "public" permission, so the PropertyRestrictionPolicyHelper # and the PermissionRegistry must already be set up. -edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup +edu.cornell.mannlib.vitro.webapp.searchengine.SearchEngineSetup +edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerSetup edu.cornell.mannlib.vitro.webapp.controller.freemarker.FreemarkerSetup edu.cornell.mannlib.vitro.webapp.freemarker.config.FreemarkerConfiguration$Setup