merging 5348 from nihvivo-rel-1.1-maint to trunk

This commit is contained in:
bdc34 2010-10-06 19:18:10 +00:00
parent bc2b73885d
commit bcba2f22e7

View file

@ -1,7 +1,7 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */ /* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.lucene; package edu.cornell.mannlib.vitro.webapp.search.lucene;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME;
@ -37,33 +37,33 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.Searcher; import edu.cornell.mannlib.vitro.webapp.search.beans.Searcher;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
import edu.cornell.mannlib.vitro.webapp.web.DisplayVocabulary; import edu.cornell.mannlib.vitro.webapp.web.DisplayVocabulary;
/** /**
* Setup objects for lucene searching and indexing. * Setup objects for lucene searching and indexing.
* *
* The indexing and search objects, IndexBuilder and Searcher are found by the * The indexing and search objects, IndexBuilder and Searcher are found by the
* controllers IndexController and SearchController through the servletContext. * controllers IndexController and SearchController through the servletContext.
* This object will have the method contextInitialized() called when the tomcat * This object will have the method contextInitialized() called when the tomcat
* server starts this webapp. * server starts this webapp.
* *
* The contextInitialized() will try to find the lucene index directory, * The contextInitialized() will try to find the lucene index directory,
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will * make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
* also get a list of Obj2Doc objects so it can translate objects to Lucene docs. * also get a list of Obj2Doc objects so it can translate objects to Lucene docs.
* *
* To execute this at context creation put this in web.xml: * To execute this at context creation put this in web.xml:
<listener> <listener>
<listener-class> <listener-class>
edu.cornell.mannlib.vitro.search.setup.LuceneSetup edu.cornell.mannlib.vitro.search.setup.LuceneSetup
</listener-class> </listener-class>
</listener> </listener>
* @author bdc34 * @author bdc34
* *
*/ */
public class LuceneSetup implements javax.servlet.ServletContextListener { public class LuceneSetup implements javax.servlet.ServletContextListener {
private static String indexDir = null; private static String indexDir = null;
private static final Log log = LogFactory.getLog(LuceneSetup.class.getName()); private static final Log log = LogFactory.getLog(LuceneSetup.class.getName());
/** /**
* Gets run to set up Lucene indexing when the webapp servlet context gets * Gets run to set up Lucene indexing when the webapp servlet context gets
* created. * created.
@ -78,33 +78,29 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
setBoolMax(); setBoolMax();
// these should really be set as annotation properties. // these should really be set as annotation properties.
HashSet<String> dataPropertyBlacklist = new HashSet<String>(); HashSet<String> dataPropertyBlacklist = new HashSet<String>();
context.setAttribute(SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist); context.setAttribute(SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
HashSet<String> objectPropertyBlacklist = new HashSet<String>(); HashSet<String> objectPropertyBlacklist = new HashSet<String>();
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom"); objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
context.setAttribute(SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist); context.setAttribute(SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
// Here we want to put the LuceneIndex object into the application scope. // Here we want to put the LuceneIndex object into the application scope.
// This will attempt to create a new directory and empty index if there is none. // This will attempt to create a new directory and empty index if there is none.
LuceneIndexer indexer = new LuceneIndexer(indexDir, null, getAnalyzer()); LuceneIndexer indexer = new LuceneIndexer(indexDir, null, getAnalyzer());
context.setAttribute(ANALYZER, getAnalyzer()); context.setAttribute(ANALYZER, getAnalyzer());
context.setAttribute(INDEX_DIR, indexDir); context.setAttribute(INDEX_DIR, indexDir);
indexer.addObj2Doc(new Entity2LuceneDoc()); indexer.addObj2Doc(new Entity2LuceneDoc());
context.setAttribute(LuceneIndexer.class.getName(), indexer); context.setAttribute(LuceneIndexer.class.getName(), indexer);
// Here we want to put the LuceneSearcher in the application scope. //This is where to get a LuceneIndex from. The indexer will
// the queries need to know the analyzer to use so that the same one can be used //need to reference this to notify it of updates to the index
// to analyze the fields in the incoming user query terms. LuceneIndexFactory lif = LuceneIndexFactory.getLuceneIndexFactoryFromContext(context);
LuceneSearcher searcher = new LuceneSearcher( indexer.setLuceneIndexFactory(lif);
new LuceneQueryFactory(getAnalyzer(), ALLTEXT), indexDir);
searcher.addObj2Doc(new Entity2LuceneDoc()); // This is where the builder gets the list of places to try to
context.setAttribute(Searcher.class.getName(), searcher); // get objects to index. It is filtered so that non-public text
indexer.addSearcher(searcher); // does not get into the search index.
// This is where the builder gets the list of places to try to
// get objects to index. It is filtered so that non-public text
// does not get into the search index.
WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory"); WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory");
VitroFilters vf = VitroFilterUtils.getDisplayFilterByRoleLevel(RoleLevel.PUBLIC, wadf); VitroFilters vf = VitroFilterUtils.getDisplayFilterByRoleLevel(RoleLevel.PUBLIC, wadf);
wadf = new WebappDaoFactoryFiltering(wadf, vf); wadf = new WebappDaoFactoryFiltering(wadf, vf);
@ -114,8 +110,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
IndexBuilder builder = new IndexBuilder(context, indexer, sources); IndexBuilder builder = new IndexBuilder(context, indexer, sources);
// here we add the IndexBuilder with the LuceneIndexer // here we add the IndexBuilder with the LuceneIndexer
// to the servlet context so we can access it later in the webapp. // to the servlet context so we can access it later in the webapp.
context.setAttribute(IndexBuilder.class.getName(), builder); context.setAttribute(IndexBuilder.class.getName(), builder);
// set up listeners so search index builder is notified of changes to model // set up listeners so search index builder is notified of changes to model
@ -149,8 +145,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
} catch (Throwable t) { } catch (Throwable t) {
log.error("***** Error setting up Lucene search *****", t); log.error("***** Error setting up Lucene search *****", t);
} }
} }
/** /**
* Gets run when the webApp Context gets destroyed. * Gets run when the webApp Context gets destroyed.
*/ */
@ -170,7 +166,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
// Sets the maximum clause count on Lucene's BooleanQuery to 16384 through its static setter.
// NOTE(review): presumably raised so queries that expand into many clauses are not rejected - confirm.
public static void setBoolMax() { public static void setBoolMax() {
BooleanQuery.setMaxClauseCount(16384); BooleanQuery.setMaxClauseCount(16384);
} }
/** /**
* Gets the name of the directory to store the lucene index in. The * Gets the name of the directory to store the lucene index in. The
* {@link ConfigurationProperties} should have a property named * {@link ConfigurationProperties} should have a property named
@ -204,29 +200,29 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
} }
return dirName; return dirName;
} }
/** /**
* Gets the analyzer that will be used when building the index * Gets the analyzer that will be used when building the index
* and when analyzing the incoming search terms. * and when analyzing the incoming search terms.
* *
* @return a newly built PerFieldAnalyzerWrapper; each call creates a fresh instance * @return a newly built PerFieldAnalyzerWrapper; each call creates a fresh instance
*/ */
private Analyzer getAnalyzer() { private Analyzer getAnalyzer() {
// A KeywordAnalyzer is handed to the wrapper's constructor; the four fields below are given their own analyzers.
// NOTE(review): per the Lucene API the constructor argument should be the default for all other fields - confirm.
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer()); PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer());
// ALLTEXT and NAME share one analyzer type (stemming, judging by the class name).
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer()); analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer()); analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
// The *UNSTEMMED fields use a different analyzer type (non-stemming, judging by the class name).
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer()); analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer()); analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
return analyzer; return analyzer;
} }
public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup"; public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup";
public static final String ANALYZER= "lucene.analyzer"; public static final String ANALYZER= "lucene.analyzer";
public static final String INDEX_DIR = "lucene.indexDir"; public static final String INDEX_DIR = "lucene.indexDir";
public static final String SEARCH_DATAPROPERTY_BLACKLIST = public static final String SEARCH_DATAPROPERTY_BLACKLIST =
"search.dataproperty.blacklist"; "search.dataproperty.blacklist";
public static final String SEARCH_OBJECTPROPERTY_BLACKLIST = public static final String SEARCH_OBJECTPROPERTY_BLACKLIST =
"search.objectproperty.blacklist"; "search.objectproperty.blacklist";
} }