diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java index 506175252..9212e409f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java @@ -233,7 +233,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear //sets the query boost for the query. the lucene docs matching this query term //are multiplied by QUERY_BOOST to get their total score - query.setBoost(QUERY_BOOST); + //query.setBoost(QUERY_BOOST); topDocs = searcherForRequest.search(query,null,maxHitSize); }catch(Throwable t){ @@ -685,13 +685,14 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear //was used on the text that was indexed. //QueryParser qp = new QueryParser("NAME",analyzer); //this sets the query parser to AND all of the query terms it finds. - //qp.setDefaultOperator(QueryParser.AND_OPERATOR); //set up the map of stemmed field names -> unstemmed field names // HashMap map = new HashMap(); // map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED); // qp.setStemmedToUnstemmed(map); - MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{"ALLTEXT", "name", "type", "moniker"}, analyzer); + MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{ "name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" }, analyzer); + qp.setDefaultOperator(QueryParser.AND_OPERATOR); + return qp; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java index 0ecb58365..a003123fd 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java @@ -192,15 +192,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{ value = ent.getLocalName(); } Field name =new Field(term.NAME, value, - Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - // name.setBoost( NAME_BOOST ); + Field.Store.YES, Field.Index.ANALYZED); doc.add( name ); Field nameUn = new Field(term.NAMEUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED); - // nameUn.setBoost( NAME_BOOST ); doc.add( nameUn ); + // BK nameunanalyzed is used by IndividualListController Field nameUnanalyzed = new Field(term.NAMELOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED); doc.add( nameUnanalyzed ); @@ -211,8 +210,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{ //Moniker if(ent.getMoniker() != null){ - Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); - // moniker.setBoost(MONIKER_BOOST); + Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED); doc.add(moniker); } @@ -240,7 +238,18 @@ public class Entity2LuceneDoc implements Obj2DocIface{ log.error("could not save timekey " + ex); } - + /* thumbnail */ + try{ + value = null; + if( ent.hasThumb() ) + doc.add(new Field(term.THUMBNAIL, "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); + else + doc.add(new Field(term.THUMBNAIL, "0", Field.Store.YES, Field.Index.NOT_ANALYZED)); + }catch(Exception ex){ + log.debug("could not index thumbnail: " + ex); + } + + //time of index in millis past epoc Object anon[] = { new Long((new DateTime() ).getMillis()) }; doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ), @@ -283,9 +292,9 @@ public class Entity2LuceneDoc implements Obj2DocIface{ } } //stemmed terms - doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); + doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED)); //unstemmed terms - doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); + doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED)); } //flagX and portal flags are no longer indexed. diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java index 03b98a7a6..196ae7a76 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java @@ -1,12 +1,13 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.lucene; - +package edu.cornell.mannlib.vitro.webapp.search.lucene; + import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER; +import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE; import java.io.File; import java.io.IOException; @@ -41,32 +42,32 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup; - -/** - * Setup objects for lucene searching and indexing. - * - * The indexing and search objects, IndexBuilder and Searcher are found by the - * controllers IndexController and SearchController through the servletContext. - * This object will have the method contextInitialized() called when the tomcat - * server starts this webapp. - * - * The contextInitialized() will try to find the lucene index directory, - * make a LueceneIndexer and a LuceneSearcher. The LuceneIndexer will - * also get a list of Obj2Doc objects so it can translate object to lucene docs. - * - * To execute this at context creation put this in web.xml: - - - edu.cornell.mannlib.vitro.search.setup.LuceneSetup - - - - * @author bdc34 - * - */ -public class LuceneSetup implements javax.servlet.ServletContextListener { - private static final Log log = LogFactory.getLog(LuceneSetup.class.getName()); - + +/** + * Setup objects for lucene searching and indexing. + * + * The indexing and search objects, IndexBuilder and Searcher are found by the + * controllers IndexController and SearchController through the servletContext. + * This object will have the method contextInitialized() called when the tomcat + * server starts this webapp. + * + * The contextInitialized() will try to find the lucene index directory, + * make a LueceneIndexer and a LuceneSearcher. The LuceneIndexer will + * also get a list of Obj2Doc objects so it can translate object to lucene docs. + * + * To execute this at context creation put this in web.xml: + + + edu.cornell.mannlib.vitro.search.setup.LuceneSetup + + + + * @author bdc34 + * + */ +public class LuceneSetup implements javax.servlet.ServletContextListener { + private static final Log log = LogFactory.getLog(LuceneSetup.class.getName()); + /** * Gets run to set up DataSource when the webapp servlet context gets * created. @@ -86,7 +87,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { setBoolMax(); - // these should really be set as annotation properties. + // these should really be set as annotation properties. HashSet dataPropertyBlacklist = new HashSet(); context.setAttribute(SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist); HashSet objectPropertyBlacklist = new HashSet(); @@ -99,8 +100,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { LuceneIndexFactory lif = LuceneIndexFactory.setup(context, baseIndexDir); String liveIndexDir = lif.getLiveIndexDir(context); - // Here we want to put the LuceneIndex object into the application scope. - // This will attempt to create a new directory and empty index if there is none. + // Here we want to put the LuceneIndex object into the application scope. + // This will attempt to create a new directory and empty index if there is none. LuceneIndexer indexer = new LuceneIndexer( getBaseIndexDirName(context), liveIndexDir, null, getAnalyzer()); @@ -123,9 +124,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { sce.getServletContext().setAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE); } - // This is where the builder gets the list of places to try to - // get objects to index. It is filtered so that non-public text - // does not get into the search index. + // This is where the builder gets the list of places to try to + // get objects to index. It is filtered so that non-public text + // does not get into the search index. WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory"); VitroFilters vf = VitroFilterUtils.getDisplayFilterByRoleLevel(RoleLevel.PUBLIC, wadf); wadf = new WebappDaoFactoryFiltering(wadf, vf); @@ -135,8 +136,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { IndexBuilder builder = new IndexBuilder(context, indexer, sources); - // here we add the IndexBuilder with the LuceneIndexer - // to the servlet context so we can access it later in the webapp. + // here we add the IndexBuilder with the LuceneIndexer + // to the servlet context so we can access it later in the webapp. context.setAttribute(IndexBuilder.class.getName(), builder); // set up listeners so search index builder is notified of changes to model @@ -163,8 +164,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { log.error("***** Error setting up Lucene index *****", t); throw new RuntimeException("Startup of vitro application was prevented by errors in the lucene configuration"); } - } - + } + /** * Gets run when the webApp Context gets destroyed. */ @@ -187,7 +188,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { public static void setBoolMax() { BooleanQuery.setMaxClauseCount(16384); } - + /** * Gets the name of the directory to store the lucene index in. The * {@link ConfigurationProperties} should have a property named @@ -221,33 +222,33 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { } return dirName; - } - - /** - * Gets the analyzer that will be used when building the indexing - * and when analyzing the incoming search terms. - * - * @return - */ - private Analyzer getAnalyzer() { - PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer()); - // PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer()); + } + + /** + * Gets the analyzer that will be used when building the indexing + * and when analyzing the incoming search terms. + * + * @return + */ + private Analyzer getAnalyzer() { + PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer()); analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer()); - // analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer()); - // analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer()); - // analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer()); - analyzer.addAnalyzer(NAME, new KeywordAnalyzer()); - analyzer.addAnalyzer(MONIKER, new KeywordAnalyzer()); + analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer()); + analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer()); + analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer()); + analyzer.addAnalyzer(NAME, new StandardAnalyzer()); + analyzer.addAnalyzer(MONIKER, new StandardAnalyzer()); + analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer()); - return analyzer; + return analyzer; } - public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup"; - public static final String ANALYZER= "lucene.analyzer"; - public static final String BASE_INDEX_DIR = "lucene.indexDir"; - public static final String SEARCH_DATAPROPERTY_BLACKLIST = - "search.dataproperty.blacklist"; - public static final String SEARCH_OBJECTPROPERTY_BLACKLIST = - "search.objectproperty.blacklist"; - -} + public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup"; + public static final String ANALYZER= "lucene.analyzer"; + public static final String BASE_INDEX_DIR = "lucene.indexDir"; + public static final String SEARCH_DATAPROPERTY_BLACKLIST = + "search.dataproperty.blacklist"; + public static final String SEARCH_OBJECTPROPERTY_BLACKLIST = + "search.objectproperty.blacklist"; + +}