1) Adding back thumbnail into the search index.

2) Setting MultiFieldQueryParser to perform an AND if there are multiple query terms.

3) Adding analyzers for Alltextunstemmed, rdftype.
2011-03-17 18:54:42 +00:00 · 2011-03-17 18:54:42 +00:00 · 38902a5787
commit 38902a5787
parent 55d4e6d5dd
3 changed files with 87 additions and 76 deletions
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java
@ -233,7 +233,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
            	//sets the query boost for the query. the lucene docs matching this query term
            	//are multiplied by QUERY_BOOST to get their total score
-            	query.setBoost(QUERY_BOOST);
+            	//query.setBoost(QUERY_BOOST);
            	topDocs = searcherForRequest.search(query,null,maxHitSize);
            }catch(Throwable t){
@ -685,13 +685,14 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
        //was used on the text that was indexed.
    	//QueryParser qp = new QueryParser("NAME",analyzer);
        //this sets the query parser to AND all of the query terms it finds.
        //qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        //set up the map of stemmed field names -> unstemmed field names
 //        HashMap<String,String> map = new HashMap<String, String>();
 //        map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
 //        qp.setStemmedToUnstemmed(map);
-    	MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{"ALLTEXT", "name", "type", "moniker"}, analyzer);
+    	MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{ "name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" }, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
    	return qp;
    }
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java
@ -192,15 +192,14 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
            value = ent.getLocalName();
        }
        Field name =new Field(term.NAME, value, 
-                               Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
+                               Field.Store.YES, Field.Index.ANALYZED);
       // name.setBoost( NAME_BOOST );
        doc.add( name );
        Field nameUn = new Field(term.NAMEUNSTEMMED, value, 
        						Field.Store.NO, Field.Index.ANALYZED);        
       // nameUn.setBoost( NAME_BOOST );
        doc.add( nameUn );
        // BK nameunanalyzed is used by IndividualListController
        Field nameUnanalyzed = new Field(term.NAMELOWERCASE, value.toLowerCase(), 
 				Field.Store.YES, Field.Index.NOT_ANALYZED);        
        doc.add( nameUnanalyzed );
@ -211,8 +210,7 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
        //Moniker
        if(ent.getMoniker() != null){
-        	Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
+        	Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED);
        //	moniker.setBoost(MONIKER_BOOST);
        	doc.add(moniker);
        }
@ -240,7 +238,18 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
            log.error("could not save timekey " + ex);            
        }        
-
+        /* thumbnail */
        try{
            value = null;
            if( ent.hasThumb() )
                doc.add(new Field(term.THUMBNAIL, "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
            else
                doc.add(new Field(term.THUMBNAIL, "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
        }catch(Exception ex){
            log.debug("could not index thumbnail: " + ex);
        }
        //time of index in millis past epoc
        Object anon[] =  { new Long((new DateTime() ).getMillis())  };
        doc.add(  new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
@ -283,9 +292,9 @@ public class Entity2LuceneDoc  implements Obj2DocIface{
                }
            }
            //stemmed terms
-            doc.add( new  Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
+            doc.add( new  Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
            //unstemmed terms
-            doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
+            doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
        }
        //flagX and portal flags are no longer indexed.
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java
@ -1,12 +1,13 @@
 /* $This file is distributed under the terms of the license in /doc/license.txt$ */
-package edu.cornell.mannlib.vitro.webapp.search.lucene;
+package edu.cornell.mannlib.vitro.webapp.search.lucene;
-
+
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT;
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME;
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
 import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
 import java.io.File;
 import java.io.IOException;
@ -41,32 +42,32 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
 import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
 import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
 import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
-
+
-/**
+/**
- * Setup objects for lucene searching and indexing.
+ * Setup objects for lucene searching and indexing.
- *
+ *
- * The indexing and search objects, IndexBuilder and Searcher are found by the
+ * The indexing and search objects, IndexBuilder and Searcher are found by the
- * controllers IndexController and SearchController through the servletContext.
+ * controllers IndexController and SearchController through the servletContext.
- * This object will have the method contextInitialized() called when the tomcat
+ * This object will have the method contextInitialized() called when the tomcat
- * server starts this webapp.
+ * server starts this webapp.
- *
+ *
- *  The contextInitialized() will try to find the lucene index directory,
+ *  The contextInitialized() will try to find the lucene index directory,
- *  make a LueceneIndexer and a LuceneSearcher.  The LuceneIndexer will
+ *  make a LueceneIndexer and a LuceneSearcher.  The LuceneIndexer will
- *  also get a list of Obj2Doc objects so it can translate object to lucene docs.
+ *  also get a list of Obj2Doc objects so it can translate object to lucene docs.
- *
+ *
- * To execute this at context creation put this in web.xml:
+ * To execute this at context creation put this in web.xml:
-    <listener>
+    <listener>
-        <listener-class>
+        <listener-class>
-            edu.cornell.mannlib.vitro.search.setup.LuceneSetup
+            edu.cornell.mannlib.vitro.search.setup.LuceneSetup
-        </listener-class>
+        </listener-class>
-    </listener>
+    </listener>
-
+
- * @author bdc34
+ * @author bdc34
- *
+ *
- */
+ */
-public class LuceneSetup implements javax.servlet.ServletContextListener {        
+public class LuceneSetup implements javax.servlet.ServletContextListener {        
-    private static final Log log = LogFactory.getLog(LuceneSetup.class.getName());
+    private static final Log log = LogFactory.getLog(LuceneSetup.class.getName());
-        
+        
 	/**
 	 * Gets run to set up DataSource when the webapp servlet context gets
 	 * created.
@ -86,7 +87,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
 			setBoolMax();
-			// these should really be set as annotation properties.
+			// these should really be set as annotation properties.
 			HashSet<String> dataPropertyBlacklist = new HashSet<String>();
 			context.setAttribute(SEARCH_DATAPROPERTY_BLACKLIST,	dataPropertyBlacklist);
 			HashSet<String> objectPropertyBlacklist = new HashSet<String>();
@ -99,8 +100,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
            LuceneIndexFactory lif = LuceneIndexFactory.setup(context, baseIndexDir);                       
            String liveIndexDir = lif.getLiveIndexDir(context);
-			// Here we want to put the LuceneIndex object into the application scope.
+			// Here we want to put the LuceneIndex object into the application scope.
-			// This will attempt to create a new directory and empty index if there is none.
+			// This will attempt to create a new directory and empty index if there is none.
 			LuceneIndexer indexer = new LuceneIndexer(
 					getBaseIndexDirName(context), liveIndexDir, null,
 					getAnalyzer());
@ -123,9 +124,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
 			    sce.getServletContext().setAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);			
 			}
-			// This is where the builder gets the list of places to try to
+			// This is where the builder gets the list of places to try to
-			// get objects to index. It is filtered so that non-public text
+			// get objects to index. It is filtered so that non-public text
-			// does not get into the search index.
+			// does not get into the search index.
 			WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory");
 			VitroFilters vf = VitroFilterUtils.getDisplayFilterByRoleLevel(RoleLevel.PUBLIC, wadf);
 			wadf = new WebappDaoFactoryFiltering(wadf, vf);
@ -135,8 +136,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
 			IndexBuilder builder = new IndexBuilder(context, indexer, sources);
-			// here we add the IndexBuilder with the LuceneIndexer
+			// here we add the IndexBuilder with the LuceneIndexer
-			// to the servlet context so we can access it later in the webapp.
+			// to the servlet context so we can access it later in the webapp.
 			context.setAttribute(IndexBuilder.class.getName(), builder);
 			// set up listeners so search index builder is notified of changes to model
@ -163,8 +164,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
 			log.error("***** Error setting up Lucene index *****", t);
 			throw new RuntimeException("Startup of vitro application was prevented by errors in the lucene configuration");
 		}
-	}
+	}
-
+
 	/**
 	 * Gets run when the webApp Context gets destroyed.
 	 */
@ -187,7 +188,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
 	public static void setBoolMax() {
 		BooleanQuery.setMaxClauseCount(16384);
 	}
-       
+       
 	/**
 	 * Gets the name of the directory to store the lucene index in. The
 	 * {@link ConfigurationProperties} should have a property named
@ -221,33 +222,33 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
 		}
 		return dirName;
-	}
+	}
-
+
-    /**
+    /**
-     * Gets the analyzer that will be used when building the indexing
+     * Gets the analyzer that will be used when building the indexing
-     * and when analyzing the incoming search terms.
+     * and when analyzing the incoming search terms.
-     *
+     *
-     * @return
+     * @return
-     */
+     */
-    private Analyzer getAnalyzer() {
+    private Analyzer getAnalyzer() {
-        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer());
+        PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer());
      //  PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer());
    	analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
-      // analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
+    	analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
-     //   analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
+        analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
-     //   analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());      
+        analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());      
-        analyzer.addAnalyzer(NAME, new KeywordAnalyzer());
+        analyzer.addAnalyzer(NAME, new StandardAnalyzer());
-        analyzer.addAnalyzer(MONIKER, new KeywordAnalyzer());
+        analyzer.addAnalyzer(MONIKER, new StandardAnalyzer());
        analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer());
-        return analyzer;
+        return analyzer;
    }
-    public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup";
+    public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup";
-    public static final String ANALYZER= "lucene.analyzer";
+    public static final String ANALYZER= "lucene.analyzer";
-    public static final String BASE_INDEX_DIR = "lucene.indexDir";
+    public static final String BASE_INDEX_DIR = "lucene.indexDir";
-    public static final String SEARCH_DATAPROPERTY_BLACKLIST = 
+    public static final String SEARCH_DATAPROPERTY_BLACKLIST = 
-        "search.dataproperty.blacklist";
+        "search.dataproperty.blacklist";
-    public static final String SEARCH_OBJECTPROPERTY_BLACKLIST = 
+    public static final String SEARCH_OBJECTPROPERTY_BLACKLIST = 
-        "search.objectproperty.blacklist";
+        "search.objectproperty.blacklist";
-
+
-}
+}