1) Adding back thumbnail into the search index.
2) Setting MultiFieldQueryParser to AND the terms together when a query contains multiple terms.
3) Adding analyzers for Alltextunstemmed, rdftype.
This commit is contained in:
parent
55d4e6d5dd
commit
38902a5787
3 changed files with 87 additions and 76 deletions
|
@ -233,7 +233,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
|
||||
//sets the query boost for the query. the lucene docs matching this query term
|
||||
//are multiplied by QUERY_BOOST to get their total score
|
||||
query.setBoost(QUERY_BOOST);
|
||||
//query.setBoost(QUERY_BOOST);
|
||||
|
||||
topDocs = searcherForRequest.search(query,null,maxHitSize);
|
||||
}catch(Throwable t){
|
||||
|
@ -685,13 +685,14 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
//was used on the text that was indexed.
|
||||
//QueryParser qp = new QueryParser("NAME",analyzer);
|
||||
//this sets the query parser to AND all of the query terms it finds.
|
||||
//qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
||||
//set up the map of stemmed field names -> unstemmed field names
|
||||
// HashMap<String,String> map = new HashMap<String, String>();
|
||||
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
||||
// qp.setStemmedToUnstemmed(map);
|
||||
|
||||
MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{"ALLTEXT", "name", "type", "moniker"}, analyzer);
|
||||
MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{ "name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" }, analyzer);
|
||||
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
||||
|
||||
|
||||
return qp;
|
||||
}
|
||||
|
|
|
@ -192,15 +192,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
value = ent.getLocalName();
|
||||
}
|
||||
Field name =new Field(term.NAME, value,
|
||||
Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
|
||||
// name.setBoost( NAME_BOOST );
|
||||
Field.Store.YES, Field.Index.ANALYZED);
|
||||
doc.add( name );
|
||||
|
||||
Field nameUn = new Field(term.NAMEUNSTEMMED, value,
|
||||
Field.Store.NO, Field.Index.ANALYZED);
|
||||
// nameUn.setBoost( NAME_BOOST );
|
||||
doc.add( nameUn );
|
||||
|
||||
// BK nameunanalyzed is used by IndividualListController
|
||||
Field nameUnanalyzed = new Field(term.NAMELOWERCASE, value.toLowerCase(),
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
doc.add( nameUnanalyzed );
|
||||
|
@ -211,8 +210,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
//Moniker
|
||||
|
||||
if(ent.getMoniker() != null){
|
||||
Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
|
||||
// moniker.setBoost(MONIKER_BOOST);
|
||||
Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED);
|
||||
doc.add(moniker);
|
||||
}
|
||||
|
||||
|
@ -240,7 +238,18 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
log.error("could not save timekey " + ex);
|
||||
}
|
||||
|
||||
|
||||
/* thumbnail */
|
||||
try{
|
||||
value = null;
|
||||
if( ent.hasThumb() )
|
||||
doc.add(new Field(term.THUMBNAIL, "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||
else
|
||||
doc.add(new Field(term.THUMBNAIL, "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||
}catch(Exception ex){
|
||||
log.debug("could not index thumbnail: " + ex);
|
||||
}
|
||||
|
||||
|
||||
//time of index in millis past epoch
|
||||
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
||||
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
||||
|
@ -283,9 +292,9 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
}
|
||||
}
|
||||
//stemmed terms
|
||||
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
||||
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
|
||||
//unstemmed terms
|
||||
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
||||
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
||||
}
|
||||
|
||||
//flagX and portal flags are no longer indexed.
|
||||
|
|
|
@ -1,12 +1,13 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
@ -41,32 +42,32 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
|||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
|
||||
|
||||
/**
|
||||
* Setup objects for lucene searching and indexing.
|
||||
*
|
||||
* The indexing and search objects, IndexBuilder and Searcher are found by the
|
||||
* controllers IndexController and SearchController through the servletContext.
|
||||
* This object will have the method contextInitialized() called when the tomcat
|
||||
* server starts this webapp.
|
||||
*
|
||||
* The contextInitialized() will try to find the lucene index directory,
|
||||
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
|
||||
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
|
||||
*
|
||||
* To execute this at context creation put this in web.xml:
|
||||
<listener>
|
||||
<listener-class>
|
||||
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
|
||||
</listener-class>
|
||||
</listener>
|
||||
|
||||
* @author bdc34
|
||||
*
|
||||
*/
|
||||
public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||
private static final Log log = LogFactory.getLog(LuceneSetup.class.getName());
|
||||
|
||||
|
||||
/**
|
||||
* Setup objects for lucene searching and indexing.
|
||||
*
|
||||
* The indexing and search objects, IndexBuilder and Searcher are found by the
|
||||
* controllers IndexController and SearchController through the servletContext.
|
||||
* This object will have the method contextInitialized() called when the tomcat
|
||||
* server starts this webapp.
|
||||
*
|
||||
* The contextInitialized() will try to find the lucene index directory,
|
||||
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
|
||||
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
|
||||
*
|
||||
* To execute this at context creation put this in web.xml:
|
||||
<listener>
|
||||
<listener-class>
|
||||
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
|
||||
</listener-class>
|
||||
</listener>
|
||||
|
||||
* @author bdc34
|
||||
*
|
||||
*/
|
||||
public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||
private static final Log log = LogFactory.getLog(LuceneSetup.class.getName());
|
||||
|
||||
/**
|
||||
* Gets run to set up DataSource when the webapp servlet context gets
|
||||
* created.
|
||||
|
@ -86,7 +87,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
|
||||
setBoolMax();
|
||||
|
||||
// these should really be set as annotation properties.
|
||||
// these should really be set as annotation properties.
|
||||
HashSet<String> dataPropertyBlacklist = new HashSet<String>();
|
||||
context.setAttribute(SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
|
||||
HashSet<String> objectPropertyBlacklist = new HashSet<String>();
|
||||
|
@ -99,8 +100,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
LuceneIndexFactory lif = LuceneIndexFactory.setup(context, baseIndexDir);
|
||||
String liveIndexDir = lif.getLiveIndexDir(context);
|
||||
|
||||
// Here we want to put the LuceneIndex object into the application scope.
|
||||
// This will attempt to create a new directory and empty index if there is none.
|
||||
// Here we want to put the LuceneIndex object into the application scope.
|
||||
// This will attempt to create a new directory and empty index if there is none.
|
||||
LuceneIndexer indexer = new LuceneIndexer(
|
||||
getBaseIndexDirName(context), liveIndexDir, null,
|
||||
getAnalyzer());
|
||||
|
@ -123,9 +124,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
sce.getServletContext().setAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);
|
||||
}
|
||||
|
||||
// This is where the builder gets the list of places to try to
|
||||
// get objects to index. It is filtered so that non-public text
|
||||
// does not get into the search index.
|
||||
// This is where the builder gets the list of places to try to
|
||||
// get objects to index. It is filtered so that non-public text
|
||||
// does not get into the search index.
|
||||
WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory");
|
||||
VitroFilters vf = VitroFilterUtils.getDisplayFilterByRoleLevel(RoleLevel.PUBLIC, wadf);
|
||||
wadf = new WebappDaoFactoryFiltering(wadf, vf);
|
||||
|
@ -135,8 +136,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
|
||||
IndexBuilder builder = new IndexBuilder(context, indexer, sources);
|
||||
|
||||
// here we add the IndexBuilder with the LuceneIndexer
|
||||
// to the servlet context so we can access it later in the webapp.
|
||||
// here we add the IndexBuilder with the LuceneIndexer
|
||||
// to the servlet context so we can access it later in the webapp.
|
||||
context.setAttribute(IndexBuilder.class.getName(), builder);
|
||||
|
||||
// set up listeners so search index builder is notified of changes to model
|
||||
|
@ -163,8 +164,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
log.error("***** Error setting up Lucene index *****", t);
|
||||
throw new RuntimeException("Startup of vitro application was prevented by errors in the lucene configuration");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets run when the webApp Context gets destroyed.
|
||||
*/
|
||||
|
@ -187,7 +188,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
public static void setBoolMax() {
|
||||
BooleanQuery.setMaxClauseCount(16384);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Gets the name of the directory to store the lucene index in. The
|
||||
* {@link ConfigurationProperties} should have a property named
|
||||
|
@ -221,33 +222,33 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
}
|
||||
|
||||
return dirName;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the analyzer that will be used when building the indexing
|
||||
* and when analyzing the incoming search terms.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private Analyzer getAnalyzer() {
|
||||
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer());
|
||||
// PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer());
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the analyzer that will be used when building the indexing
|
||||
* and when analyzing the incoming search terms.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private Analyzer getAnalyzer() {
|
||||
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer());
|
||||
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
||||
// analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
|
||||
// analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
// analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(NAME, new KeywordAnalyzer());
|
||||
analyzer.addAnalyzer(MONIKER, new KeywordAnalyzer());
|
||||
analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
|
||||
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(NAME, new StandardAnalyzer());
|
||||
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer());
|
||||
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer());
|
||||
|
||||
return analyzer;
|
||||
return analyzer;
|
||||
}
|
||||
|
||||
public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup";
|
||||
public static final String ANALYZER= "lucene.analyzer";
|
||||
public static final String BASE_INDEX_DIR = "lucene.indexDir";
|
||||
public static final String SEARCH_DATAPROPERTY_BLACKLIST =
|
||||
"search.dataproperty.blacklist";
|
||||
public static final String SEARCH_OBJECTPROPERTY_BLACKLIST =
|
||||
"search.objectproperty.blacklist";
|
||||
|
||||
}
|
||||
public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup";
|
||||
public static final String ANALYZER= "lucene.analyzer";
|
||||
public static final String BASE_INDEX_DIR = "lucene.indexDir";
|
||||
public static final String SEARCH_DATAPROPERTY_BLACKLIST =
|
||||
"search.dataproperty.blacklist";
|
||||
public static final String SEARCH_OBJECTPROPERTY_BLACKLIST =
|
||||
"search.objectproperty.blacklist";
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue