1) Adding back thumbnail into the search index.
2) Setting MultiFieldQueryParser to perform an AND if there are multiple query terms.
3) Adding analyzers for ALLTEXTUNSTEMMED and RDFTYPE.
This commit is contained in:
parent
55d4e6d5dd
commit
38902a5787
3 changed files with 87 additions and 76 deletions
|
@ -233,7 +233,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
|
|
||||||
//sets the query boost for the query. the lucene docs matching this query term
|
//sets the query boost for the query. the lucene docs matching this query term
|
||||||
//are multiplied by QUERY_BOOST to get their total score
|
//are multiplied by QUERY_BOOST to get their total score
|
||||||
query.setBoost(QUERY_BOOST);
|
//query.setBoost(QUERY_BOOST);
|
||||||
|
|
||||||
topDocs = searcherForRequest.search(query,null,maxHitSize);
|
topDocs = searcherForRequest.search(query,null,maxHitSize);
|
||||||
}catch(Throwable t){
|
}catch(Throwable t){
|
||||||
|
@ -685,13 +685,14 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
//was used on the text that was indexed.
|
//was used on the text that was indexed.
|
||||||
//QueryParser qp = new QueryParser("NAME",analyzer);
|
//QueryParser qp = new QueryParser("NAME",analyzer);
|
||||||
//this sets the query parser to AND all of the query terms it finds.
|
//this sets the query parser to AND all of the query terms it finds.
|
||||||
//qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
|
||||||
//set up the map of stemmed field names -> unstemmed field names
|
//set up the map of stemmed field names -> unstemmed field names
|
||||||
// HashMap<String,String> map = new HashMap<String, String>();
|
// HashMap<String,String> map = new HashMap<String, String>();
|
||||||
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
||||||
// qp.setStemmedToUnstemmed(map);
|
// qp.setStemmedToUnstemmed(map);
|
||||||
|
|
||||||
MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{"ALLTEXT", "name", "type", "moniker"}, analyzer);
|
MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{ "name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" }, analyzer);
|
||||||
|
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
||||||
|
|
||||||
|
|
||||||
return qp;
|
return qp;
|
||||||
}
|
}
|
||||||
|
|
|
@ -192,15 +192,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
value = ent.getLocalName();
|
value = ent.getLocalName();
|
||||||
}
|
}
|
||||||
Field name =new Field(term.NAME, value,
|
Field name =new Field(term.NAME, value,
|
||||||
Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
|
Field.Store.YES, Field.Index.ANALYZED);
|
||||||
// name.setBoost( NAME_BOOST );
|
|
||||||
doc.add( name );
|
doc.add( name );
|
||||||
|
|
||||||
Field nameUn = new Field(term.NAMEUNSTEMMED, value,
|
Field nameUn = new Field(term.NAMEUNSTEMMED, value,
|
||||||
Field.Store.NO, Field.Index.ANALYZED);
|
Field.Store.NO, Field.Index.ANALYZED);
|
||||||
// nameUn.setBoost( NAME_BOOST );
|
|
||||||
doc.add( nameUn );
|
doc.add( nameUn );
|
||||||
|
|
||||||
|
// BK nameunanalyzed is used by IndividualListController
|
||||||
Field nameUnanalyzed = new Field(term.NAMELOWERCASE, value.toLowerCase(),
|
Field nameUnanalyzed = new Field(term.NAMELOWERCASE, value.toLowerCase(),
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED);
|
Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||||
doc.add( nameUnanalyzed );
|
doc.add( nameUnanalyzed );
|
||||||
|
@ -211,8 +210,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
//Moniker
|
//Moniker
|
||||||
|
|
||||||
if(ent.getMoniker() != null){
|
if(ent.getMoniker() != null){
|
||||||
Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
|
Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED);
|
||||||
// moniker.setBoost(MONIKER_BOOST);
|
|
||||||
doc.add(moniker);
|
doc.add(moniker);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -240,7 +238,18 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
log.error("could not save timekey " + ex);
|
log.error("could not save timekey " + ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* thumbnail */
|
||||||
|
try{
|
||||||
|
value = null;
|
||||||
|
if( ent.hasThumb() )
|
||||||
|
doc.add(new Field(term.THUMBNAIL, "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
|
else
|
||||||
|
doc.add(new Field(term.THUMBNAIL, "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
|
||||||
|
}catch(Exception ex){
|
||||||
|
log.debug("could not index thumbnail: " + ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//time of index in millis past epoch
|
//time of index in millis past epoch
|
||||||
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
||||||
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
||||||
|
@ -283,9 +292,9 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//stemmed terms
|
//stemmed terms
|
||||||
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
|
||||||
//unstemmed terms
|
//unstemmed terms
|
||||||
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
||||||
}
|
}
|
||||||
|
|
||||||
//flagX and portal flags are no longer indexed.
|
//flagX and portal flags are no longer indexed.
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||||
|
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -41,32 +42,32 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||||
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
|
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Setup objects for lucene searching and indexing.
|
* Setup objects for lucene searching and indexing.
|
||||||
*
|
*
|
||||||
* The indexing and search objects, IndexBuilder and Searcher are found by the
|
* The indexing and search objects, IndexBuilder and Searcher are found by the
|
||||||
* controllers IndexController and SearchController through the servletContext.
|
* controllers IndexController and SearchController through the servletContext.
|
||||||
* This object will have the method contextInitialized() called when the tomcat
|
* This object will have the method contextInitialized() called when the tomcat
|
||||||
* server starts this webapp.
|
* server starts this webapp.
|
||||||
*
|
*
|
||||||
* The contextInitialized() will try to find the lucene index directory,
|
* The contextInitialized() will try to find the lucene index directory,
|
||||||
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
|
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
|
||||||
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
|
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
|
||||||
*
|
*
|
||||||
* To execute this at context creation put this in web.xml:
|
* To execute this at context creation put this in web.xml:
|
||||||
<listener>
|
<listener>
|
||||||
<listener-class>
|
<listener-class>
|
||||||
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
|
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
|
||||||
</listener-class>
|
</listener-class>
|
||||||
</listener>
|
</listener>
|
||||||
|
|
||||||
* @author bdc34
|
* @author bdc34
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class LuceneSetup implements javax.servlet.ServletContextListener {
|
public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
private static final Log log = LogFactory.getLog(LuceneSetup.class.getName());
|
private static final Log log = LogFactory.getLog(LuceneSetup.class.getName());
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets run to set up DataSource when the webapp servlet context gets
|
* Gets run to set up DataSource when the webapp servlet context gets
|
||||||
* created.
|
* created.
|
||||||
|
@ -86,7 +87,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
|
|
||||||
setBoolMax();
|
setBoolMax();
|
||||||
|
|
||||||
// these should really be set as annotation properties.
|
// these should really be set as annotation properties.
|
||||||
HashSet<String> dataPropertyBlacklist = new HashSet<String>();
|
HashSet<String> dataPropertyBlacklist = new HashSet<String>();
|
||||||
context.setAttribute(SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
|
context.setAttribute(SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
|
||||||
HashSet<String> objectPropertyBlacklist = new HashSet<String>();
|
HashSet<String> objectPropertyBlacklist = new HashSet<String>();
|
||||||
|
@ -99,8 +100,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
LuceneIndexFactory lif = LuceneIndexFactory.setup(context, baseIndexDir);
|
LuceneIndexFactory lif = LuceneIndexFactory.setup(context, baseIndexDir);
|
||||||
String liveIndexDir = lif.getLiveIndexDir(context);
|
String liveIndexDir = lif.getLiveIndexDir(context);
|
||||||
|
|
||||||
// Here we want to put the LuceneIndex object into the application scope.
|
// Here we want to put the LuceneIndex object into the application scope.
|
||||||
// This will attempt to create a new directory and empty index if there is none.
|
// This will attempt to create a new directory and empty index if there is none.
|
||||||
LuceneIndexer indexer = new LuceneIndexer(
|
LuceneIndexer indexer = new LuceneIndexer(
|
||||||
getBaseIndexDirName(context), liveIndexDir, null,
|
getBaseIndexDirName(context), liveIndexDir, null,
|
||||||
getAnalyzer());
|
getAnalyzer());
|
||||||
|
@ -123,9 +124,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
sce.getServletContext().setAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);
|
sce.getServletContext().setAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is where the builder gets the list of places to try to
|
// This is where the builder gets the list of places to try to
|
||||||
// get objects to index. It is filtered so that non-public text
|
// get objects to index. It is filtered so that non-public text
|
||||||
// does not get into the search index.
|
// does not get into the search index.
|
||||||
WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory");
|
WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory");
|
||||||
VitroFilters vf = VitroFilterUtils.getDisplayFilterByRoleLevel(RoleLevel.PUBLIC, wadf);
|
VitroFilters vf = VitroFilterUtils.getDisplayFilterByRoleLevel(RoleLevel.PUBLIC, wadf);
|
||||||
wadf = new WebappDaoFactoryFiltering(wadf, vf);
|
wadf = new WebappDaoFactoryFiltering(wadf, vf);
|
||||||
|
@ -135,8 +136,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
|
|
||||||
IndexBuilder builder = new IndexBuilder(context, indexer, sources);
|
IndexBuilder builder = new IndexBuilder(context, indexer, sources);
|
||||||
|
|
||||||
// here we add the IndexBuilder with the LuceneIndexer
|
// here we add the IndexBuilder with the LuceneIndexer
|
||||||
// to the servlet context so we can access it later in the webapp.
|
// to the servlet context so we can access it later in the webapp.
|
||||||
context.setAttribute(IndexBuilder.class.getName(), builder);
|
context.setAttribute(IndexBuilder.class.getName(), builder);
|
||||||
|
|
||||||
// set up listeners so search index builder is notified of changes to model
|
// set up listeners so search index builder is notified of changes to model
|
||||||
|
@ -163,8 +164,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
log.error("***** Error setting up Lucene index *****", t);
|
log.error("***** Error setting up Lucene index *****", t);
|
||||||
throw new RuntimeException("Startup of vitro application was prevented by errors in the lucene configuration");
|
throw new RuntimeException("Startup of vitro application was prevented by errors in the lucene configuration");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets run when the webApp Context gets destroyed.
|
* Gets run when the webApp Context gets destroyed.
|
||||||
*/
|
*/
|
||||||
|
@ -187,7 +188,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
public static void setBoolMax() {
|
public static void setBoolMax() {
|
||||||
BooleanQuery.setMaxClauseCount(16384);
|
BooleanQuery.setMaxClauseCount(16384);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the name of the directory to store the lucene index in. The
|
* Gets the name of the directory to store the lucene index in. The
|
||||||
* {@link ConfigurationProperties} should have a property named
|
* {@link ConfigurationProperties} should have a property named
|
||||||
|
@ -221,33 +222,33 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
return dirName;
|
return dirName;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the analyzer that will be used when building the indexing
|
* Gets the analyzer that will be used when building the indexing
|
||||||
* and when analyzing the incoming search terms.
|
* and when analyzing the incoming search terms.
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private Analyzer getAnalyzer() {
|
private Analyzer getAnalyzer() {
|
||||||
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer());
|
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer());
|
||||||
// PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer());
|
|
||||||
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
||||||
// analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
|
analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
|
||||||
// analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||||
// analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||||
analyzer.addAnalyzer(NAME, new KeywordAnalyzer());
|
analyzer.addAnalyzer(NAME, new StandardAnalyzer());
|
||||||
analyzer.addAnalyzer(MONIKER, new KeywordAnalyzer());
|
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer());
|
||||||
|
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer());
|
||||||
|
|
||||||
return analyzer;
|
return analyzer;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup";
|
public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup";
|
||||||
public static final String ANALYZER= "lucene.analyzer";
|
public static final String ANALYZER= "lucene.analyzer";
|
||||||
public static final String BASE_INDEX_DIR = "lucene.indexDir";
|
public static final String BASE_INDEX_DIR = "lucene.indexDir";
|
||||||
public static final String SEARCH_DATAPROPERTY_BLACKLIST =
|
public static final String SEARCH_DATAPROPERTY_BLACKLIST =
|
||||||
"search.dataproperty.blacklist";
|
"search.dataproperty.blacklist";
|
||||||
public static final String SEARCH_OBJECTPROPERTY_BLACKLIST =
|
public static final String SEARCH_OBJECTPROPERTY_BLACKLIST =
|
||||||
"search.objectproperty.blacklist";
|
"search.objectproperty.blacklist";
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue