1) Adding the thumbnail field back into the search index.

2) Setting MultiFieldQueryParser to perform an AND when there are multiple query terms.
3) Adding analyzers for ALLTEXTUNSTEMMED and RDFTYPE.
This commit is contained in:
deepakkoni 2011-03-17 18:54:42 +00:00
parent 55d4e6d5dd
commit 38902a5787
3 changed files with 87 additions and 76 deletions

View file

@ -233,7 +233,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
//sets the query boost for the query. the lucene docs matching this query term
//are multiplied by QUERY_BOOST to get their total score
query.setBoost(QUERY_BOOST);
//query.setBoost(QUERY_BOOST);
topDocs = searcherForRequest.search(query,null,maxHitSize);
}catch(Throwable t){
@ -685,13 +685,14 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
//was used on the text that was indexed.
//QueryParser qp = new QueryParser("NAME",analyzer);
//this sets the query parser to AND all of the query terms it finds.
//qp.setDefaultOperator(QueryParser.AND_OPERATOR);
//set up the map of stemmed field names -> unstemmed field names
// HashMap<String,String> map = new HashMap<String, String>();
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
// qp.setStemmedToUnstemmed(map);
MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{"ALLTEXT", "name", "type", "moniker"}, analyzer);
MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{ "name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" }, analyzer);
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
return qp;
}

View file

@ -192,15 +192,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{
value = ent.getLocalName();
}
Field name =new Field(term.NAME, value,
Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
// name.setBoost( NAME_BOOST );
Field.Store.YES, Field.Index.ANALYZED);
doc.add( name );
Field nameUn = new Field(term.NAMEUNSTEMMED, value,
Field.Store.NO, Field.Index.ANALYZED);
// nameUn.setBoost( NAME_BOOST );
doc.add( nameUn );
// BK nameunanalyzed is used by IndividualListController
Field nameUnanalyzed = new Field(term.NAMELOWERCASE, value.toLowerCase(),
Field.Store.YES, Field.Index.NOT_ANALYZED);
doc.add( nameUnanalyzed );
@ -211,8 +210,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
//Moniker
if(ent.getMoniker() != null){
Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
// moniker.setBoost(MONIKER_BOOST);
Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.ANALYZED);
doc.add(moniker);
}
@ -240,6 +238,17 @@ public class Entity2LuceneDoc implements Obj2DocIface{
log.error("could not save timekey " + ex);
}
/* thumbnail */
try{
value = null;
if( ent.hasThumb() )
doc.add(new Field(term.THUMBNAIL, "1", Field.Store.YES, Field.Index.NOT_ANALYZED));
else
doc.add(new Field(term.THUMBNAIL, "0", Field.Store.YES, Field.Index.NOT_ANALYZED));
}catch(Exception ex){
log.debug("could not index thumbnail: " + ex);
}
//time of index in millis past epoc
Object anon[] = { new Long((new DateTime() ).getMillis()) };
@ -283,9 +292,9 @@ public class Entity2LuceneDoc implements Obj2DocIface{
}
}
//stemmed terms
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
//unstemmed terms
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
}
//flagX and portal flags are no longer indexed.

View file

@ -7,6 +7,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
import java.io.File;
import java.io.IOException;
@ -230,14 +231,14 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
* @return
*/
private Analyzer getAnalyzer() {
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer());
// PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer());
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer());
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
// analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
// analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
// analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(NAME, new KeywordAnalyzer());
analyzer.addAnalyzer(MONIKER, new KeywordAnalyzer());
analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(NAME, new StandardAnalyzer());
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer());
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer());
return analyzer;
}