diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java index 2d543a95d..b08a11898 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java @@ -503,13 +503,13 @@ public class PagedSearchController extends VitroHttpServlet implements Searcher{ //indicated in the query string. //The analyzer is needed so that we use the same analyzer on the search queries as //was used on the text that was indexed. - VitroQueryParser qp = new VitroQueryParser(defaultSearchField,analyzer); + QueryParser qp = new QueryParser(defaultSearchField,analyzer); //this sets the query parser to AND all of the query terms it finds. qp.setDefaultOperator(QueryParser.AND_OPERATOR); //set up the map of stemmed field names -> unstemmed field names - HashMap map = new HashMap(); - map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED); - qp.setStemmedToUnstemmed(map); +// HashMap map = new HashMap(); +// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED); +// qp.setStemmedToUnstemmed(map); return qp; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java index 1852c4f62..4cab4f76c 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java @@ -47,6 +47,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{ public static String MODTIME = "modTime"; /** Name of entity, tab or vclass */ public static String NAME = "name"; + /** Name of entity, unstemmed */ + public static String NAMEUNSTEMMED = "nameunstemmed"; /** Name of portal */ public static String PORTAL = "portal"; /** time of index in msec since epoc */ @@ -109,6 +111,9 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field.Store.YES, Field.Index.ANALYZED); name.setBoost( NAME_BOOST ); doc.add( name ); + Field nameUn = new Field(term.NAMEUNSTEMMED, value, + Field.Store.YES, Field.Index.ANALYZED); + nameUn.setBoost( NAME_BOOST ); //boost for entity if( ent.getSearchBoost() != null && ent.getSearchBoost() != 0 ) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/HtmlLowerStopAnalyzer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/HtmlLowerStopAnalyzer.java index 4b597517a..366026ec4 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/HtmlLowerStopAnalyzer.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/HtmlLowerStopAnalyzer.java @@ -100,10 +100,10 @@ public class HtmlLowerStopAnalyzer extends Analyzer { // TokenStream result = new StandardTokenizer(arg0); - result = new StandardFilter(result); - result = new LowerCaseFilter(result); - result = new StopFilter(result, _stopWords, IGNORE_CASE); - result = new ISOLatin1AccentFilter(result); + result = new StandardFilter(result); //break into tokens + result = new LowerCaseFilter(result); //lower case + result = new StopFilter(result, _stopWords, IGNORE_CASE); //remove stop words + result = new ISOLatin1AccentFilter(result); //ISO-8859-1 accented chars are replace by unaccented return result; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneQuery.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneQuery.java index e0a34d819..23b5371e4 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneQuery.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneQuery.java @@ -36,7 +36,8 @@ import edu.cornell.mannlib.vitro.webapp.utils.FlagMathUtils; * QueryParser see: * http://lucene.apache.org/java/docs/queryparsersyntax.html * http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html - * + * + * This class is not thread safe, use one instance per request. * @author bdc34 * */ @@ -47,48 +48,30 @@ public class LuceneQuery extends VitroQuery { private final int ADVANCED =2; private int queryType = SIMPLE; - public boolean defaultTimeWindow = true; - private Query query = null; private Analyzer analyzer = null; private static final Log log = LogFactory.getLog(LuceneQuery.class.getName()); - //private IndexReader indexReader; - public LuceneQuery(VitroRequest request, PortalFlag portalState, - Analyzer analyzer, String indexDir ){ - + Analyzer analyzer, String defualtField ){ super(request,portalState); //the super class will stash the parameters for us. this.analyzer = analyzer; -// if( indexReader == null ){ -// try { -// indexReader = IndexReader.open( indexDir ); -// } catch (IOException e) { -// System.out.println("LuceneQuery: could not create IndexReader"+e); -// e.printStackTrace(); -// } -// } if( isAdvancedQuery( request ) ){ queryType = ADVANCED; } } @SuppressWarnings("static-access") - private QueryParser getQueryParser(){ + private QueryParser getQueryParser(){ //defaultSearchField indicates which field search against when there is no term //indicated in the query string. //The analyzer is needed so that we use the same analyzer on the search queries as //was used on the text that was indexed. - VitroQueryParser qp = new VitroQueryParser(defaultSearchField,analyzer); + QueryParser qp = new QueryParser(defaultSearchField,analyzer); //this sets the query parser to AND all of the query terms it finds. qp.setDefaultOperator(QueryParser.AND_OPERATOR); - //set up the map of stemmed field names -> unstemmed field names - HashMap map = new HashMap(); - map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED); - qp.setStemmedToUnstemmed(map); - return qp; } @@ -132,67 +115,7 @@ public class LuceneQuery extends VitroQuery { return this.query; } - - /** - * Adds a Query that will get doc where the - * SUNSET is > NOW and SUNRISE <= NOW. We'll do - * this by creating two RangeQueries, one to - * check that SUNRISE is between [BEGINNING_OF_TIME, NOW] - * and that SUNSET is between [NOW, END_OF_TIME] - * There don't seem to be any GraterThanQuery - * or LessThanQuery classes in lucene. - */ -// private BooleanQuery makeDefaultTimeWindowQuery(){ -// String nowStr = new DateTime().toString(LuceneIndexer.DATE_FORMAT); - -// Term BEGINNING_OF_TIME = null; -// Term now = new Term(Entity2LuceneDoc.term.SUNRISE,nowStr ); -// RangeQuery sunriseBeforeNow = new RangeQuery(BEGINNING_OF_TIME,now, true); - -// Term END_OF_TIME = null; -// now = new Term(Entity2LuceneDoc.term.SUNSET,nowStr); -// RangeQuery sunsetAfterNow = new RangeQuery(now,END_OF_TIME, false); - -// BooleanQuery qRv = new BooleanQuery(); -// qRv.add( sunriseBeforeNow, BooleanClause.Occur.MUST); -// qRv.add( sunsetAfterNow, BooleanClause.Occur.MUST); - -// return qRv; -// } - - /** - * Makes queries to return only things between the given times and adds - * them as BooleanQuery objects. - * - * If earliest is null then the query include anything that existed before latest. - * If latest is null then the query will include anthing that existes after earliest. - * If both earliest and latest are null then NO restrictions will be added to the query. - */ - private Query addTimeWindowedQuery( Query query, DateTime earliest, DateTime latest){ - Query returnQuery = null; - if( earliest ==null && latest == null ) return query; - - if( earliest != null && latest != null ){ - //we work with the SUNSET here since that is the last time the - //object will be seen. - Term earliestTerm = new Term(Entity2LuceneDoc.term.SUNSET, - earliest.toString(LuceneIndexer.DATE_FORMAT)); - Term latestTerm = new Term(Entity2LuceneDoc.term.SUNRISE, - latest.toString(LuceneIndexer.DATE_FORMAT)); - - RangeQuery timeWindowQuery = new RangeQuery(earliestTerm,latestTerm, true); - BooleanQuery bQuery = new BooleanQuery(); - bQuery.add( query, BooleanClause.Occur.MUST); - bQuery.add( timeWindowQuery, BooleanClause.Occur.MUST); - returnQuery = bQuery; - } - return returnQuery; - } - - // Term beginning_of_time = new Term(Entity2LuceneDoc.term.SUNSET, -// BEGINNING_OF_TIME); -// Term end_of_time = new Term(Entity2LuceneDoc.term.SUNRISE, -// END_OF_TIME); + /** * Makes a flag based query clause. This is where searches can filter by portal. * @@ -250,8 +173,6 @@ public class LuceneQuery extends VitroQuery { return false; } - - @Override public String getTerms() { if( getParameters() != null && diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneQueryFactory.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneQueryFactory.java index 2812c024e..dba9a73f3 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneQueryFactory.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneQueryFactory.java @@ -2,27 +2,24 @@ package edu.cornell.mannlib.vitro.webapp.search.lucene; -import javax.servlet.http.HttpServletRequest; - -import org.apache.lucene.analysis.Analyzer; - -import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; -import edu.cornell.mannlib.vitro.webapp.flags.PortalFlag; -import edu.cornell.mannlib.vitro.webapp.search.SearchException; -import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery; -import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory; +import org.apache.lucene.analysis.Analyzer; + +import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; +import edu.cornell.mannlib.vitro.webapp.flags.PortalFlag; +import edu.cornell.mannlib.vitro.webapp.search.SearchException; +import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery; +import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory; public class LuceneQueryFactory implements VitroQueryFactory { - public static final int MAX_QUERY_LENGTH = 500; - private String indexDir; - - public LuceneQueryFactory(Analyzer analyzer, String indexDir ){ - this.analyzer = analyzer; - this.indexDir = indexDir; - } - - private Analyzer analyzer = null; + public static final int MAX_QUERY_LENGTH = 500; + private String defaultField; + private Analyzer analyzer = null; + + public LuceneQueryFactory(Analyzer analyzer, String defaultField ){ + this.analyzer = analyzer; + this.defaultField = defaultField; + } public VitroQuery getQuery(VitroRequest request, PortalFlag portalState) throws SearchException { //there should be a better way to integrate this with LuceneQuery @@ -34,7 +31,7 @@ public class LuceneQueryFactory implements VitroQueryFactory { if( txt.length() > MAX_QUERY_LENGTH ) throw new SearchException("The search was too long. The maximum " + "query length is " + MAX_QUERY_LENGTH ); - LuceneQuery query = new LuceneQuery(request, portalState, analyzer, indexDir); + LuceneQuery query = new LuceneQuery(request, portalState, analyzer, defaultField ); return query; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java index f604a2244..c44b7055a 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java @@ -4,6 +4,7 @@ package edu.cornell.mannlib.vitro.webapp.search.lucene; import java.io.File; import java.io.IOException; +import java.io.Reader; import java.util.ArrayList; import java.util.HashSet; import java.util.List; @@ -14,6 +15,9 @@ import javax.servlet.ServletContextEvent; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.analysis.PerFieldAnalyzerWrapper; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.search.BooleanQuery; import com.hp.hpl.jena.ontology.OntModel; @@ -88,7 +92,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { // the queries need to know the analyzer to use so that the same one can be used // to analyze the fields in the incoming user query terms. LuceneSearcher searcher = new LuceneSearcher( - new LuceneQueryFactory(getAnalyzer(), indexDir), + new LuceneQueryFactory(getAnalyzer(), Entity2LuceneDoc.term.ALLTEXT), indexDir); searcher.addObj2Doc(new Entity2LuceneDoc()); context.setAttribute(Searcher.class.getName(), searcher); @@ -186,11 +190,15 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { * * @return */ - private Analyzer getAnalyzer() { - return new VitroAnalyzer(); - } + public Analyzer getAnalyzer() { + PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer()); + analyzer.addAnalyzer(Entity2LuceneDoc.term.ALLTEXT, new HtmlLowerStopStemAnalyzer()); + analyzer.addAnalyzer(Entity2LuceneDoc.term.NAME, new HtmlLowerStopStemAnalyzer()); + analyzer.addAnalyzer(Entity2LuceneDoc.term.ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer()); + analyzer.addAnalyzer(Entity2LuceneDoc.term.NAME, new HtmlLowerStopAnalyzer()); + return analyzer; + } - public static final String ANALYZER= "lucene.analyzer"; public static final String INDEX_DIR = "lucene.indexDir"; public static final String SEARCH_DATAPROPERTY_BLACKLIST = diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java index f3ddec3f9..0b72ed9ba 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java @@ -79,7 +79,7 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener { // the queries need to know the analyzer to use so that the same one can be used // to analyze the fields in the incoming user query terms. LuceneSearcher searcher = new LuceneSearcher( - new LuceneQueryFactory(getAnalyzer(), indexDir), + new LuceneQueryFactory(getAnalyzer(), Entity2LuceneDoc.term.ALLTEXT), indexDir); searcher.addObj2Doc(new Entity2LuceneDoc()); context.setAttribute(Searcher.class.getName(), searcher); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/VitroAnalyzer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/VitroAnalyzer.java index b388a7b19..8652cfefa 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/VitroAnalyzer.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/VitroAnalyzer.java @@ -24,7 +24,8 @@ public class VitroAnalyzer extends Analyzer { if( Entity2LuceneDoc.term.ALLTEXT.equals(field) || Entity2LuceneDoc.term.NAME.equals(field) ) return stemmingAnalyzer.tokenStream(field, reader); - else if( Entity2LuceneDoc.term.ALLTEXTUNSTEMMED.equals(field) ) + else if( Entity2LuceneDoc.term.ALLTEXTUNSTEMMED.equals(field) || + Entity2LuceneDoc.term.NAMEUNSTEMMED.equals(field) ) return nonStemmingAnalyzer.tokenStream(field, reader); else{ return keywordAnalyzer.tokenStream(field, reader); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/JenaDataSourceSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/JenaDataSourceSetup.java index 2466d053d..d8c57845e 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/JenaDataSourceSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/JenaDataSourceSetup.java @@ -60,8 +60,6 @@ public class JenaDataSourceSetup extends JenaDataSourceSetupBase implements java unionOms.setUserAccountsModel(userAccountsModel); OntModel displayModel = ontModelFromContextAttribute(sce.getServletContext(),"displayOntModel"); - OntModel displayUnionModel = ModelFactory.createOntologyModel(MEM_ONT_MODEL_SPEC,ModelFactory.createUnion(displayModel, unionModel)); - sce.getServletContext().setAttribute("displayOntModel", displayUnionModel); baseOms.setDisplayModel(displayModel); inferenceOms.setDisplayModel(displayModel); unionOms.setDisplayModel(displayModel);