Removing union graph from application graph. Adding new field nameUnstemmed to lucene index. Minor refactoring of lucene objects.
This commit is contained in:
parent
444d37bd5a
commit
b6d0c61e86
9 changed files with 51 additions and 121 deletions
|
@ -503,13 +503,13 @@ public class PagedSearchController extends VitroHttpServlet implements Searcher{
|
||||||
//indicated in the query string.
|
//indicated in the query string.
|
||||||
//The analyzer is needed so that we use the same analyzer on the search queries as
|
//The analyzer is needed so that we use the same analyzer on the search queries as
|
||||||
//was used on the text that was indexed.
|
//was used on the text that was indexed.
|
||||||
VitroQueryParser qp = new VitroQueryParser(defaultSearchField,analyzer);
|
QueryParser qp = new QueryParser(defaultSearchField,analyzer);
|
||||||
//this sets the query parser to AND all of the query terms it finds.
|
//this sets the query parser to AND all of the query terms it finds.
|
||||||
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
||||||
//set up the map of stemmed field names -> unstemmed field names
|
//set up the map of stemmed field names -> unstemmed field names
|
||||||
HashMap<String,String> map = new HashMap<String, String>();
|
// HashMap<String,String> map = new HashMap<String, String>();
|
||||||
map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
||||||
qp.setStemmedToUnstemmed(map);
|
// qp.setStemmedToUnstemmed(map);
|
||||||
return qp;
|
return qp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,6 +47,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
public static String MODTIME = "modTime";
|
public static String MODTIME = "modTime";
|
||||||
/** Name of entity, tab or vclass */
|
/** Name of entity, tab or vclass */
|
||||||
public static String NAME = "name";
|
public static String NAME = "name";
|
||||||
|
/** Name of entity, unstemmed */
|
||||||
|
public static String NAMEUNSTEMMED = "nameunstemmed";
|
||||||
/** Name of portal */
|
/** Name of portal */
|
||||||
public static String PORTAL = "portal";
|
public static String PORTAL = "portal";
|
||||||
/** time of index in msec since epoch */
|
/** time of index in msec since epoch */
|
||||||
|
@ -109,6 +111,9 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
Field.Store.YES, Field.Index.ANALYZED);
|
Field.Store.YES, Field.Index.ANALYZED);
|
||||||
name.setBoost( NAME_BOOST );
|
name.setBoost( NAME_BOOST );
|
||||||
doc.add( name );
|
doc.add( name );
|
||||||
|
Field nameUn = new Field(term.NAMEUNSTEMMED, value,
|
||||||
|
Field.Store.YES, Field.Index.ANALYZED);
|
||||||
|
nameUn.setBoost( NAME_BOOST );
|
||||||
|
|
||||||
//boost for entity
|
//boost for entity
|
||||||
if( ent.getSearchBoost() != null && ent.getSearchBoost() != 0 )
|
if( ent.getSearchBoost() != null && ent.getSearchBoost() != 0 )
|
||||||
|
|
|
@ -100,10 +100,10 @@ public class HtmlLowerStopAnalyzer extends Analyzer {
|
||||||
//
|
//
|
||||||
|
|
||||||
TokenStream result = new StandardTokenizer(arg0);
|
TokenStream result = new StandardTokenizer(arg0);
|
||||||
result = new StandardFilter(result);
|
result = new StandardFilter(result); //break into tokens
|
||||||
result = new LowerCaseFilter(result);
|
result = new LowerCaseFilter(result); //lower case
|
||||||
result = new StopFilter(result, _stopWords, IGNORE_CASE);
|
result = new StopFilter(result, _stopWords, IGNORE_CASE); //remove stop words
|
||||||
result = new ISOLatin1AccentFilter(result);
|
result = new ISOLatin1AccentFilter(result); //ISO-8859-1 accented chars are replaced by unaccented equivalents
|
||||||
return result;
|
return result;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,6 +37,7 @@ import edu.cornell.mannlib.vitro.webapp.utils.FlagMathUtils;
|
||||||
* http://lucene.apache.org/java/docs/queryparsersyntax.html
|
* http://lucene.apache.org/java/docs/queryparsersyntax.html
|
||||||
* http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html
|
* http://today.java.net/pub/a/today/2003/11/07/QueryParserRules.html
|
||||||
*
|
*
|
||||||
|
* This class is not thread safe; use one instance per request.
|
||||||
* @author bdc34
|
* @author bdc34
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@ -47,29 +48,16 @@ public class LuceneQuery extends VitroQuery {
|
||||||
private final int ADVANCED =2;
|
private final int ADVANCED =2;
|
||||||
private int queryType = SIMPLE;
|
private int queryType = SIMPLE;
|
||||||
|
|
||||||
public boolean defaultTimeWindow = true;
|
|
||||||
|
|
||||||
private Query query = null;
|
private Query query = null;
|
||||||
private Analyzer analyzer = null;
|
private Analyzer analyzer = null;
|
||||||
|
|
||||||
private static final Log log = LogFactory.getLog(LuceneQuery.class.getName());
|
private static final Log log = LogFactory.getLog(LuceneQuery.class.getName());
|
||||||
|
|
||||||
//private IndexReader indexReader;
|
|
||||||
|
|
||||||
public LuceneQuery(VitroRequest request, PortalFlag portalState,
|
public LuceneQuery(VitroRequest request, PortalFlag portalState,
|
||||||
Analyzer analyzer, String indexDir ){
|
Analyzer analyzer, String defualtField ){
|
||||||
|
|
||||||
super(request,portalState); //the super class will stash the parameters for us.
|
super(request,portalState); //the super class will stash the parameters for us.
|
||||||
this.analyzer = analyzer;
|
this.analyzer = analyzer;
|
||||||
|
|
||||||
// if( indexReader == null ){
|
|
||||||
// try {
|
|
||||||
// indexReader = IndexReader.open( indexDir );
|
|
||||||
// } catch (IOException e) {
|
|
||||||
// System.out.println("LuceneQuery: could not create IndexReader"+e);
|
|
||||||
// e.printStackTrace();
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
if( isAdvancedQuery( request ) ){
|
if( isAdvancedQuery( request ) ){
|
||||||
queryType = ADVANCED;
|
queryType = ADVANCED;
|
||||||
}
|
}
|
||||||
|
@ -81,14 +69,9 @@ public class LuceneQuery extends VitroQuery {
|
||||||
//indicated in the query string.
|
//indicated in the query string.
|
||||||
//The analyzer is needed so that we use the same analyzer on the search queries as
|
//The analyzer is needed so that we use the same analyzer on the search queries as
|
||||||
//was used on the text that was indexed.
|
//was used on the text that was indexed.
|
||||||
VitroQueryParser qp = new VitroQueryParser(defaultSearchField,analyzer);
|
QueryParser qp = new QueryParser(defaultSearchField,analyzer);
|
||||||
//this sets the query parser to AND all of the query terms it finds.
|
//this sets the query parser to AND all of the query terms it finds.
|
||||||
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
||||||
//set up the map of stemmed field names -> unstemmed field names
|
|
||||||
HashMap<String,String> map = new HashMap<String, String>();
|
|
||||||
map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
|
||||||
qp.setStemmedToUnstemmed(map);
|
|
||||||
|
|
||||||
return qp;
|
return qp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -133,66 +116,6 @@ public class LuceneQuery extends VitroQuery {
|
||||||
return this.query;
|
return this.query;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Adds a Query that will get doc where the
|
|
||||||
* SUNSET is > NOW and SUNRISE <= NOW. We'll do
|
|
||||||
* this by creating two RangeQueries, one to
|
|
||||||
* check that SUNRISE is between [BEGINNING_OF_TIME, NOW]
|
|
||||||
* and that SUNSET is between [NOW, END_OF_TIME]
|
|
||||||
* There don't seem to be any GraterThanQuery
|
|
||||||
* or LessThanQuery classes in lucene.
|
|
||||||
*/
|
|
||||||
// private BooleanQuery makeDefaultTimeWindowQuery(){
|
|
||||||
// String nowStr = new DateTime().toString(LuceneIndexer.DATE_FORMAT);
|
|
||||||
|
|
||||||
// Term BEGINNING_OF_TIME = null;
|
|
||||||
// Term now = new Term(Entity2LuceneDoc.term.SUNRISE,nowStr );
|
|
||||||
// RangeQuery sunriseBeforeNow = new RangeQuery(BEGINNING_OF_TIME,now, true);
|
|
||||||
|
|
||||||
// Term END_OF_TIME = null;
|
|
||||||
// now = new Term(Entity2LuceneDoc.term.SUNSET,nowStr);
|
|
||||||
// RangeQuery sunsetAfterNow = new RangeQuery(now,END_OF_TIME, false);
|
|
||||||
|
|
||||||
// BooleanQuery qRv = new BooleanQuery();
|
|
||||||
// qRv.add( sunriseBeforeNow, BooleanClause.Occur.MUST);
|
|
||||||
// qRv.add( sunsetAfterNow, BooleanClause.Occur.MUST);
|
|
||||||
|
|
||||||
// return qRv;
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Makes queries to return only things between the given times and adds
|
|
||||||
* them as BooleanQuery objects.
|
|
||||||
*
|
|
||||||
* If earliest is null then the query include anything that existed before latest.
|
|
||||||
* If latest is null then the query will include anthing that existes after earliest.
|
|
||||||
* If both earliest and latest are null then NO restrictions will be added to the query.
|
|
||||||
*/
|
|
||||||
private Query addTimeWindowedQuery( Query query, DateTime earliest, DateTime latest){
|
|
||||||
Query returnQuery = null;
|
|
||||||
if( earliest ==null && latest == null ) return query;
|
|
||||||
|
|
||||||
if( earliest != null && latest != null ){
|
|
||||||
//we work with the SUNSET here since that is the last time the
|
|
||||||
//object will be seen.
|
|
||||||
Term earliestTerm = new Term(Entity2LuceneDoc.term.SUNSET,
|
|
||||||
earliest.toString(LuceneIndexer.DATE_FORMAT));
|
|
||||||
Term latestTerm = new Term(Entity2LuceneDoc.term.SUNRISE,
|
|
||||||
latest.toString(LuceneIndexer.DATE_FORMAT));
|
|
||||||
|
|
||||||
RangeQuery timeWindowQuery = new RangeQuery(earliestTerm,latestTerm, true);
|
|
||||||
BooleanQuery bQuery = new BooleanQuery();
|
|
||||||
bQuery.add( query, BooleanClause.Occur.MUST);
|
|
||||||
bQuery.add( timeWindowQuery, BooleanClause.Occur.MUST);
|
|
||||||
returnQuery = bQuery;
|
|
||||||
}
|
|
||||||
return returnQuery;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Term beginning_of_time = new Term(Entity2LuceneDoc.term.SUNSET,
|
|
||||||
// BEGINNING_OF_TIME);
|
|
||||||
// Term end_of_time = new Term(Entity2LuceneDoc.term.SUNRISE,
|
|
||||||
// END_OF_TIME);
|
|
||||||
/**
|
/**
|
||||||
* Makes a flag based query clause. This is where searches can filter by portal.
|
* Makes a flag based query clause. This is where searches can filter by portal.
|
||||||
*
|
*
|
||||||
|
@ -250,8 +173,6 @@ public class LuceneQuery extends VitroQuery {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getTerms() {
|
public String getTerms() {
|
||||||
if( getParameters() != null &&
|
if( getParameters() != null &&
|
||||||
|
|
|
@ -2,8 +2,6 @@
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||||
|
|
||||||
import javax.servlet.http.HttpServletRequest;
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
|
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
|
||||||
|
@ -15,15 +13,14 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
|
||||||
public class LuceneQueryFactory implements VitroQueryFactory {
|
public class LuceneQueryFactory implements VitroQueryFactory {
|
||||||
|
|
||||||
public static final int MAX_QUERY_LENGTH = 500;
|
public static final int MAX_QUERY_LENGTH = 500;
|
||||||
private String indexDir;
|
private String defaultField;
|
||||||
|
|
||||||
public LuceneQueryFactory(Analyzer analyzer, String indexDir ){
|
|
||||||
this.analyzer = analyzer;
|
|
||||||
this.indexDir = indexDir;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Analyzer analyzer = null;
|
private Analyzer analyzer = null;
|
||||||
|
|
||||||
|
public LuceneQueryFactory(Analyzer analyzer, String defaultField ){
|
||||||
|
this.analyzer = analyzer;
|
||||||
|
this.defaultField = defaultField;
|
||||||
|
}
|
||||||
|
|
||||||
public VitroQuery getQuery(VitroRequest request, PortalFlag portalState) throws SearchException {
|
public VitroQuery getQuery(VitroRequest request, PortalFlag portalState) throws SearchException {
|
||||||
//there should be a better way to integrate this with LuceneQuery
|
//there should be a better way to integrate this with LuceneQuery
|
||||||
//here we check that the request has the parameters that we need to
|
//here we check that the request has the parameters that we need to
|
||||||
|
@ -34,7 +31,7 @@ public class LuceneQueryFactory implements VitroQueryFactory {
|
||||||
if( txt.length() > MAX_QUERY_LENGTH )
|
if( txt.length() > MAX_QUERY_LENGTH )
|
||||||
throw new SearchException("The search was too long. The maximum " +
|
throw new SearchException("The search was too long. The maximum " +
|
||||||
"query length is " + MAX_QUERY_LENGTH );
|
"query length is " + MAX_QUERY_LENGTH );
|
||||||
LuceneQuery query = new LuceneQuery(request, portalState, analyzer, indexDir);
|
LuceneQuery query = new LuceneQuery(request, portalState, analyzer, defaultField );
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
@ -14,6 +15,9 @@ import javax.servlet.ServletContextEvent;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.analysis.KeywordAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
|
||||||
import com.hp.hpl.jena.ontology.OntModel;
|
import com.hp.hpl.jena.ontology.OntModel;
|
||||||
|
@ -88,7 +92,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
// the queries need to know the analyzer to use so that the same one can be used
|
// the queries need to know the analyzer to use so that the same one can be used
|
||||||
// to analyze the fields in the incoming user query terms.
|
// to analyze the fields in the incoming user query terms.
|
||||||
LuceneSearcher searcher = new LuceneSearcher(
|
LuceneSearcher searcher = new LuceneSearcher(
|
||||||
new LuceneQueryFactory(getAnalyzer(), indexDir),
|
new LuceneQueryFactory(getAnalyzer(), Entity2LuceneDoc.term.ALLTEXT),
|
||||||
indexDir);
|
indexDir);
|
||||||
searcher.addObj2Doc(new Entity2LuceneDoc());
|
searcher.addObj2Doc(new Entity2LuceneDoc());
|
||||||
context.setAttribute(Searcher.class.getName(), searcher);
|
context.setAttribute(Searcher.class.getName(), searcher);
|
||||||
|
@ -186,11 +190,15 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
*
|
*
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private Analyzer getAnalyzer() {
|
public Analyzer getAnalyzer() {
|
||||||
return new VitroAnalyzer();
|
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer());
|
||||||
|
analyzer.addAnalyzer(Entity2LuceneDoc.term.ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
||||||
|
analyzer.addAnalyzer(Entity2LuceneDoc.term.NAME, new HtmlLowerStopStemAnalyzer());
|
||||||
|
analyzer.addAnalyzer(Entity2LuceneDoc.term.ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||||
|
analyzer.addAnalyzer(Entity2LuceneDoc.term.NAME, new HtmlLowerStopAnalyzer());
|
||||||
|
return analyzer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public static final String ANALYZER= "lucene.analyzer";
|
public static final String ANALYZER= "lucene.analyzer";
|
||||||
public static final String INDEX_DIR = "lucene.indexDir";
|
public static final String INDEX_DIR = "lucene.indexDir";
|
||||||
public static final String SEARCH_DATAPROPERTY_BLACKLIST =
|
public static final String SEARCH_DATAPROPERTY_BLACKLIST =
|
||||||
|
|
|
@ -79,7 +79,7 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
||||||
// the queries need to know the analyzer to use so that the same one can be used
|
// the queries need to know the analyzer to use so that the same one can be used
|
||||||
// to analyze the fields in the incoming user query terms.
|
// to analyze the fields in the incoming user query terms.
|
||||||
LuceneSearcher searcher = new LuceneSearcher(
|
LuceneSearcher searcher = new LuceneSearcher(
|
||||||
new LuceneQueryFactory(getAnalyzer(), indexDir),
|
new LuceneQueryFactory(getAnalyzer(), Entity2LuceneDoc.term.ALLTEXT),
|
||||||
indexDir);
|
indexDir);
|
||||||
searcher.addObj2Doc(new Entity2LuceneDoc());
|
searcher.addObj2Doc(new Entity2LuceneDoc());
|
||||||
context.setAttribute(Searcher.class.getName(), searcher);
|
context.setAttribute(Searcher.class.getName(), searcher);
|
||||||
|
|
|
@ -24,7 +24,8 @@ public class VitroAnalyzer extends Analyzer {
|
||||||
if( Entity2LuceneDoc.term.ALLTEXT.equals(field) ||
|
if( Entity2LuceneDoc.term.ALLTEXT.equals(field) ||
|
||||||
Entity2LuceneDoc.term.NAME.equals(field) )
|
Entity2LuceneDoc.term.NAME.equals(field) )
|
||||||
return stemmingAnalyzer.tokenStream(field, reader);
|
return stemmingAnalyzer.tokenStream(field, reader);
|
||||||
else if( Entity2LuceneDoc.term.ALLTEXTUNSTEMMED.equals(field) )
|
else if( Entity2LuceneDoc.term.ALLTEXTUNSTEMMED.equals(field) ||
|
||||||
|
Entity2LuceneDoc.term.NAMEUNSTEMMED.equals(field) )
|
||||||
return nonStemmingAnalyzer.tokenStream(field, reader);
|
return nonStemmingAnalyzer.tokenStream(field, reader);
|
||||||
else{
|
else{
|
||||||
return keywordAnalyzer.tokenStream(field, reader);
|
return keywordAnalyzer.tokenStream(field, reader);
|
||||||
|
|
|
@ -60,8 +60,6 @@ public class JenaDataSourceSetup extends JenaDataSourceSetupBase implements java
|
||||||
unionOms.setUserAccountsModel(userAccountsModel);
|
unionOms.setUserAccountsModel(userAccountsModel);
|
||||||
|
|
||||||
OntModel displayModel = ontModelFromContextAttribute(sce.getServletContext(),"displayOntModel");
|
OntModel displayModel = ontModelFromContextAttribute(sce.getServletContext(),"displayOntModel");
|
||||||
OntModel displayUnionModel = ModelFactory.createOntologyModel(MEM_ONT_MODEL_SPEC,ModelFactory.createUnion(displayModel, unionModel));
|
|
||||||
sce.getServletContext().setAttribute("displayOntModel", displayUnionModel);
|
|
||||||
baseOms.setDisplayModel(displayModel);
|
baseOms.setDisplayModel(displayModel);
|
||||||
inferenceOms.setDisplayModel(displayModel);
|
inferenceOms.setDisplayModel(displayModel);
|
||||||
unionOms.setDisplayModel(displayModel);
|
unionOms.setDisplayModel(displayModel);
|
||||||
|
|
Loading…
Add table
Reference in a new issue