Upgraded lucene-2.4-*.jar to lucene-2.9.3-*.jar and solr-1.0.jar to apache-solr-core-1.4.1.jar. Removed LuceneHighlighter and SimpleLuceneHighlighter, as they are no longer used. Made minor changes to the way the query is parsed in PagedSearchController: the query now uses a MultiFieldQueryParser that searches the query text against the ALLTEXT, NAME and TYPE fields of the search index.
This commit is contained in:
parent
af8ce43e16
commit
801d789696
15 changed files with 67 additions and 242 deletions
BIN
webapp/lib/apache-solr-core-1.4.1.jar
Normal file
BIN
webapp/lib/apache-solr-core-1.4.1.jar
Normal file
Binary file not shown.
Binary file not shown.
BIN
webapp/lib/lucene-analyzers-2.9.3.jar
Normal file
BIN
webapp/lib/lucene-analyzers-2.9.3.jar
Normal file
Binary file not shown.
Binary file not shown.
BIN
webapp/lib/lucene-core-2.9.3.jar
Normal file
BIN
webapp/lib/lucene-core-2.9.3.jar
Normal file
Binary file not shown.
Binary file not shown.
BIN
webapp/lib/lucene-highlighter-2.9.3.jar
Normal file
BIN
webapp/lib/lucene-highlighter-2.9.3.jar
Normal file
Binary file not shown.
Binary file not shown.
|
@ -41,7 +41,7 @@ public interface Searcher {
|
||||||
* @param q
|
* @param q
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public abstract VitroHighlighter getHighlighter(VitroQuery q);
|
// public abstract VitroHighlighter getHighlighter(VitroQuery q);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used to close the searcher if the index that it was using gets
|
* Used to close the searcher if the index that it was using gets
|
||||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.index.CorruptIndexException;
|
import org.apache.lucene.index.CorruptIndexException;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.queryParser.MultiFieldQueryParser;
|
||||||
import org.apache.lucene.queryParser.ParseException;
|
import org.apache.lucene.queryParser.ParseException;
|
||||||
import org.apache.lucene.queryParser.QueryParser;
|
import org.apache.lucene.queryParser.QueryParser;
|
||||||
import org.apache.lucene.search.BooleanClause;
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
@ -69,7 +70,6 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.SimpleLuceneHighlighter;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.utils.FlagMathUtils;
|
import edu.cornell.mannlib.vitro.webapp.utils.FlagMathUtils;
|
||||||
import edu.cornell.mannlib.vitro.webapp.utils.Html2Text;
|
import edu.cornell.mannlib.vitro.webapp.utils.Html2Text;
|
||||||
import edu.cornell.mannlib.vitro.webapp.utils.StringUtils;
|
import edu.cornell.mannlib.vitro.webapp.utils.StringUtils;
|
||||||
|
@ -157,6 +157,9 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected ResponseValues processRequest(VitroRequest vreq) {
|
protected ResponseValues processRequest(VitroRequest vreq) {
|
||||||
|
|
||||||
|
log.info("All parameters present in the request: "+ vreq.getParameterMap().toString());
|
||||||
|
|
||||||
//There may be other non-html formats in the future
|
//There may be other non-html formats in the future
|
||||||
Format format = getFormat(vreq);
|
Format format = getFormat(vreq);
|
||||||
boolean wasXmlRequested = Format.XML == format;
|
boolean wasXmlRequested = Format.XML == format;
|
||||||
|
@ -178,6 +181,10 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
VClassDao vclassDao = vreq.getWebappDaoFactory().getVClassDao();
|
VClassDao vclassDao = vreq.getWebappDaoFactory().getVClassDao();
|
||||||
String alphaFilter = vreq.getParameter("alpha");
|
String alphaFilter = vreq.getParameter("alpha");
|
||||||
|
|
||||||
|
|
||||||
|
log.info("IndividualDao is " + iDao.toString() + " Public classes in the classgroup are " + grpDao.getPublicGroupsWithVClasses().toString());
|
||||||
|
log.info("VClassDao is "+ vclassDao.toString() );
|
||||||
|
|
||||||
int startIndex = 0;
|
int startIndex = 0;
|
||||||
try{
|
try{
|
||||||
startIndex = Integer.parseInt(vreq.getParameter("startIndex"));
|
startIndex = Integer.parseInt(vreq.getParameter("startIndex"));
|
||||||
|
@ -206,6 +213,8 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
|
String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
|
||||||
Analyzer analyzer = getAnalyzer(getServletContext());
|
Analyzer analyzer = getAnalyzer(getServletContext());
|
||||||
|
|
||||||
|
log.info("Query text is "+ qtxt + " Analyzer is "+ analyzer.toString());
|
||||||
|
|
||||||
Query query = null;
|
Query query = null;
|
||||||
try {
|
try {
|
||||||
query = getQuery(vreq, portalFlag, analyzer, qtxt);
|
query = getQuery(vreq, portalFlag, analyzer, qtxt);
|
||||||
|
@ -218,6 +227,8 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
|
|
||||||
TopDocs topDocs = null;
|
TopDocs topDocs = null;
|
||||||
try{
|
try{
|
||||||
|
log.info("Searching for query term in the Index with maxHitSize "+ maxHitSize);
|
||||||
|
log.info("Query is "+ query.toString());
|
||||||
topDocs = searcherForRequest.search(query,null,maxHitSize);
|
topDocs = searcherForRequest.search(query,null,maxHitSize);
|
||||||
}catch(Throwable t){
|
}catch(Throwable t){
|
||||||
log.error("in first pass at search: " + t);
|
log.error("in first pass at search: " + t);
|
||||||
|
@ -241,7 +252,9 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
return doFailedSearch(msg, qtxt,format);
|
return doFailedSearch(msg, qtxt,format);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int hitsLength = topDocs.scoreDocs.length;
|
int hitsLength = topDocs.scoreDocs.length;
|
||||||
|
log.info("No. of hits "+ hitsLength);
|
||||||
if ( hitsLength < 1 ){
|
if ( hitsLength < 1 ){
|
||||||
return doNoHits(qtxt,format);
|
return doNoHits(qtxt,format);
|
||||||
}
|
}
|
||||||
|
@ -260,6 +273,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
if( (i >= startIndex) && (i <= lastHitToShow) ){
|
if( (i >= startIndex) && (i <= lastHitToShow) ){
|
||||||
Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc);
|
Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc);
|
||||||
String uri = doc.get(Entity2LuceneDoc.term.URI);
|
String uri = doc.get(Entity2LuceneDoc.term.URI);
|
||||||
|
log.info("Retrieving entity with uri "+ uri);
|
||||||
Individual ent = new IndividualImpl();
|
Individual ent = new IndividualImpl();
|
||||||
ent.setURI(uri);
|
ent.setURI(uri);
|
||||||
ent = iDao.getIndividualByURI(uri);
|
ent = iDao.getIndividualByURI(uri);
|
||||||
|
@ -582,11 +596,20 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
"query length is " + MAX_QUERY_LENGTH );
|
"query length is " + MAX_QUERY_LENGTH );
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.info("Parsing query using QueryParser ");
|
||||||
|
|
||||||
QueryParser parser = getQueryParser(analyzer);
|
QueryParser parser = getQueryParser(analyzer);
|
||||||
query = parser.parse(querystr);
|
query = parser.parse(querystr);
|
||||||
|
|
||||||
String alpha = request.getParameter("alpha");
|
String alpha = request.getParameter("alpha");
|
||||||
|
|
||||||
|
|
||||||
if( alpha != null && !"".equals(alpha) && alpha.length() == 1){
|
if( alpha != null && !"".equals(alpha) && alpha.length() == 1){
|
||||||
|
|
||||||
|
log.info("Firing alpha query ");
|
||||||
|
log.info("request.getParameter(alpha) is " + alpha);
|
||||||
|
|
||||||
BooleanQuery boolQuery = new BooleanQuery();
|
BooleanQuery boolQuery = new BooleanQuery();
|
||||||
boolQuery.add( query, BooleanClause.Occur.MUST );
|
boolQuery.add( query, BooleanClause.Occur.MUST );
|
||||||
boolQuery.add(
|
boolQuery.add(
|
||||||
|
@ -598,6 +621,10 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
//check if this is classgroup filtered
|
//check if this is classgroup filtered
|
||||||
Object param = request.getParameter("classgroup");
|
Object param = request.getParameter("classgroup");
|
||||||
if( param != null && !"".equals(param)){
|
if( param != null && !"".equals(param)){
|
||||||
|
|
||||||
|
log.info("Firing classgroup query ");
|
||||||
|
log.info("request.getParameter(classgroup) is "+ param.toString());
|
||||||
|
|
||||||
BooleanQuery boolQuery = new BooleanQuery();
|
BooleanQuery boolQuery = new BooleanQuery();
|
||||||
boolQuery.add( query, BooleanClause.Occur.MUST);
|
boolQuery.add( query, BooleanClause.Occur.MUST);
|
||||||
boolQuery.add( new TermQuery(
|
boolQuery.add( new TermQuery(
|
||||||
|
@ -610,6 +637,9 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
//check if this is rdf:type filtered
|
//check if this is rdf:type filtered
|
||||||
param = request.getParameter("type");
|
param = request.getParameter("type");
|
||||||
if( param != null && !"".equals(param)){
|
if( param != null && !"".equals(param)){
|
||||||
|
log.info("Firing type query ");
|
||||||
|
log.info("request.getParameter(type) is "+ param.toString());
|
||||||
|
|
||||||
BooleanQuery boolQuery = new BooleanQuery();
|
BooleanQuery boolQuery = new BooleanQuery();
|
||||||
boolQuery.add( query, BooleanClause.Occur.MUST);
|
boolQuery.add( query, BooleanClause.Occur.MUST);
|
||||||
boolQuery.add( new TermQuery(
|
boolQuery.add( new TermQuery(
|
||||||
|
@ -623,6 +653,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
//it by making a BooelanQuery.
|
//it by making a BooelanQuery.
|
||||||
Query flagQuery = makeFlagQuery( portalState );
|
Query flagQuery = makeFlagQuery( portalState );
|
||||||
if( flagQuery != null ){
|
if( flagQuery != null ){
|
||||||
|
log.info("Firing Flag query ");
|
||||||
BooleanQuery boolQuery = new BooleanQuery();
|
BooleanQuery boolQuery = new BooleanQuery();
|
||||||
boolQuery.add( query, BooleanClause.Occur.MUST);
|
boolQuery.add( query, BooleanClause.Occur.MUST);
|
||||||
boolQuery.add( flagQuery, BooleanClause.Occur.MUST);
|
boolQuery.add( flagQuery, BooleanClause.Occur.MUST);
|
||||||
|
@ -646,13 +677,16 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
//indicated in the query string.
|
//indicated in the query string.
|
||||||
//The analyzer is needed so that we use the same analyzer on the search queries as
|
//The analyzer is needed so that we use the same analyzer on the search queries as
|
||||||
//was used on the text that was indexed.
|
//was used on the text that was indexed.
|
||||||
QueryParser qp = new QueryParser(defaultSearchField,analyzer);
|
//QueryParser qp = new QueryParser("NAME",analyzer);
|
||||||
//this sets the query parser to AND all of the query terms it finds.
|
//this sets the query parser to AND all of the query terms it finds.
|
||||||
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
//qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
||||||
//set up the map of stemmed field names -> unstemmed field names
|
//set up the map of stemmed field names -> unstemmed field names
|
||||||
// HashMap<String,String> map = new HashMap<String, String>();
|
// HashMap<String,String> map = new HashMap<String, String>();
|
||||||
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
|
||||||
// qp.setStemmedToUnstemmed(map);
|
// qp.setStemmedToUnstemmed(map);
|
||||||
|
|
||||||
|
MultiFieldQueryParser qp = new MultiFieldQueryParser(new String[]{"ALLTEXT", "name", "type"}, analyzer);
|
||||||
|
|
||||||
return qp;
|
return qp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -149,16 +149,21 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
if( clz.getSearchBoost() != null )
|
if( clz.getSearchBoost() != null )
|
||||||
doc.setBoost( doc.getBoost() + clz.getSearchBoost() );
|
doc.setBoost( doc.getBoost() + clz.getSearchBoost() );
|
||||||
|
|
||||||
doc.add( new Field(term.RDFTYPE, clz.getURI(),
|
Field typeField = new Field (term.RDFTYPE, clz.getName(), Field.Store.YES, Field.Index.ANALYZED);
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
typeField.setBoost(2*FIELD_BOOST);
|
||||||
|
|
||||||
|
doc.add( typeField);
|
||||||
|
|
||||||
if( clz.getName() != null )
|
if( clz.getName() != null )
|
||||||
classPublicNames = classPublicNames + " " + clz.getName();
|
classPublicNames = classPublicNames + " " + clz.getName();
|
||||||
|
|
||||||
//Classgroup URI
|
//Classgroup URI
|
||||||
if( clz.getGroupURI() != null )
|
if( clz.getGroupURI() != null ){
|
||||||
doc.add( new Field(term.CLASSGROUP_URI, clz.getGroupURI(),
|
Field classGroupField = new Field(term.CLASSGROUP_URI, clz.getGroupURI(),
|
||||||
Field.Store.YES, Field.Index.NOT_ANALYZED));
|
Field.Store.YES, Field.Index.ANALYZED);
|
||||||
|
classGroupField.setBoost(FIELD_BOOST);
|
||||||
|
doc.add(classGroupField);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
doc.add( new Field(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0",
|
doc.add( new Field(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0",
|
||||||
|
@ -184,7 +189,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
value = ent.getLocalName();
|
value = ent.getLocalName();
|
||||||
}
|
}
|
||||||
Field name =new Field(term.NAME, value,
|
Field name =new Field(term.NAME, value,
|
||||||
Field.Store.NO, Field.Index.ANALYZED);
|
Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES);
|
||||||
name.setBoost( NAME_BOOST );
|
name.setBoost( NAME_BOOST );
|
||||||
doc.add( name );
|
doc.add( name );
|
||||||
|
|
||||||
|
@ -308,9 +313,9 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//stemmed terms
|
//stemmed terms
|
||||||
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
|
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
||||||
//unstemmed terms
|
//unstemmed terms
|
||||||
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
||||||
}
|
}
|
||||||
|
|
||||||
//flagX and portal flags are no longer indexed.
|
//flagX and portal flags are no longer indexed.
|
||||||
|
@ -359,6 +364,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
this.classesProhibitedFromSearch = classesProhibitedFromSearch;
|
this.classesProhibitedFromSearch = classesProhibitedFromSearch;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static float NAME_BOOST = 10;
|
public static float NAME_BOOST = 3.0F;
|
||||||
public static float KEYWORD_BOOST = 2;
|
public static float KEYWORD_BOOST = 2.0F;
|
||||||
|
public static float FIELD_BOOST = 1.0F;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,135 +0,0 @@
|
||||||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.highlight.Formatter;
|
|
||||||
import org.apache.lucene.search.highlight.Highlighter;
|
|
||||||
import org.apache.lucene.search.highlight.NullFragmenter;
|
|
||||||
import org.apache.lucene.search.highlight.QueryScorer;
|
|
||||||
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter;
|
|
||||||
|
|
||||||
public class LuceneHighlighter extends VitroHighlighter{
|
|
||||||
/* See VitroHighlighter for prefix tag and postfix tag */
|
|
||||||
|
|
||||||
Highlighter nonFragHighlighter = null;
|
|
||||||
Highlighter fragHighlighter = null;
|
|
||||||
|
|
||||||
Analyzer analyzer = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Makes a VitroHighlighter that uses lucene highlighters.
|
|
||||||
* PreTag and PostTag are from VitroHighlighter.
|
|
||||||
*
|
|
||||||
* @param query - the query to highlight for.
|
|
||||||
* @param a - the Analyzer that was used in the query.
|
|
||||||
*/
|
|
||||||
public LuceneHighlighter(Query query, Analyzer a){
|
|
||||||
QueryScorer scorer = new QueryScorer( query );
|
|
||||||
/* See VitroHighlighter for prefix tag and postfix tag */
|
|
||||||
Formatter formatter =
|
|
||||||
new SimpleHTMLFormatter(preTag,postTag);
|
|
||||||
this.analyzer = a;
|
|
||||||
this.fragHighlighter = new Highlighter(formatter, scorer);
|
|
||||||
|
|
||||||
//here we make a highlighter that doesn't fragment
|
|
||||||
this.nonFragHighlighter = new Highlighter( formatter, scorer);
|
|
||||||
this.nonFragHighlighter.setTextFragmenter(new NullFragmenter());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
private Pattern htmlOrNot = Pattern.compile("(<[^>]*>)|([^<]*)");
|
|
||||||
private int HTML_PATTERN_INDEX = 1;
|
|
||||||
private int TEXT_PATTERN_INDEX = 2;
|
|
||||||
/**
|
|
||||||
* Highlights in a string. No Fragmenting. Attempts to avoid some HTML.
|
|
||||||
* @param in
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public String highlight( String in){
|
|
||||||
Matcher matcher = htmlOrNot.matcher(in);
|
|
||||||
StringBuilder output = new StringBuilder();
|
|
||||||
|
|
||||||
boolean found = matcher.find();
|
|
||||||
if( ! found )
|
|
||||||
return in;
|
|
||||||
|
|
||||||
while( found ){
|
|
||||||
String foundHtmlElement = matcher.group( HTML_PATTERN_INDEX );
|
|
||||||
if( foundHtmlElement != null ){
|
|
||||||
output.append( foundHtmlElement );
|
|
||||||
}else{
|
|
||||||
String foundTextNode = matcher.group( TEXT_PATTERN_INDEX );
|
|
||||||
String hi = foundTextNode;
|
|
||||||
try {
|
|
||||||
hi = nonFragHighlighter.getBestFragment(analyzer,"contents",foundTextNode);
|
|
||||||
} catch (IOException e) {
|
|
||||||
return in;
|
|
||||||
}
|
|
||||||
if( hi != null )
|
|
||||||
output.append( hi );
|
|
||||||
else
|
|
||||||
output.append( foundTextNode );
|
|
||||||
}
|
|
||||||
found = matcher.find();
|
|
||||||
}
|
|
||||||
return output.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
protected boolean WITH_ELLIPSIS = true;
|
|
||||||
protected String ellipsis = "...";
|
|
||||||
public String getHighlightFragments(String in ) {
|
|
||||||
|
|
||||||
if(WITH_ELLIPSIS ){
|
|
||||||
if( in != null && in.trim().length() > 0){
|
|
||||||
String b = doHighlight( in ,fragHighlighter);
|
|
||||||
if( b != null && b.trim().length() > 0 )
|
|
||||||
return ellipsis + " " + b + " " + ellipsis;
|
|
||||||
else
|
|
||||||
return "";
|
|
||||||
} else {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return doHighlight( in , fragHighlighter);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private String doHighlight(String in, Highlighter hi ) {
|
|
||||||
String result = in;
|
|
||||||
|
|
||||||
if(in != null ){
|
|
||||||
|
|
||||||
|
|
||||||
TokenStream tokenStream =
|
|
||||||
analyzer.tokenStream("contents", new StringReader(in));
|
|
||||||
// Get 3 best fragments and seperate with a "..."
|
|
||||||
try {
|
|
||||||
result = hi.getBestFragments(tokenStream, in , 3, "...");
|
|
||||||
} catch (IOException e) {
|
|
||||||
// TODO Auto-generated catch block
|
|
||||||
e.printStackTrace();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private final int maxDocCharsToAnalyze = 4000;
|
|
||||||
Log log = LogFactory.getLog(LuceneHighlighter.class);
|
|
||||||
}
|
|
|
@ -306,7 +306,7 @@ public class LuceneSearcher implements Searcher {
|
||||||
* we need to 'rewrite' the query. That takes any wild cards
|
* we need to 'rewrite' the query. That takes any wild cards
|
||||||
* and replaces them will all terms that are found in the index.
|
* and replaces them will all terms that are found in the index.
|
||||||
*/
|
*/
|
||||||
public VitroHighlighter getHighlighter(VitroQuery queryIn){
|
/* public VitroHighlighter getHighlighter(VitroQuery queryIn){
|
||||||
if( ! (queryIn instanceof LuceneQuery) ){
|
if( ! (queryIn instanceof LuceneQuery) ){
|
||||||
log.error("LuceneSearcher expects to get a LuceneQuery");
|
log.error("LuceneSearcher expects to get a LuceneQuery");
|
||||||
throw new Error("LuceneSearcher expects to get a LuceneQuery");
|
throw new Error("LuceneSearcher expects to get a LuceneQuery");
|
||||||
|
@ -327,6 +327,6 @@ public class LuceneSearcher implements Searcher {
|
||||||
log.error(e, e);
|
log.error(e, e);
|
||||||
}
|
}
|
||||||
return (VitroHighlighter)highlighter;
|
return (VitroHighlighter)highlighter;
|
||||||
}
|
}*/
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,6 +21,7 @@ import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.KeywordAnalyzer;
|
import org.apache.lucene.analysis.KeywordAnalyzer;
|
||||||
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
|
import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
|
||||||
|
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
|
||||||
import com.hp.hpl.jena.ontology.OntModel;
|
import com.hp.hpl.jena.ontology.OntModel;
|
||||||
|
@ -229,10 +230,12 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
*/
|
*/
|
||||||
private Analyzer getAnalyzer() {
|
private Analyzer getAnalyzer() {
|
||||||
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer());
|
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new KeywordAnalyzer());
|
||||||
|
// PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer());
|
||||||
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
||||||
analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
|
// analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
|
||||||
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||||
analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||||
|
analyzer.addAnalyzer(NAME, new KeywordAnalyzer());
|
||||||
return analyzer;
|
return analyzer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,83 +0,0 @@
|
||||||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
|
||||||
|
|
||||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.StringReader;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
|
||||||
import org.apache.commons.logging.LogFactory;
|
|
||||||
import org.apache.lucene.analysis.Analyzer;
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
|
||||||
import org.apache.lucene.search.Query;
|
|
||||||
import org.apache.lucene.search.highlight.Formatter;
|
|
||||||
import org.apache.lucene.search.highlight.Highlighter;
|
|
||||||
import org.apache.lucene.search.highlight.NullFragmenter;
|
|
||||||
import org.apache.lucene.search.highlight.QueryScorer;
|
|
||||||
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter;
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.utils.Html2Text;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is a highlighter and fragmenter for use with PagedSearchController.
|
|
||||||
*/
|
|
||||||
public class SimpleLuceneHighlighter extends VitroHighlighter{
|
|
||||||
Highlighter fragHighlighter = null;
|
|
||||||
Analyzer analyzer = null;
|
|
||||||
|
|
||||||
public SimpleLuceneHighlighter(Query query, Analyzer a){
|
|
||||||
QueryScorer scorer = new QueryScorer( query ,Entity2LuceneDoc.term.ALLTEXT);
|
|
||||||
|
|
||||||
Formatter formatter =
|
|
||||||
new SimpleHTMLFormatter(preTag,postTag);
|
|
||||||
this.analyzer = a;
|
|
||||||
this.fragHighlighter = new Highlighter(formatter, scorer);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String highlight( String in){
|
|
||||||
//not really implemented.
|
|
||||||
return in;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getHighlightFragments(String in ) {
|
|
||||||
Html2Text h2t = new Html2Text();
|
|
||||||
try{
|
|
||||||
h2t.parse(in);
|
|
||||||
}catch(IOException ioe){
|
|
||||||
log.debug("could not strip html from string",ioe);
|
|
||||||
}
|
|
||||||
String txt = h2t.getText();
|
|
||||||
|
|
||||||
if( txt != null && txt.trim().length() > 0){
|
|
||||||
String b = doHighlight( txt ,fragHighlighter);
|
|
||||||
if( b != null && b.trim().length() > 0 )
|
|
||||||
return "..." + " " + b + " " + "...";
|
|
||||||
else
|
|
||||||
return "";
|
|
||||||
} else {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private String doHighlight(String in, Highlighter hi ) {
|
|
||||||
String result = in;
|
|
||||||
if(in != null ){
|
|
||||||
TokenStream tokenStream =
|
|
||||||
analyzer.tokenStream(Entity2LuceneDoc.term.ALLTEXT, new StringReader(in));
|
|
||||||
try {
|
|
||||||
//Get 3 best fragments and seperate with a "..."
|
|
||||||
result = hi.getBestFragments(tokenStream, in , 3, "...");
|
|
||||||
} catch (IOException e) {
|
|
||||||
log.debug("could not highlight",e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static Log log = LogFactory.getLog(SimpleLuceneHighlighter.class);
|
|
||||||
}
|
|
Loading…
Add table
Add a link
Reference in a new issue