NIHVIVO-2459 Work on SolrAutocompleteController (not working yet). Refactoring VitroLuceneTermNames rdfs:label field names to clarify what the fields are. Restored 1.2 Lucene analysis to name fields.
This commit is contained in:
parent
39622d8b2b
commit
702f904e75
11 changed files with 184 additions and 240 deletions
|
@ -427,16 +427,16 @@
|
|||
|
||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
||||
|
||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="ture" multiValued="true"/>
|
||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||
<field name="name" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="nameunstemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="nameunanalyzed" type="string" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="nameraw" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="nameRaw" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="nameLowercase" type="string" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="nameUnstemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||
|
|
|
@ -29,6 +29,7 @@ import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
|||
import com.hp.hpl.jena.vocabulary.RDF;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.web.ContentType;
|
||||
|
||||
|
@ -74,13 +75,13 @@ public void doGet (HttpServletRequest req, HttpServletResponse res) throws IOExc
|
|||
String classUri = (String) getServletContext().getAttribute("classuri");
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
query.add(
|
||||
new TermQuery( new Term(Entity2LuceneDoc.term.RDFTYPE, classUri)),
|
||||
new TermQuery( new Term(VitroLuceneTermNames.RDFTYPE, classUri)),
|
||||
BooleanClause.Occur.MUST );
|
||||
|
||||
IndexSearcher index = LuceneIndexFactory.getIndexSearcher(getServletContext());
|
||||
TopDocs docs = index.search(query, null,
|
||||
ENTITY_LIST_CONTROLLER_MAX_RESULTS,
|
||||
new Sort(Entity2LuceneDoc.term.NAMELOWERCASE));
|
||||
new Sort(VitroLuceneTermNames.NAME_LOWERCASE));
|
||||
|
||||
if( docs == null ){
|
||||
log.error("Search of lucene index returned null");
|
||||
|
@ -97,7 +98,7 @@ public void doGet (HttpServletRequest req, HttpServletResponse res) throws IOExc
|
|||
if (hit != null) {
|
||||
Document doc = index.doc(hit.doc);
|
||||
if (doc != null) {
|
||||
String uri = doc.getField(Entity2LuceneDoc.term.URI).stringValue();
|
||||
String uri = doc.getField(VitroLuceneTermNames.URI).stringValue();
|
||||
resource = ResourceFactory.createResource(uri);
|
||||
node = (RDFNode) ResourceFactory.createResource(classUri);
|
||||
model.add(resource, RDF.type, node);
|
||||
|
|
|
@ -377,7 +377,7 @@ public class JSONReconcileServlet extends VitroHttpServlet {
|
|||
|
||||
String stemParam = (String) request.getParameter("stem");
|
||||
boolean stem = "true".equals(stemParam);
|
||||
String termName = stem ? VitroLuceneTermNames.NAME : VitroLuceneTermNames.NAMEUNSTEMMED;
|
||||
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||
|
||||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
|
||||
|
@ -415,7 +415,7 @@ public class JSONReconcileServlet extends VitroHttpServlet {
|
|||
private Query makeUntokenizedNameQuery(String querystr) {
|
||||
|
||||
querystr = querystr.toLowerCase();
|
||||
String termName = VitroLuceneTermNames.NAMELOWERCASE;
|
||||
String termName = VitroLuceneTermNames.NAME_LOWERCASE;
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
log.debug("Adding wildcard query on unanalyzed name");
|
||||
query.add(
|
||||
|
|
|
@ -188,7 +188,7 @@ public class IndividualListController extends FreemarkerHttpServlet {
|
|||
try{
|
||||
docs = index.search(query, null,
|
||||
ENTITY_LIST_CONTROLLER_MAX_RESULTS,
|
||||
new Sort(Entity2LuceneDoc.term.NAMELOWERCASE));
|
||||
new Sort(Entity2LuceneDoc.term.NAME_LOWERCASE));
|
||||
}catch(Throwable th){
|
||||
log.error("Could not run search. " + th.getMessage());
|
||||
docs = null;
|
||||
|
@ -258,7 +258,7 @@ public class IndividualListController extends FreemarkerHttpServlet {
|
|||
Query alphaQuery = null;
|
||||
if( alpha != null && !"".equals(alpha) && alpha.length() == 1){
|
||||
alphaQuery =
|
||||
new PrefixQuery(new Term(Entity2LuceneDoc.term.NAMELOWERCASE, alpha.toLowerCase()));
|
||||
new PrefixQuery(new Term(Entity2LuceneDoc.term.NAME_LOWERCASE, alpha.toLowerCase()));
|
||||
query.add(alphaQuery,BooleanClause.Occur.MUST);
|
||||
}
|
||||
|
||||
|
|
|
@ -30,8 +30,7 @@ import org.apache.lucene.search.TopDocs;
|
|||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.json.JSONArray;
|
||||
|
||||
import com.hp.hpl.jena.sparql.lib.org.json.JSONObject;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
|
||||
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
|
||||
|
@ -118,7 +117,7 @@ public class AutocompleteController extends VitroAjaxController {
|
|||
try{
|
||||
Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc);
|
||||
String uri = doc.get(VitroLuceneTermNames.URI);
|
||||
String name = doc.get(VitroLuceneTermNames.NAMERAW);
|
||||
String name = doc.get(VitroLuceneTermNames.NAME_RAW);
|
||||
SearchResult result = new SearchResult(name, uri);
|
||||
results.add(result);
|
||||
} catch(Exception e){
|
||||
|
@ -208,7 +207,7 @@ public class AutocompleteController extends VitroAjaxController {
|
|||
|
||||
String stemParam = (String) request.getParameter("stem");
|
||||
boolean stem = "true".equals(stemParam);
|
||||
String termName = stem ? VitroLuceneTermNames.NAME : VitroLuceneTermNames.NAMEUNSTEMMED;
|
||||
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||
|
||||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
|
||||
|
@ -245,7 +244,7 @@ public class AutocompleteController extends VitroAjaxController {
|
|||
private Query makeUntokenizedNameQuery(String querystr) {
|
||||
|
||||
querystr = querystr.toLowerCase();
|
||||
String termName = VitroLuceneTermNames.NAMELOWERCASE;
|
||||
String termName = VitroLuceneTermNames.NAME_LOWERCASE;
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
log.debug("Adding wildcard query on unanalyzed name");
|
||||
query.add(
|
||||
|
|
|
@ -65,6 +65,7 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
|
|||
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.CustomSimilarity;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
||||
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
|
||||
|
@ -228,7 +229,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
Document document = searcherForRequest.doc(scoreDoc.doc);
|
||||
Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc);
|
||||
|
||||
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME) + " score: " +scoreDoc.score);
|
||||
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED) + " score: " +scoreDoc.score);
|
||||
log.debug("Scoring of the doc explained " + explanation.toString());
|
||||
log.debug("Explanation's description "+ explanation.getDescription());
|
||||
log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT));
|
||||
|
@ -404,7 +405,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
Document doc;
|
||||
try {
|
||||
doc = searcher.doc(topDocs.scoreDocs[i].doc);
|
||||
String name =doc.get(Entity2LuceneDoc.term.NAME);
|
||||
String name =doc.get(Entity2LuceneDoc.term.NAME_STEMMED);
|
||||
if( name != null && name.length() > 0)
|
||||
alphas.add( name.substring(0, 1));
|
||||
} catch (CorruptIndexException e) {
|
||||
|
@ -621,7 +622,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
boolQuery.add( query, BooleanClause.Occur.MUST );
|
||||
boolQuery.add(
|
||||
new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME, alpha+'*')),
|
||||
new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME_STEMMED, alpha+'*')),
|
||||
BooleanClause.Occur.MUST);
|
||||
query = boolQuery;
|
||||
}
|
||||
|
@ -682,7 +683,15 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
// qp.setStemmedToUnstemmed(map);
|
||||
|
||||
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{
|
||||
"name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer);
|
||||
VitroLuceneTermNames.NAME_STEMMED,
|
||||
VitroLuceneTermNames.NAME_UNSTEMMED,
|
||||
VitroLuceneTermNames.RDFTYPE,
|
||||
VitroLuceneTermNames.MONIKER,
|
||||
VitroLuceneTermNames.ALLTEXT,
|
||||
VitroLuceneTermNames.ALLTEXTUNSTEMMED,
|
||||
VitroLuceneTermNames.NAME_RAW,
|
||||
VitroLuceneTermNames.CLASSLOCALNAME,
|
||||
VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE }, analyzer);
|
||||
|
||||
// QueryParser qp = new QueryParser(Version.LUCENE_29, "name", analyzer);
|
||||
|
||||
|
|
|
@ -9,43 +9,34 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.servlet.ServletContext;
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryParser.ParseException;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.json.JSONArray;
|
||||
|
||||
import com.hp.hpl.jena.sparql.lib.org.json.JSONObject;
|
||||
import org.json.JSONObject;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
|
||||
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
|
||||
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
|
||||
import edu.cornell.mannlib.vitro.webapp.controller.ajax.VitroAjaxController;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.SearchException;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
|
||||
|
||||
/**
|
||||
* AutocompleteController generates autocomplete content
|
||||
* through a Lucene search.
|
||||
* through a Solr search.
|
||||
*/
|
||||
|
||||
// RY Rename to AutocompleteController once the transition to Solr is complete.
|
||||
public class SolrAutocompleteController extends VitroAjaxController {
|
||||
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
@ -53,10 +44,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
|
||||
//private static final String TEMPLATE_DEFAULT = "autocompleteResults.ftl";
|
||||
|
||||
private static String QUERY_PARAMETER_NAME = "term";
|
||||
private static final String PARAM_QUERY = "term";
|
||||
private static final String PARAM_RDFTYPE = "type";
|
||||
|
||||
String NORESULT_MSG = "";
|
||||
private int defaultMaxSearchSize= 1000;
|
||||
private static final int DEFAULT_MAX_HIT_COUNT = 1000;
|
||||
|
||||
public static final int MAX_QUERY_LENGTH = 500;
|
||||
|
||||
@Override
|
||||
protected Actions requiredActions(VitroRequest vreq) {
|
||||
|
@ -69,12 +63,9 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
|
||||
try {
|
||||
|
||||
int maxHitSize = defaultMaxSearchSize;
|
||||
String qtxt = vreq.getParameter(PARAM_QUERY);
|
||||
|
||||
String qtxt = vreq.getParameter(QUERY_PARAMETER_NAME);
|
||||
Analyzer analyzer = getAnalyzer(getServletContext());
|
||||
|
||||
Query query = getQuery(vreq, analyzer, qtxt);
|
||||
SolrQuery query = getQuery(qtxt, vreq);
|
||||
if (query == null ) {
|
||||
log.debug("query for '" + qtxt +"' is null.");
|
||||
doNoQuery(response);
|
||||
|
@ -82,43 +73,35 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
}
|
||||
log.debug("query for '" + qtxt +"' is " + query.toString());
|
||||
|
||||
IndexSearcher searcherForRequest = LuceneIndexFactory.getIndexSearcher(getServletContext());
|
||||
SolrServer solr = SolrSetup.getSolrServer(getServletContext());
|
||||
QueryResponse queryResponse = solr.query(query);
|
||||
|
||||
TopDocs topDocs = null;
|
||||
try{
|
||||
topDocs = searcherForRequest.search(query,null,maxHitSize);
|
||||
}catch(Throwable t){
|
||||
log.error("in first pass at search: " + t);
|
||||
// this is a hack to deal with odd cases where search and index threads interact
|
||||
try{
|
||||
wait(150);
|
||||
topDocs = searcherForRequest.search(query,null,maxHitSize);
|
||||
}catch (Exception e){
|
||||
log.error(e, e);
|
||||
doNoSearchResults(response);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if( topDocs == null || topDocs.scoreDocs == null){
|
||||
log.error("topDocs for a search was null");
|
||||
if ( queryResponse == null) {
|
||||
log.error("Query response for a search was null");
|
||||
doNoSearchResults(response);
|
||||
return;
|
||||
}
|
||||
|
||||
int hitsLength = topDocs.scoreDocs.length;
|
||||
if ( hitsLength < 1 ){
|
||||
SolrDocumentList docs = queryResponse.getResults();
|
||||
|
||||
if ( docs == null) {
|
||||
log.error("Docs for a search was null");
|
||||
doNoSearchResults(response);
|
||||
return;
|
||||
}
|
||||
|
||||
long hitCount = docs.getNumFound();
|
||||
log.debug("Number of hits = " + hitCount);
|
||||
if ( hitCount < 1 ) {
|
||||
doNoSearchResults(response);
|
||||
return;
|
||||
}
|
||||
log.debug("found "+hitsLength+" hits");
|
||||
|
||||
List<SearchResult> results = new ArrayList<SearchResult>();
|
||||
for(int i=0; i<topDocs.scoreDocs.length ;i++){
|
||||
for (SolrDocument doc : docs) {
|
||||
try{
|
||||
Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc);
|
||||
String uri = doc.get(VitroLuceneTermNames.URI);
|
||||
String name = doc.get(VitroLuceneTermNames.NAMERAW);
|
||||
String uri = doc.get(VitroLuceneTermNames.URI).toString();
|
||||
String name = doc.get(VitroLuceneTermNames.NAME_RAW).toString();
|
||||
SearchResult result = new SearchResult(name, uri);
|
||||
results.add(result);
|
||||
} catch(Exception e){
|
||||
|
@ -138,143 +121,115 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
}
|
||||
response.getWriter().write(jsonArray.toString());
|
||||
|
||||
|
||||
} catch (Throwable e) {
|
||||
log.error(e, e);
|
||||
doSearchError(response);
|
||||
}
|
||||
}
|
||||
|
||||
private Analyzer getAnalyzer(ServletContext servletContext) throws SearchException {
|
||||
Object obj = servletContext.getAttribute(LuceneSetup.ANALYZER);
|
||||
if( obj == null || !(obj instanceof Analyzer) )
|
||||
throw new SearchException("Could not get analyzer");
|
||||
else
|
||||
return (Analyzer)obj;
|
||||
}
|
||||
private SolrQuery getQuery(String querystr, VitroRequest vreq) {
|
||||
|
||||
private Query getQuery(VitroRequest vreq, Analyzer analyzer,
|
||||
String querystr) throws SearchException{
|
||||
|
||||
Query query = null;
|
||||
try {
|
||||
if( querystr == null){
|
||||
log.error("There was no Parameter '"+ QUERY_PARAMETER_NAME
|
||||
+"' in the request.");
|
||||
return null;
|
||||
}else if( querystr.length() > MAX_QUERY_LENGTH ){
|
||||
log.debug("The search was too long. The maximum " +
|
||||
"query length is " + MAX_QUERY_LENGTH );
|
||||
return null;
|
||||
}
|
||||
|
||||
query = makeNameQuery(querystr, analyzer, vreq);
|
||||
|
||||
// Filter by type
|
||||
{
|
||||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
String typeParam = (String) vreq.getParameter("type");
|
||||
boolQuery.add( new TermQuery(
|
||||
new Term(VitroLuceneTermNames.RDFTYPE,
|
||||
typeParam)),
|
||||
BooleanClause.Occur.MUST);
|
||||
boolQuery.add(query, BooleanClause.Occur.MUST);
|
||||
query = boolQuery;
|
||||
}
|
||||
|
||||
} catch (Exception ex){
|
||||
throw new SearchException(ex.getMessage());
|
||||
if ( querystr == null) {
|
||||
log.error("There was no parameter '"+ PARAM_QUERY
|
||||
+"' in the request.");
|
||||
return null;
|
||||
} else if( querystr.length() > MAX_QUERY_LENGTH ) {
|
||||
log.debug("The search was too long. The maximum " +
|
||||
"query length is " + MAX_QUERY_LENGTH );
|
||||
return null;
|
||||
}
|
||||
|
||||
SolrQuery query = new SolrQuery();
|
||||
query = query.setStart(0);
|
||||
query = query.setRows(DEFAULT_MAX_HIT_COUNT);
|
||||
|
||||
query = setNameQuery(query, querystr, vreq);
|
||||
|
||||
// Filter by type
|
||||
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
|
||||
if (typeParam != null) {
|
||||
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
|
||||
}
|
||||
|
||||
// Set the fields to retrieve **** RY
|
||||
// query = query.setFields( ... );
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
private Query makeNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) {
|
||||
private SolrQuery setNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
|
||||
|
||||
String tokenizeParam = (String) request.getParameter("tokenize");
|
||||
boolean tokenize = "true".equals(tokenizeParam);
|
||||
|
||||
// Note: Stemming is only relevant if we are tokenizing: an untokenized name
|
||||
// query will not be stemmed. So we don't look at the stem parameter until we get to
|
||||
// makeTokenizedNameQuery().
|
||||
// setTokenizedNameQuery().
|
||||
if (tokenize) {
|
||||
return makeTokenizedNameQuery(querystr, analyzer, request);
|
||||
return setTokenizedNameQuery(query, querystr, request);
|
||||
} else {
|
||||
return makeUntokenizedNameQuery(querystr);
|
||||
return setUntokenizedNameQuery(query, querystr);
|
||||
}
|
||||
}
|
||||
|
||||
private Query makeTokenizedNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) {
|
||||
private SolrQuery setTokenizedNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
|
||||
|
||||
String stemParam = (String) request.getParameter("stem");
|
||||
boolean stem = "true".equals(stemParam);
|
||||
String termName = stem ? VitroLuceneTermNames.NAME : VitroLuceneTermNames.NAMEUNSTEMMED;
|
||||
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||
|
||||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
|
||||
// Use the query parser to analyze the search term the same way the indexed text was analyzed.
|
||||
// For example, text is lowercased, and function words are stripped out.
|
||||
QueryParser parser = getQueryParser(termName, analyzer);
|
||||
|
||||
// The wildcard query doesn't play well with stemming. Query term name:tales* doesn't match
|
||||
// "tales", which is indexed as "tale", while query term name:tales does. Obviously we need
|
||||
// the wildcard for name:tal*, so the only way to get them all to match is use a disjunction
|
||||
// of wildcard and non-wildcard queries. The query will have only an implicit disjunction
|
||||
// operator: e.g., +(name:tales name:tales*)
|
||||
try {
|
||||
log.debug("Adding non-wildcard query for " + querystr);
|
||||
Query query = parser.parse(querystr);
|
||||
boolQuery.add(query, BooleanClause.Occur.SHOULD);
|
||||
|
||||
// Prevent ParseException here when adding * after a space.
|
||||
// If there's a space at the end, we don't need the wildcard query.
|
||||
if (! querystr.endsWith(" ")) {
|
||||
log.debug("Adding wildcard query for " + querystr);
|
||||
Query wildcardQuery = parser.parse(querystr + "*");
|
||||
boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
|
||||
log.debug("Name query is: " + boolQuery.toString());
|
||||
} catch (ParseException e) {
|
||||
log.warn(e, e);
|
||||
}
|
||||
|
||||
return boolQuery;
|
||||
}
|
||||
|
||||
private Query makeUntokenizedNameQuery(String querystr) {
|
||||
|
||||
querystr = querystr.toLowerCase();
|
||||
String termName = VitroLuceneTermNames.NAMELOWERCASE;
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
log.debug("Adding wildcard query on unanalyzed name");
|
||||
query.add(
|
||||
new WildcardQuery(new Term(termName, querystr + "*")),
|
||||
BooleanClause.Occur.MUST);
|
||||
// // Use the query parser to analyze the search term the same way the indexed text was analyzed.
|
||||
// // For example, text is lowercased, and function words are stripped out.
|
||||
// QueryParser parser = getQueryParser(termName);
|
||||
//
|
||||
// // The wildcard query doesn't play well with stemming. Query term name:tales* doesn't match
|
||||
// // "tales", which is indexed as "tale", while query term name:tales does. Obviously we need
|
||||
// // the wildcard for name:tal*, so the only way to get them all to match is use a disjunction
|
||||
// // of wildcard and non-wildcard queries. The query will look have only an implicit disjunction
|
||||
// // operator: e.g., +(name:tales name:tales*)
|
||||
// try {
|
||||
// log.debug("Adding non-wildcard query for " + querystr);
|
||||
// Query query = parser.parse(querystr);
|
||||
// boolQuery.add(query, BooleanClause.Occur.SHOULD);
|
||||
//
|
||||
// // Prevent ParseException here when adding * after a space.
|
||||
// // If there's a space at the end, we don't need the wildcard query.
|
||||
// if (! querystr.endsWith(" ")) {
|
||||
// log.debug("Adding wildcard query for " + querystr);
|
||||
// Query wildcardQuery = parser.parse(querystr + "*");
|
||||
// boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
|
||||
// }
|
||||
//
|
||||
// log.debug("Name query is: " + boolQuery.toString());
|
||||
// } catch (ParseException e) {
|
||||
// log.warn(e, e);
|
||||
// }
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
private QueryParser getQueryParser(String searchField, Analyzer analyzer){
|
||||
// searchField indicates which field to search against when there is no term
|
||||
// indicated in the query string.
|
||||
// The analyzer is needed so that we use the same analyzer on the search queries as
|
||||
// was used on the text that was indexed.
|
||||
QueryParser qp = new QueryParser(Version.LUCENE_29, searchField,analyzer);
|
||||
//this sets the query parser to AND all of the query terms it finds.
|
||||
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
|
||||
return qp;
|
||||
private SolrQuery setUntokenizedNameQuery(SolrQuery query, String querystr) {
|
||||
|
||||
//querystr = querystr.toLowerCase();
|
||||
querystr += "*";
|
||||
query = query.setQuery(querystr);
|
||||
// *** It's the df parameter that sets the field to search
|
||||
//String field = VitroLuceneTermNames.LABEL_LOWERCASE;
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
private void doNoQuery(HttpServletResponse response) throws IOException {
|
||||
// For now, we are not sending an error message back to the client because with the default autocomplete configuration it
|
||||
// chokes.
|
||||
// For now, we are not sending an error message back to the client because
|
||||
// with the default autocomplete configuration it chokes.
|
||||
doNoSearchResults(response);
|
||||
}
|
||||
|
||||
private void doSearchError(HttpServletResponse response) throws IOException {
|
||||
// For now, we are not sending an error message back to the client because with the default autocomplete configuration it
|
||||
// chokes.
|
||||
// For now, we are not sending an error message back to the client because
|
||||
// with the default autocomplete configuration it chokes.
|
||||
doNoSearchResults(response);
|
||||
}
|
||||
|
||||
|
@ -282,8 +237,6 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
response.getWriter().write("[]");
|
||||
}
|
||||
|
||||
public static final int MAX_QUERY_LENGTH = 500;
|
||||
|
||||
public class SearchResult implements Comparable<Object> {
|
||||
private String label;
|
||||
private String uri;
|
||||
|
|
|
@ -22,10 +22,6 @@ import javax.servlet.http.HttpServletResponse;
|
|||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.queryParser.MultiFieldQueryParser;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
|
@ -73,9 +69,8 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
|||
private static final long serialVersionUID = 1L;
|
||||
private static final Log log = LogFactory.getLog(SolrPagedSearchController.class);
|
||||
|
||||
|
||||
private static final int DEFAULT_HITS_PER_PAGE = 25;
|
||||
private static final int DEFAULT_MAX_SEARCH_SIZE = 1000;
|
||||
private static final int DEFAULT_MAX_HIT_COUNT = 1000;
|
||||
|
||||
private static final String PARAM_XML_REQUEST = "xml";
|
||||
private static final String PARAM_START_INDEX = "startIndex";
|
||||
|
@ -169,15 +164,15 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
|||
}
|
||||
log.debug("hitsPerPage is " + hitsPerPage);
|
||||
|
||||
int maxHitCount = DEFAULT_MAX_SEARCH_SIZE ;
|
||||
if( startIndex >= DEFAULT_MAX_SEARCH_SIZE - hitsPerPage )
|
||||
maxHitCount = startIndex + DEFAULT_MAX_SEARCH_SIZE ;
|
||||
int maxHitCount = DEFAULT_MAX_HIT_COUNT ;
|
||||
if( startIndex >= DEFAULT_MAX_HIT_COUNT - hitsPerPage )
|
||||
maxHitCount = startIndex + DEFAULT_MAX_HIT_COUNT ;
|
||||
|
||||
log.debug("maxHitSize is " + maxHitCount);
|
||||
|
||||
String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
|
||||
|
||||
log.debug("Query text is "+ qtxt); // + " Analyzer is "+ analyzer.toString());
|
||||
log.debug("Query text is \""+ qtxt + "\"");
|
||||
|
||||
SolrQuery query = getQuery(qtxt, maxHitCount, vreq);
|
||||
|
||||
|
@ -440,29 +435,27 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
|||
|
||||
private SolrQuery getQuery(String queryText, int maxHitCount, VitroRequest vreq) {
|
||||
SolrQuery query = new SolrQuery(queryText);
|
||||
//SolrQuery query = new SolrQuery();
|
||||
//query.setQuery(queryText);
|
||||
|
||||
// Solr requires these values, but we don't want them to be the real values for this page
|
||||
// of results, else the refinement links won't work correctly: each page of results needs to
|
||||
// show refinement links generated for all results, not just for the results on the current page.
|
||||
query.setStart(0);
|
||||
query.setRows(maxHitCount);
|
||||
query.setStart(0)
|
||||
.setRows(maxHitCount);
|
||||
|
||||
// Classgroup filtering
|
||||
Object param = vreq.getParameter(PARAM_CLASSGROUP);
|
||||
if( param != null && !"".equals(param)){
|
||||
String classgroupParam = (String) vreq.getParameter(PARAM_CLASSGROUP);
|
||||
if ( ! StringUtils.isBlank(classgroupParam) ) {
|
||||
log.debug("Firing classgroup query ");
|
||||
log.debug("request.getParameter(classgroup) is "+ param.toString());
|
||||
query = query.addFilterQuery(VitroLuceneTermNames.CLASSGROUP_URI + ":\"" + param + "\"");
|
||||
log.debug("request.getParameter(classgroup) is "+ classgroupParam);
|
||||
query.addFilterQuery(VitroLuceneTermNames.CLASSGROUP_URI + ":\"" + classgroupParam + "\"");
|
||||
}
|
||||
|
||||
// rdf:type filtering
|
||||
param = vreq.getParameter(PARAM_RDFTYPE);
|
||||
if( param != null && !"".equals(param)){
|
||||
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
|
||||
if ( ! StringUtils.isBlank(typeParam) ) {
|
||||
log.debug("Firing type query ");
|
||||
log.debug("request.getParameter(type) is "+ param.toString());
|
||||
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + param + "\"");
|
||||
log.debug("request.getParameter(type) is "+ typeParam);
|
||||
query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
|
||||
}
|
||||
|
||||
//query.setQuery(queryText);
|
||||
|
@ -470,23 +463,6 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
|
|||
return query;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private QueryParser getQueryParser(Analyzer analyzer){
|
||||
|
||||
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[] {
|
||||
VitroLuceneTermNames.NAME,
|
||||
VitroLuceneTermNames.NAMEUNSTEMMED,
|
||||
VitroLuceneTermNames.RDFTYPE,
|
||||
VitroLuceneTermNames.ALLTEXT,
|
||||
VitroLuceneTermNames.ALLTEXTUNSTEMMED,
|
||||
VitroLuceneTermNames.NAMERAW,
|
||||
VitroLuceneTermNames.CLASSLOCALNAME,
|
||||
VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE }, analyzer);
|
||||
//"name", "nameunstemmed", "type", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer);
|
||||
|
||||
return qp;
|
||||
}
|
||||
|
||||
private class VClassGroupSearchLink extends LinkTemplateModel {
|
||||
|
||||
VClassGroupSearchLink(String querytext, VClassGroup classgroup) {
|
||||
|
|
|
@ -45,14 +45,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
public static String CLASSGROUP_URI = "classgroup";
|
||||
/** Modtime from db */
|
||||
public static String MODTIME = "modTime";
|
||||
/** Name of entity, tab or vclass */
|
||||
public static String NAME = "name";
|
||||
/** rdfs:label unanalyzed */
|
||||
public static String NAMELOWERCASE = "nameunanalyzed" ;
|
||||
/** Name of entity, unstemmed */
|
||||
public static String NAMEUNSTEMMED = "nameunstemmed";
|
||||
/** Unaltered name of individual, un-lowercased, un-stemmed, un-tokenized */
|
||||
public static String NAMERAW = "nameraw";
|
||||
|
||||
/** time of index in msec since epoch */
|
||||
public static String INDEXEDTIME= "indexedTime";
|
||||
/** timekey of entity in yyyymmddhhmm */
|
||||
|
@ -78,6 +71,20 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
|
||||
/** class names in human readable form of an individual*/
|
||||
public static final String CLASSLOCALNAME = "classLocalName";
|
||||
|
||||
// Fields derived from rdfs:label
|
||||
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop-word removal, no stemming **/
|
||||
public static String NAME_RAW = "nameRaw"; // was NAMERAW
|
||||
|
||||
/** rdfs:label lowercased, no tokenizing, no stop-word removal, no stemming **/
|
||||
public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
|
||||
|
||||
/** rdfs:label lowercased, tokenized, stop words removed, no stemming **/
|
||||
public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED
|
||||
|
||||
/** rdfs:label lowercased, tokenized, stop words removed, stemmed **/
|
||||
public static String NAME_STEMMED = "nameStemmed"; // was NAME
|
||||
|
||||
}
|
||||
|
||||
private static final Log log = LogFactory.getLog(Entity2LuceneDoc.class.getName());
|
||||
|
@ -189,7 +196,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
//java class
|
||||
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
|
||||
//Entity Name
|
||||
// Individual label
|
||||
if( ent.getRdfsLabel() != null )
|
||||
value=ent.getRdfsLabel();
|
||||
else{
|
||||
|
@ -198,21 +205,22 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
|
||||
value = ent.getLocalName();
|
||||
}
|
||||
Field name = new Field(term.NAME, value, Field.Store.YES, Field.Index.ANALYZED);
|
||||
doc.add( name );
|
||||
|
||||
Field nameUn = new Field(term.NAMEUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||
nameUn.setBoost(NAME_BOOST);
|
||||
doc.add( nameUn );
|
||||
Field labelRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
labelRaw.setBoost(NAME_BOOST);
|
||||
doc.add(labelRaw);
|
||||
|
||||
// BK nameunanalyzed is used by IndividualListController
|
||||
Field nameUnanalyzed = new Field(term.NAMELOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
nameUnanalyzed.setBoost(NAME_BOOST);
|
||||
doc.add( nameUnanalyzed );
|
||||
// NAME_LOWERCASE is indexed NOT_ANALYZED, so no analyzer will lowercase it; lowercase the value explicitly here.
Field labelLowerCase = new Field(term.NAME_LOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
labelLowerCase.setBoost(NAME_BOOST);
|
||||
doc.add(labelLowerCase);
|
||||
|
||||
Field nameRaw = new Field(term.NAMERAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
nameRaw.setBoost(NAME_BOOST);
|
||||
doc.add(nameRaw);
|
||||
Field labelUnstemmed = new Field(term.NAME_UNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||
labelUnstemmed.setBoost(NAME_BOOST);
|
||||
doc.add(labelUnstemmed);
|
||||
|
||||
Field labelStemmed = new Field(term.NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||
labelStemmed.setBoost(NAME_BOOST);
|
||||
doc.add(labelStemmed);
|
||||
|
||||
|
||||
//Moniker
|
||||
|
|
|
@ -81,8 +81,7 @@ public class HtmlLowerStopAnalyzer extends Analyzer {
|
|||
|
||||
/**
|
||||
* Processes the input by first converting it to
|
||||
* lower case, then by eliminating stop words, and
|
||||
* finally by performing Porter stemming on it.
|
||||
* lower case, then by eliminating stop words.
|
||||
*
|
||||
* @param reader the Reader that
|
||||
* provides access to the input text
|
||||
|
|
|
@ -7,8 +7,8 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
|
|||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
||||
|
||||
import java.io.File;
|
||||
|
@ -245,10 +245,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.LUCENE_29));
|
||||
|
||||
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
||||
// analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
|
||||
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(NAME, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(NAME_UNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
||||
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue