NIHVIVO-2459 Untokenized, unstemmed autocomplete (e.g., in addAuthors form). Added commented-out configuration for Solr Suggester in solrconfig.xml.

This commit is contained in:
ryounes 2011-05-18 20:42:23 +00:00
parent bb07c9ad7e
commit a86480e6b7
3 changed files with 106 additions and 60 deletions

View file

@ -495,6 +495,7 @@
<field name="modType" type="ignored"/> <field name="modType" type="ignored"/>
<field name="JCLASS" type="ignored"/> <field name="JCLASS" type="ignored"/>
<!-- **************************** End Vitro Fields *************************** --> <!-- **************************** End Vitro Fields *************************** -->

View file

@ -385,7 +385,9 @@
be based on the last SolrCore to be initialized. be based on the last SolrCore to be initialized.
--> -->
<maxBooleanClauses>1024</maxBooleanClauses> <!-- Increasing to handle large wildcard queries used in IndividualListController.
See VIVO-384. -->
<maxBooleanClauses>50000</maxBooleanClauses>
<!-- Solr Internal Query Caches <!-- Solr Internal Query Caches
@ -1394,6 +1396,46 @@
</highlighting> </highlighting>
</searchComponent> </searchComponent>
<!-- Autocomplete -->
<!--
<searchComponent class="solr.SpellCheckComponent" name="suggest">
<lst name="spellchecker">
<str name="name">suggest</str>
<str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
<str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
-->
<!-- Alternatives to lookupImpl:
org.apache.solr.spelling.suggest.fst.FSTLookup [finite state automaton]
org.apache.solr.spelling.suggest.jaspell.JaspellLookup [default, jaspell-based]
org.apache.solr.spelling.suggest.tst.TSTLookup [ternary trees]
-->
<!-- the indexed field to derive suggestions from -->
<!--
<str name="field">nameLowercase</str>
<float name="threshold">0.005</float>
<str name="buildOnCommit">false</str>
<str name="storeDir">suggest</str>
-->
<!--
<str name="sourceLocation">american-english</str>
-->
<!--
</lst>
</searchComponent>
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest">
<lst name="defaults">
<str name="spellcheck">true</str>
<str name="spellcheck.dictionary">suggest</str>
<str name="spellcheck.onlyMorePopular">false</str>
<str name="spellcheck.count">100</str>
<str name="spellcheck.collate">false</str>
</lst>
<arr name="components">
<str>suggest</str>
</arr>
</requestHandler>
-->
<!-- Update Processors <!-- Update Processors
Chains of Update Processor Factories for dealing with Update Chains of Update Processor Factories for dealing with Update

View file

@ -13,15 +13,16 @@ import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.FacetParams;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONObject; import org.json.JSONObject;
@ -113,7 +114,8 @@ public class SolrAutocompleteController extends VitroAjaxController {
} }
} }
Collections.sort(results); // See if we can do without this, since we set sort field on the query
//Collections.sort(results);
// map.put("results", results); // map.put("results", results);
// writeTemplate(TEMPLATE_DEFAULT, map, config, vreq, response); // writeTemplate(TEMPLATE_DEFAULT, map, config, vreq, response);
@ -130,37 +132,41 @@ public class SolrAutocompleteController extends VitroAjaxController {
} }
} }
private SolrQuery getQuery(String querystr, VitroRequest vreq) { private SolrQuery getQuery(String queryStr, VitroRequest vreq) {
if ( querystr == null) { if ( queryStr == null) {
log.error("There was no parameter '"+ PARAM_QUERY log.error("There was no parameter '"+ PARAM_QUERY
+"' in the request."); +"' in the request.");
return null; return null;
} else if( querystr.length() > MAX_QUERY_LENGTH ) { } else if( queryStr.length() > MAX_QUERY_LENGTH ) {
log.debug("The search was too long. The maximum " + log.debug("The search was too long. The maximum " +
"query length is " + MAX_QUERY_LENGTH ); "query length is " + MAX_QUERY_LENGTH );
return null; return null;
} }
SolrQuery query = new SolrQuery(); SolrQuery query = new SolrQuery();
query = query.setStart(0); query.setStart(0)
query = query.setRows(DEFAULT_MAX_HIT_COUNT); .setRows(DEFAULT_MAX_HIT_COUNT);
query = setNameQuery(query, querystr, vreq); setQuery(query, queryStr, vreq);
// Filter by type // Filter by type
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE); String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
if (typeParam != null) { if (typeParam != null) {
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\""); query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
} }
// Set the fields to retrieve **** RY // query.setFields(VitroLuceneTermNames.NAME_RAW, VitroLuceneTermNames.URI) // fields to retrieve
// query = query.setFields( ... ); // .setSortField(VitroLuceneTermNames.NAME_RAW, SolrQuery.ORDER.asc);
return query; return query;
} }
private SolrQuery setNameQuery(SolrQuery query, String querystr, HttpServletRequest request) { private void setQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
if (StringUtils.isBlank(queryStr)) {
log.error("No query string");
}
String tokenizeParam = (String) request.getParameter("tokenize"); String tokenizeParam = (String) request.getParameter("tokenize");
boolean tokenize = "true".equals(tokenizeParam); boolean tokenize = "true".equals(tokenizeParam);
@ -169,13 +175,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
// query will not be stemmed. So we don't look at the stem parameter until we get to // query will not be stemmed. So we don't look at the stem parameter until we get to
// setTokenizedNameQuery(). // setTokenizedNameQuery().
if (tokenize) { if (tokenize) {
return setTokenizedNameQuery(query, querystr, request); setTokenizedQuery(query, queryStr, request);
} else { } else {
return setUntokenizedNameQuery(query, querystr); setUntokenizedQuery(query, queryStr);
} }
} }
private SolrQuery setTokenizedNameQuery(SolrQuery query, String querystr, HttpServletRequest request) { private void setTokenizedQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
String stemParam = (String) request.getParameter("stem"); String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam); boolean stem = "true".equals(stemParam);
@ -193,15 +199,15 @@ public class SolrAutocompleteController extends VitroAjaxController {
// // of wildcard and non-wildcard queries. The query will have only an implicit disjunction // // of wildcard and non-wildcard queries. The query will have only an implicit disjunction
// // operator: e.g., +(name:tales name:tales*) // // operator: e.g., +(name:tales name:tales*)
// try { // try {
// log.debug("Adding non-wildcard query for " + querystr); // log.debug("Adding non-wildcard query for " + queryStr);
// Query query = parser.parse(querystr); // Query query = parser.parse(queryStr);
// boolQuery.add(query, BooleanClause.Occur.SHOULD); // boolQuery.add(query, BooleanClause.Occur.SHOULD);
// //
// // Prevent ParseException here when adding * after a space. // // Prevent ParseException here when adding * after a space.
// // If there's a space at the end, we don't need the wildcard query. // // If there's a space at the end, we don't need the wildcard query.
// if (! querystr.endsWith(" ")) { // if (! queryStr.endsWith(" ")) {
// log.debug("Adding wildcard query for " + querystr); // log.debug("Adding wildcard query for " + queryStr);
// Query wildcardQuery = parser.parse(querystr + "*"); // Query wildcardQuery = parser.parse(queryStr + "*");
// boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD); // boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
// } // }
// //
@ -210,20 +216,17 @@ public class SolrAutocompleteController extends VitroAjaxController {
// log.warn(e, e); // log.warn(e, e);
// } // }
return query;
} }
private SolrQuery setUntokenizedNameQuery(SolrQuery query, String querystr) { private void setUntokenizedQuery(SolrQuery query, String queryStr) {
// Using facet method described in http://solr.pl/en/2010/10/18/solr-and-autocomplete-part-1/ // Don't know why we should have to do this; the analyzer should take care of it, but doesn't
// Consider using Solr Suggester in a future version. queryStr = queryStr.toLowerCase();
return query.setFacet(true) // Solr wants whitespace to be escaped with a backslash
.addFacetField(VitroLuceneTermNames.NAME_LOWERCASE) // Better: replace \s+
.setFacetMinCount(1) queryStr = queryStr.replaceAll(" ", "\\\\ ");
.setFacetLimit(MAX_QUERY_LENGTH) queryStr = VitroLuceneTermNames.NAME_LOWERCASE + ":" + queryStr + "*";
.setFacetPrefix(querystr)//.toLowerCase()) query.setQuery(queryStr);
//.setFacetSort(FacetParams.FACET_SORT_INDEX) // sort by alpha (but doesn't work)
.setQuery("*:*");
} }