NIHVIVO-2459 Untokenized, unstemmed autocomplete (e.g., in addAuthors form). Added commented-out configuration for Solr Suggester in solrconfig.xml.

This commit is contained in:
ryounes 2011-05-18 20:42:23 +00:00
parent bb07c9ad7e
commit a86480e6b7
3 changed files with 106 additions and 60 deletions

View file

@ -470,32 +470,33 @@
when adding a document. when adding a document.
--> -->
<!-- **************************** Vitro Fields *************************** --> <!-- **************************** Vitro Fields *************************** -->
<field name="DocId" type="string" indexed="true" stored="true" required="true" /> <field name="DocId" type="string" indexed="true" stored="true" required="true" />
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/> <field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/> <field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/> <field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/> <field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/> <field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/> <field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/> <field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? --> <!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/> <field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
<field name="acNameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/> <field name="acNameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
<field name="acNameStemmed" type="text" indexed="true" stored="false" multiValued="true"/> <field name="acNameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/> <field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/> <field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/> <field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/> <field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
<field name="moniker" type="ignored" /> <field name="moniker" type="ignored" />
<field name="modType" type="ignored"/> <field name="modType" type="ignored"/>
<field name="JCLASS" type="ignored"/> <field name="JCLASS" type="ignored"/>
<!-- **************************** End Vitro Fields *************************** -->
<!-- **************************** End Vitro Fields *************************** -->
<!-- catchall field, containing all other searchable text fields (implemented <!-- catchall field, containing all other searchable text fields (implemented

View file

@ -384,8 +384,10 @@
disagree on this property, the value at any given moment will disagree on this property, the value at any given moment will
be based on the last SolrCore to be initialized. be based on the last SolrCore to be initialized.
--> -->
<maxBooleanClauses>1024</maxBooleanClauses> <!-- Increasing to handle large wildcard queries used in IndividualListController.
See VIVO-384. -->
<maxBooleanClauses>50000</maxBooleanClauses>
<!-- Solr Internal Query Caches <!-- Solr Internal Query Caches
@ -1394,6 +1396,46 @@
</highlighting> </highlighting>
</searchComponent> </searchComponent>
<!-- Autocomplete -->
<!--
<searchComponent class="solr.SpellCheckComponent" name="suggest">
<lst name="spellchecker">
<str name="name">suggest</str>
<str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
<str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
-->
<!-- Alternatives to lookupImpl:
org.apache.solr.spelling.suggest.fst.FSTLookup [finite state automaton]
org.apache.solr.spelling.suggest.jaspell.JaspellLookup [default, jaspell-based]
org.apache.solr.spelling.suggest.tst.TSTLookup [ternary trees]
-->
<!-- the indexed field to derive suggestions from -->
<!--
<str name="field">nameLowercase</str>
<float name="threshold">0.005</float>
<str name="buildOnCommit">false</str>
<str name="storeDir">suggest</str>
-->
<!--
<str name="sourceLocation">american-english</str>
-->
<!--
</lst>
</searchComponent>
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest">
<lst name="defaults">
<str name="spellcheck">true</str>
<str name="spellcheck.dictionary">suggest</str>
<str name="spellcheck.onlyMorePopular">false</str>
<str name="spellcheck.count">100</str>
<str name="spellcheck.collate">false</str>
</lst>
<arr name="components">
<str>suggest</str>
</arr>
</requestHandler>
-->
<!-- Update Processors <!-- Update Processors
Chains of Update Processor Factories for dealing with Update Chains of Update Processor Factories for dealing with Update

View file

@ -13,15 +13,16 @@ import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.FacetParams;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONObject; import org.json.JSONObject;
@ -113,7 +114,8 @@ public class SolrAutocompleteController extends VitroAjaxController {
} }
} }
Collections.sort(results); // See if we can do without this, since we set sort field on the query
//Collections.sort(results);
// map.put("results", results); // map.put("results", results);
// writeTemplate(TEMPLATE_DEFAULT, map, config, vreq, response); // writeTemplate(TEMPLATE_DEFAULT, map, config, vreq, response);
@ -130,37 +132,41 @@ public class SolrAutocompleteController extends VitroAjaxController {
} }
} }
private SolrQuery getQuery(String querystr, VitroRequest vreq) { private SolrQuery getQuery(String queryStr, VitroRequest vreq) {
if ( querystr == null) { if ( queryStr == null) {
log.error("There was no parameter '"+ PARAM_QUERY log.error("There was no parameter '"+ PARAM_QUERY
+"' in the request."); +"' in the request.");
return null; return null;
} else if( querystr.length() > MAX_QUERY_LENGTH ) { } else if( queryStr.length() > MAX_QUERY_LENGTH ) {
log.debug("The search was too long. The maximum " + log.debug("The search was too long. The maximum " +
"query length is " + MAX_QUERY_LENGTH ); "query length is " + MAX_QUERY_LENGTH );
return null; return null;
} }
SolrQuery query = new SolrQuery(); SolrQuery query = new SolrQuery();
query = query.setStart(0); query.setStart(0)
query = query.setRows(DEFAULT_MAX_HIT_COUNT); .setRows(DEFAULT_MAX_HIT_COUNT);
query = setNameQuery(query, querystr, vreq); setQuery(query, queryStr, vreq);
// Filter by type // Filter by type
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE); String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
if (typeParam != null) { if (typeParam != null) {
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\""); query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
} }
// Set the fields to retrieve **** RY // query.setFields(VitroLuceneTermNames.NAME_RAW, VitroLuceneTermNames.URI) // fields to retrieve
// query = query.setFields( ... ); // .setSortField(VitroLuceneTermNames.NAME_RAW, SolrQuery.ORDER.asc);
return query; return query;
} }
private SolrQuery setNameQuery(SolrQuery query, String querystr, HttpServletRequest request) { private void setQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
if (StringUtils.isBlank(queryStr)) {
log.error("No query string");
}
String tokenizeParam = (String) request.getParameter("tokenize"); String tokenizeParam = (String) request.getParameter("tokenize");
boolean tokenize = "true".equals(tokenizeParam); boolean tokenize = "true".equals(tokenizeParam);
@ -169,13 +175,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
// query will not be stemmed. So we don't look at the stem parameter until we get to // query will not be stemmed. So we don't look at the stem parameter until we get to
// setTokenizedNameQuery(). // setTokenizedNameQuery().
if (tokenize) { if (tokenize) {
return setTokenizedNameQuery(query, querystr, request); setTokenizedQuery(query, queryStr, request);
} else { } else {
return setUntokenizedNameQuery(query, querystr); setUntokenizedQuery(query, queryStr);
} }
} }
private SolrQuery setTokenizedNameQuery(SolrQuery query, String querystr, HttpServletRequest request) { private void setTokenizedQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
String stemParam = (String) request.getParameter("stem"); String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam); boolean stem = "true".equals(stemParam);
@ -193,15 +199,15 @@ public class SolrAutocompleteController extends VitroAjaxController {
// // of wildcard and non-wildcard queries. The query will have only an implicit disjunction // // of wildcard and non-wildcard queries. The query will have only an implicit disjunction
// // operator: e.g., +(name:tales name:tales*) // // operator: e.g., +(name:tales name:tales*)
// try { // try {
// log.debug("Adding non-wildcard query for " + querystr); // log.debug("Adding non-wildcard query for " + queryStr);
// Query query = parser.parse(querystr); // Query query = parser.parse(queryStr);
// boolQuery.add(query, BooleanClause.Occur.SHOULD); // boolQuery.add(query, BooleanClause.Occur.SHOULD);
// //
// // Prevent ParseException here when adding * after a space. // // Prevent ParseException here when adding * after a space.
// // If there's a space at the end, we don't need the wildcard query. // // If there's a space at the end, we don't need the wildcard query.
// if (! querystr.endsWith(" ")) { // if (! queryStr.endsWith(" ")) {
// log.debug("Adding wildcard query for " + querystr); // log.debug("Adding wildcard query for " + queryStr);
// Query wildcardQuery = parser.parse(querystr + "*"); // Query wildcardQuery = parser.parse(queryStr + "*");
// boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD); // boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
// } // }
// //
@ -210,20 +216,17 @@ public class SolrAutocompleteController extends VitroAjaxController {
// log.warn(e, e); // log.warn(e, e);
// } // }
return query;
} }
private SolrQuery setUntokenizedNameQuery(SolrQuery query, String querystr) { private void setUntokenizedQuery(SolrQuery query, String queryStr) {
// Using facet method described in http://solr.pl/en/2010/10/18/solr-and-autocomplete-part-1/ // Don't know why we should have to do this; the analyzer should take care of it, but doesn't
// Consider using Solr Suggester in a future version. queryStr = queryStr.toLowerCase();
return query.setFacet(true) // Solr wants whitespace to be escaped with a backslash
.addFacetField(VitroLuceneTermNames.NAME_LOWERCASE) // Better: replace \s+
.setFacetMinCount(1) queryStr = queryStr.replaceAll(" ", "\\\\ ");
.setFacetLimit(MAX_QUERY_LENGTH) queryStr = VitroLuceneTermNames.NAME_LOWERCASE + ":" + queryStr + "*";
.setFacetPrefix(querystr)//.toLowerCase()) query.setQuery(queryStr);
//.setFacetSort(FacetParams.FACET_SORT_INDEX) // sort by alpha (but doesn't work)
.setQuery("*:*");
} }