NIHVIVO-2459 Untokenized, unstemmed autocomplete (e.g., in addAuthors form). Added commented-out configuration for Solr Suggester in solrconfig.xml.
This commit is contained in:
parent
bb07c9ad7e
commit
a86480e6b7
3 changed files with 106 additions and 60 deletions
|
@ -470,32 +470,33 @@
|
|||
when adding a document.
|
||||
-->
|
||||
|
||||
<!-- **************************** Vitro Fields *************************** -->
|
||||
<!-- **************************** Vitro Fields *************************** -->
|
||||
|
||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
||||
|
||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
|
||||
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
|
||||
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="acNameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="acNameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
<field name="moniker" type="ignored" />
|
||||
<field name="modType" type="ignored"/>
|
||||
<field name="JCLASS" type="ignored"/>
|
||||
|
||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
||||
|
||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
|
||||
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
|
||||
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="acNameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="acNameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
<field name="moniker" type="ignored" />
|
||||
<field name="modType" type="ignored"/>
|
||||
<field name="JCLASS" type="ignored"/>
|
||||
|
||||
<!-- **************************** End Vitro Fields *************************** -->
|
||||
|
||||
<!-- **************************** End Vitro Fields *************************** -->
|
||||
|
||||
|
||||
<!-- catchall field, containing all other searchable text fields (implemented
|
||||
|
|
|
@ -384,8 +384,10 @@
|
|||
disagree on this property, the value at any given moment will
|
||||
be based on the last SolrCore to be initialized.
|
||||
|
||||
-->
|
||||
<maxBooleanClauses>1024</maxBooleanClauses>
|
||||
-->
|
||||
<!-- Increasing to handle large wildcard queries used in IndividualListController.
|
||||
See VIVO-384. -->
|
||||
<maxBooleanClauses>50000</maxBooleanClauses>
|
||||
|
||||
|
||||
<!-- Solr Internal Query Caches
|
||||
|
@ -1394,6 +1396,46 @@
|
|||
</highlighting>
|
||||
</searchComponent>
|
||||
|
||||
<!-- Autocomplete -->
|
||||
<!--
|
||||
<searchComponent class="solr.SpellCheckComponent" name="suggest">
|
||||
<lst name="spellchecker">
|
||||
<str name="name">suggest</str>
|
||||
<str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
|
||||
<str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
|
||||
-->
|
||||
<!-- Alternatives to lookupImpl:
|
||||
org.apache.solr.spelling.suggest.fst.FSTLookup [finite state automaton]
|
||||
org.apache.solr.spelling.suggest.jaspell.JaspellLookup [default, jaspell-based]
|
||||
org.apache.solr.spelling.suggest.tst.TSTLookup [ternary trees]
|
||||
-->
|
||||
<!-- the indexed field to derive suggestions from -->
|
||||
<!--
|
||||
<str name="field">nameLowercase</str>
|
||||
<float name="threshold">0.005</float>
|
||||
<str name="buildOnCommit">false</str>
|
||||
<str name="storeDir">suggest</str>
|
||||
-->
|
||||
<!--
|
||||
<str name="sourceLocation">american-english</str>
|
||||
-->
|
||||
<!--
|
||||
</lst>
|
||||
</searchComponent>
|
||||
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest">
|
||||
<lst name="defaults">
|
||||
<str name="spellcheck">true</str>
|
||||
<str name="spellcheck.dictionary">suggest</str>
|
||||
<str name="spellcheck.onlyMorePopular">false</str>
|
||||
<str name="spellcheck.count">100</str>
|
||||
<str name="spellcheck.collate">false</str>
|
||||
</lst>
|
||||
<arr name="components">
|
||||
<str>suggest</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
-->
|
||||
|
||||
<!-- Update Processors
|
||||
|
||||
Chains of Update Processor Factories for dealing with Update
|
||||
|
|
|
@ -13,15 +13,16 @@ import javax.servlet.ServletException;
|
|||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrServer;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.client.solrj.response.TermsResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
|
||||
|
@ -113,7 +114,8 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
}
|
||||
}
|
||||
|
||||
Collections.sort(results);
|
||||
// See if we can do without this, since we set sort field on the query
|
||||
//Collections.sort(results);
|
||||
|
||||
// map.put("results", results);
|
||||
// writeTemplate(TEMPLATE_DEFAULT, map, config, vreq, response);
|
||||
|
@ -130,38 +132,42 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
}
|
||||
}
|
||||
|
||||
private SolrQuery getQuery(String querystr, VitroRequest vreq) {
|
||||
private SolrQuery getQuery(String queryStr, VitroRequest vreq) {
|
||||
|
||||
if ( querystr == null) {
|
||||
if ( queryStr == null) {
|
||||
log.error("There was no parameter '"+ PARAM_QUERY
|
||||
+"' in the request.");
|
||||
return null;
|
||||
} else if( querystr.length() > MAX_QUERY_LENGTH ) {
|
||||
} else if( queryStr.length() > MAX_QUERY_LENGTH ) {
|
||||
log.debug("The search was too long. The maximum " +
|
||||
"query length is " + MAX_QUERY_LENGTH );
|
||||
return null;
|
||||
}
|
||||
|
||||
SolrQuery query = new SolrQuery();
|
||||
query = query.setStart(0);
|
||||
query = query.setRows(DEFAULT_MAX_HIT_COUNT);
|
||||
query.setStart(0)
|
||||
.setRows(DEFAULT_MAX_HIT_COUNT);
|
||||
|
||||
query = setNameQuery(query, querystr, vreq);
|
||||
setQuery(query, queryStr, vreq);
|
||||
|
||||
// Filter by type
|
||||
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
|
||||
if (typeParam != null) {
|
||||
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
|
||||
query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
|
||||
}
|
||||
|
||||
// Set the fields to retrieve **** RY
|
||||
// query = query.setFields( ... );
|
||||
|
||||
// query.setFields(VitroLuceneTermNames.NAME_RAW, VitroLuceneTermNames.URI) // fields to retrieve
|
||||
// .setSortField(VitroLuceneTermNames.NAME_RAW, SolrQuery.ORDER.asc);
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
private SolrQuery setNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
|
||||
private void setQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
|
||||
|
||||
if (StringUtils.isBlank(queryStr)) {
|
||||
log.error("No query string");
|
||||
}
|
||||
|
||||
String tokenizeParam = (String) request.getParameter("tokenize");
|
||||
boolean tokenize = "true".equals(tokenizeParam);
|
||||
|
||||
|
@ -169,13 +175,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
// query will not be stemmed. So we don't look at the stem parameter until we get to
|
||||
// setTokenizedNameQuery().
|
||||
if (tokenize) {
|
||||
return setTokenizedNameQuery(query, querystr, request);
|
||||
setTokenizedQuery(query, queryStr, request);
|
||||
} else {
|
||||
return setUntokenizedNameQuery(query, querystr);
|
||||
setUntokenizedQuery(query, queryStr);
|
||||
}
|
||||
}
|
||||
|
||||
private SolrQuery setTokenizedNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
|
||||
private void setTokenizedQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
|
||||
|
||||
String stemParam = (String) request.getParameter("stem");
|
||||
boolean stem = "true".equals(stemParam);
|
||||
|
@ -193,15 +199,15 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
// // of wildcard and non-wildcard queries. The query will look have only an implicit disjunction
|
||||
// // operator: e.g., +(name:tales name:tales*)
|
||||
// try {
|
||||
// log.debug("Adding non-wildcard query for " + querystr);
|
||||
// Query query = parser.parse(querystr);
|
||||
// log.debug("Adding non-wildcard query for " + queryStr);
|
||||
// Query query = parser.parse(queryStr);
|
||||
// boolQuery.add(query, BooleanClause.Occur.SHOULD);
|
||||
//
|
||||
// // Prevent ParseException here when adding * after a space.
|
||||
// // If there's a space at the end, we don't need the wildcard query.
|
||||
// if (! querystr.endsWith(" ")) {
|
||||
// log.debug("Adding wildcard query for " + querystr);
|
||||
// Query wildcardQuery = parser.parse(querystr + "*");
|
||||
// if (! queryStr.endsWith(" ")) {
|
||||
// log.debug("Adding wildcard query for " + queryStr);
|
||||
// Query wildcardQuery = parser.parse(queryStr + "*");
|
||||
// boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
|
||||
// }
|
||||
//
|
||||
|
@ -209,21 +215,18 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
// } catch (ParseException e) {
|
||||
// log.warn(e, e);
|
||||
// }
|
||||
|
||||
return query;
|
||||
|
||||
}
|
||||
|
||||
private SolrQuery setUntokenizedNameQuery(SolrQuery query, String querystr) {
|
||||
private void setUntokenizedQuery(SolrQuery query, String queryStr) {
|
||||
|
||||
// Using facet method described in http://solr.pl/en/2010/10/18/solr-and-autocomplete-part-1/
|
||||
// Consider using Solr Suggester in a future version.
|
||||
return query.setFacet(true)
|
||||
.addFacetField(VitroLuceneTermNames.NAME_LOWERCASE)
|
||||
.setFacetMinCount(1)
|
||||
.setFacetLimit(MAX_QUERY_LENGTH)
|
||||
.setFacetPrefix(querystr)//.toLowerCase())
|
||||
//.setFacetSort(FacetParams.FACET_SORT_INDEX) // sort by alpha (but doesn't work)
|
||||
.setQuery("*:*");
|
||||
// Don't know why we should have to do this; the analyzer should take care of it, but doesn't
|
||||
queryStr = queryStr.toLowerCase();
|
||||
// Solr wants whitespace to be escaped with a backslash
|
||||
// Better: replace \s+
|
||||
queryStr = queryStr.replaceAll(" ", "\\\\ ");
|
||||
queryStr = VitroLuceneTermNames.NAME_LOWERCASE + ":" + queryStr + "*";
|
||||
query.setQuery(queryStr);
|
||||
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue