NIHVIVO-2459 Untokenized, unstemmed autocomplete (e.g., in addAuthors form). Added commented-out configuration for Solr Suggester in solrconfig.xml.
This commit is contained in:
parent
bb07c9ad7e
commit
a86480e6b7
3 changed files with 106 additions and 60 deletions
|
@ -470,32 +470,33 @@
|
||||||
when adding a document.
|
when adding a document.
|
||||||
-->
|
-->
|
||||||
|
|
||||||
<!-- **************************** Vitro Fields *************************** -->
|
<!-- **************************** Vitro Fields *************************** -->
|
||||||
|
|
||||||
|
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
||||||
|
|
||||||
|
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||||
|
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
||||||
|
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||||
|
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
|
||||||
|
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
|
||||||
|
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="acNameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="acNameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
||||||
|
|
||||||
|
|
||||||
|
<field name="moniker" type="ignored" />
|
||||||
|
<field name="modType" type="ignored"/>
|
||||||
|
<field name="JCLASS" type="ignored"/>
|
||||||
|
|
||||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
|
||||||
|
<!-- **************************** End Vitro Fields *************************** -->
|
||||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
|
||||||
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
|
|
||||||
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
|
|
||||||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
|
||||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
|
||||||
<field name="nameRaw" type="string" indexed="false" stored="true" multiValued="true"/>
|
|
||||||
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
|
|
||||||
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
|
|
||||||
<field name="acNameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<field name="acNameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
|
||||||
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
|
||||||
<field name="THUMBNAIL" type="string" indexed="true" stored="true"/>
|
|
||||||
|
|
||||||
|
|
||||||
<field name="moniker" type="ignored" />
|
|
||||||
<field name="modType" type="ignored"/>
|
|
||||||
<field name="JCLASS" type="ignored"/>
|
|
||||||
|
|
||||||
<!-- **************************** End Vitro Fields *************************** -->
|
|
||||||
|
|
||||||
|
|
||||||
<!-- catchall field, containing all other searchable text fields (implemented
|
<!-- catchall field, containing all other searchable text fields (implemented
|
||||||
|
|
|
@ -384,8 +384,10 @@
|
||||||
disagree on this property, the value at any given moment will
|
disagree on this property, the value at any given moment will
|
||||||
be based on the last SolrCore to be initialized.
|
be based on the last SolrCore to be initialized.
|
||||||
|
|
||||||
-->
|
-->
|
||||||
<maxBooleanClauses>1024</maxBooleanClauses>
|
<!-- Increasing to handle large wildcard queries used in IndividualListController.
|
||||||
|
See VIVO-384. -->
|
||||||
|
<maxBooleanClauses>50000</maxBooleanClauses>
|
||||||
|
|
||||||
|
|
||||||
<!-- Solr Internal Query Caches
|
<!-- Solr Internal Query Caches
|
||||||
|
@ -1394,6 +1396,46 @@
|
||||||
</highlighting>
|
</highlighting>
|
||||||
</searchComponent>
|
</searchComponent>
|
||||||
|
|
||||||
|
<!-- Autocomplete -->
|
||||||
|
<!--
|
||||||
|
<searchComponent class="solr.SpellCheckComponent" name="suggest">
|
||||||
|
<lst name="spellchecker">
|
||||||
|
<str name="name">suggest</str>
|
||||||
|
<str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
|
||||||
|
<str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
|
||||||
|
-->
|
||||||
|
<!-- Alternatives to lookupImpl:
|
||||||
|
org.apache.solr.spelling.suggest.fst.FSTLookup [finite state automaton]
|
||||||
|
org.apache.solr.spelling.suggest.jaspell.JaspellLookup [default, jaspell-based]
|
||||||
|
org.apache.solr.spelling.suggest.tst.TSTLookup [ternary trees]
|
||||||
|
-->
|
||||||
|
<!-- the indexed field to derive suggestions from -->
|
||||||
|
<!--
|
||||||
|
<str name="field">nameLowercase</str>
|
||||||
|
<float name="threshold">0.005</float>
|
||||||
|
<str name="buildOnCommit">false</str>
|
||||||
|
<str name="storeDir">suggest</str>
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
<str name="sourceLocation">american-english</str>
|
||||||
|
-->
|
||||||
|
<!--
|
||||||
|
</lst>
|
||||||
|
</searchComponent>
|
||||||
|
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest">
|
||||||
|
<lst name="defaults">
|
||||||
|
<str name="spellcheck">true</str>
|
||||||
|
<str name="spellcheck.dictionary">suggest</str>
|
||||||
|
<str name="spellcheck.onlyMorePopular">false</str>
|
||||||
|
<str name="spellcheck.count">100</str>
|
||||||
|
<str name="spellcheck.collate">false</str>
|
||||||
|
</lst>
|
||||||
|
<arr name="components">
|
||||||
|
<str>suggest</str>
|
||||||
|
</arr>
|
||||||
|
</requestHandler>
|
||||||
|
-->
|
||||||
|
|
||||||
<!-- Update Processors
|
<!-- Update Processors
|
||||||
|
|
||||||
Chains of Update Processor Factories for dealing with Update
|
Chains of Update Processor Factories for dealing with Update
|
||||||
|
|
|
@ -13,15 +13,16 @@ import javax.servlet.ServletException;
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.apache.lucene.search.BooleanQuery;
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
import org.apache.solr.client.solrj.SolrQuery;
|
import org.apache.solr.client.solrj.SolrQuery;
|
||||||
import org.apache.solr.client.solrj.SolrServer;
|
import org.apache.solr.client.solrj.SolrServer;
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
|
import org.apache.solr.client.solrj.response.TermsResponse;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
import org.apache.solr.common.SolrDocumentList;
|
import org.apache.solr.common.SolrDocumentList;
|
||||||
import org.apache.solr.common.params.FacetParams;
|
|
||||||
import org.json.JSONArray;
|
import org.json.JSONArray;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
|
|
||||||
|
@ -113,7 +114,8 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Collections.sort(results);
|
// See if we can do without this, since we set sort field on the query
|
||||||
|
//Collections.sort(results);
|
||||||
|
|
||||||
// map.put("results", results);
|
// map.put("results", results);
|
||||||
// writeTemplate(TEMPLATE_DEFAULT, map, config, vreq, response);
|
// writeTemplate(TEMPLATE_DEFAULT, map, config, vreq, response);
|
||||||
|
@ -130,38 +132,42 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private SolrQuery getQuery(String querystr, VitroRequest vreq) {
|
private SolrQuery getQuery(String queryStr, VitroRequest vreq) {
|
||||||
|
|
||||||
if ( querystr == null) {
|
if ( queryStr == null) {
|
||||||
log.error("There was no parameter '"+ PARAM_QUERY
|
log.error("There was no parameter '"+ PARAM_QUERY
|
||||||
+"' in the request.");
|
+"' in the request.");
|
||||||
return null;
|
return null;
|
||||||
} else if( querystr.length() > MAX_QUERY_LENGTH ) {
|
} else if( queryStr.length() > MAX_QUERY_LENGTH ) {
|
||||||
log.debug("The search was too long. The maximum " +
|
log.debug("The search was too long. The maximum " +
|
||||||
"query length is " + MAX_QUERY_LENGTH );
|
"query length is " + MAX_QUERY_LENGTH );
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
SolrQuery query = new SolrQuery();
|
SolrQuery query = new SolrQuery();
|
||||||
query = query.setStart(0);
|
query.setStart(0)
|
||||||
query = query.setRows(DEFAULT_MAX_HIT_COUNT);
|
.setRows(DEFAULT_MAX_HIT_COUNT);
|
||||||
|
|
||||||
query = setNameQuery(query, querystr, vreq);
|
setQuery(query, queryStr, vreq);
|
||||||
|
|
||||||
// Filter by type
|
// Filter by type
|
||||||
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
|
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
|
||||||
if (typeParam != null) {
|
if (typeParam != null) {
|
||||||
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
|
query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the fields to retrieve **** RY
|
// query.setFields(VitroLuceneTermNames.NAME_RAW, VitroLuceneTermNames.URI) // fields to retrieve
|
||||||
// query = query.setFields( ... );
|
// .setSortField(VitroLuceneTermNames.NAME_RAW, SolrQuery.ORDER.asc);
|
||||||
|
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
|
|
||||||
private SolrQuery setNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
|
private void setQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
|
||||||
|
|
||||||
|
if (StringUtils.isBlank(queryStr)) {
|
||||||
|
log.error("No query string");
|
||||||
|
}
|
||||||
|
|
||||||
String tokenizeParam = (String) request.getParameter("tokenize");
|
String tokenizeParam = (String) request.getParameter("tokenize");
|
||||||
boolean tokenize = "true".equals(tokenizeParam);
|
boolean tokenize = "true".equals(tokenizeParam);
|
||||||
|
|
||||||
|
@ -169,13 +175,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
||||||
// query will not be stemmed. So we don't look at the stem parameter until we get to
|
// query will not be stemmed. So we don't look at the stem parameter until we get to
|
||||||
// setTokenizedNameQuery().
|
// setTokenizedQuery().
|
||||||
if (tokenize) {
|
if (tokenize) {
|
||||||
return setTokenizedNameQuery(query, querystr, request);
|
setTokenizedQuery(query, queryStr, request);
|
||||||
} else {
|
} else {
|
||||||
return setUntokenizedNameQuery(query, querystr);
|
setUntokenizedQuery(query, queryStr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private SolrQuery setTokenizedNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
|
private void setTokenizedQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
|
||||||
|
|
||||||
String stemParam = (String) request.getParameter("stem");
|
String stemParam = (String) request.getParameter("stem");
|
||||||
boolean stem = "true".equals(stemParam);
|
boolean stem = "true".equals(stemParam);
|
||||||
|
@ -193,15 +199,15 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
||||||
// // of wildcard and non-wildcard queries. The query will have only an implicit disjunction
|
// // of wildcard and non-wildcard queries. The query will have only an implicit disjunction
|
||||||
// // operator: e.g., +(name:tales name:tales*)
|
// // operator: e.g., +(name:tales name:tales*)
|
||||||
// try {
|
// try {
|
||||||
// log.debug("Adding non-wildcard query for " + querystr);
|
// log.debug("Adding non-wildcard query for " + queryStr);
|
||||||
// Query query = parser.parse(querystr);
|
// Query query = parser.parse(queryStr);
|
||||||
// boolQuery.add(query, BooleanClause.Occur.SHOULD);
|
// boolQuery.add(query, BooleanClause.Occur.SHOULD);
|
||||||
//
|
//
|
||||||
// // Prevent ParseException here when adding * after a space.
|
// // Prevent ParseException here when adding * after a space.
|
||||||
// // If there's a space at the end, we don't need the wildcard query.
|
// // If there's a space at the end, we don't need the wildcard query.
|
||||||
// if (! querystr.endsWith(" ")) {
|
// if (! queryStr.endsWith(" ")) {
|
||||||
// log.debug("Adding wildcard query for " + querystr);
|
// log.debug("Adding wildcard query for " + queryStr);
|
||||||
// Query wildcardQuery = parser.parse(querystr + "*");
|
// Query wildcardQuery = parser.parse(queryStr + "*");
|
||||||
// boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
|
// boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
|
||||||
// }
|
// }
|
||||||
//
|
//
|
||||||
|
@ -209,21 +215,18 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
||||||
// } catch (ParseException e) {
|
// } catch (ParseException e) {
|
||||||
// log.warn(e, e);
|
// log.warn(e, e);
|
||||||
// }
|
// }
|
||||||
|
|
||||||
return query;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private SolrQuery setUntokenizedNameQuery(SolrQuery query, String querystr) {
|
private void setUntokenizedQuery(SolrQuery query, String queryStr) {
|
||||||
|
|
||||||
// Using facet method described in http://solr.pl/en/2010/10/18/solr-and-autocomplete-part-1/
|
// Don't know why we should have to do this; the analyzer should take care of it, but doesn't
|
||||||
// Consider using Solr Suggester in a future version.
|
queryStr = queryStr.toLowerCase();
|
||||||
return query.setFacet(true)
|
// Solr wants whitespace to be escaped with a backslash
|
||||||
.addFacetField(VitroLuceneTermNames.NAME_LOWERCASE)
|
// Better: replace \s+
|
||||||
.setFacetMinCount(1)
|
queryStr = queryStr.replaceAll(" ", "\\\\ ");
|
||||||
.setFacetLimit(MAX_QUERY_LENGTH)
|
queryStr = VitroLuceneTermNames.NAME_LOWERCASE + ":" + queryStr + "*";
|
||||||
.setFacetPrefix(querystr)//.toLowerCase())
|
query.setQuery(queryStr);
|
||||||
//.setFacetSort(FacetParams.FACET_SORT_INDEX) // sort by alpha (but doesn't work)
|
|
||||||
.setQuery("*:*");
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue