NIHVIVO-2459 Define edgeNGram field for autocomplete on people names. NIHVIVO-2437 Refactoring in search controllers to remove outdated references to Lucene.

This commit is contained in:
ryounes 2011-06-27 20:54:15 +00:00
parent cfcc73d83a
commit 15f5fba80b
13 changed files with 155 additions and 107 deletions

View file

@ -258,16 +258,36 @@
<!-- Like text, but without synonyms and stemming. Good for autocomplete matching of proper names, where we want to remove
stop words but not stem. -->
<fieldType name="textUnstemmed" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<fieldType name="text_unstemmed" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true"
/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true" />
<filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1"
generateNumberParts="1"
catenateWords="0"
catenateNumbers="0"
catenateAll="0"
splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true" />
<!-- <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1"
generateNumberParts="1"
catenateWords="0"
catenateNumbers="0"
catenateAll="0"
splitOnCaseChange="1"/> -->
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
@ -443,7 +463,19 @@
See http://wiki.apache.org/solr/SpatialSearch
-->
<fieldtype name="geohash" class="solr.GeoHashField"/>
<fieldtype name="edgengram_untokenized" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25" side="front"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldtype>
</types>
@ -489,8 +521,14 @@
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
<!-- A sortable version of nameLowercase -->
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" />
<field name="nameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="nameUnstemmed" type="text_unstemmed" indexed="true" stored="false" multiValued="true"/>
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<!-- Untokenized autocomplete on name (suitable for person names) -->
<field name="acNameUntokenized" type="edgengram_untokenized" indexed="true" stored="false" multiValued="true" />
<!--
<field name="acNameTokenized" type="edgengram_tokenized" indexed="true" stored="false" multiValued="true" />
<field name="acNameStemmed" type="edgengram_stemmed" indexed="true" stored="false" multiValued="true" />
-->
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/>

View file

@ -709,7 +709,7 @@
<lst name="defaults">
<str name="defType">edismax</str>
<str name="qf">nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo</str>
<str name="pf">targetInfo nameStemmed nameUnstemmed</str>
<str name="pf">targetInfo</str>
<str name="echoParams">explicit</str>
<str name="ps">2</str>
<str name="qs">2</str>