NIHVIVO-2459 Solr field definitions for autocomplete
This commit is contained in:
parent
eac9e5dca1
commit
ad98e7723c
7 changed files with 56 additions and 29 deletions
|
@ -224,7 +224,7 @@
|
|||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||
</analyzer>
|
||||
|
@ -242,6 +242,20 @@
|
|||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Like text, but without synonyms and stemming. Good for autocomplete matching of proper names, where we want to remove
|
||||
stop words but not stem. -->
|
||||
<!-- textUnstemmed: tokenized, stop-word-filtered, lowercased text with no stemming or synonym filter in the chain. -->
<fieldType name="textUnstemmed" class="solr.TextField" positionIncrementGap="100">
|
||||
<!-- The analyzer has no type attribute, so only this one chain is declared for the field type;
     per the Solr documentation an untyped analyzer is used for both indexing and querying. -->
<analyzer>
|
||||
<!-- Step 1: split the input into tokens on whitespace. -->
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<!-- Step 2: drop tokens listed in stopwords.txt. ignoreCase="true" matters here because
     lowercasing only happens in the last step of this chain. -->
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
enablePositionIncrements="true"
|
||||
/>
|
||||
<!-- Step 3: emit word and number sub-parts and split on case changes. Every catenate* option
     is 0, so no joined (concatenated) variants of the parts are produced. -->
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<!-- Step 4: lowercase the remaining tokens. -->
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||
|
@ -423,8 +437,6 @@
|
|||
|
||||
<!-- **************************** Vitro Fields *************************** -->
|
||||
|
||||
|
||||
|
||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
||||
|
||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||
|
@ -434,10 +446,10 @@
|
|||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||
<field name="nameRaw" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<!-- RY Not sure if we need to store nameLowercase -->
|
||||
<field name="nameLowercase" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="nameUnstemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
|
||||
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="acNameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="acNameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||
|
|
|
@ -377,7 +377,7 @@ public class JSONReconcileServlet extends VitroHttpServlet {
|
|||
|
||||
String stemParam = (String) request.getParameter("stem");
|
||||
boolean stem = "true".equals(stemParam);
|
||||
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||
String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED;
|
||||
|
||||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
|
||||
|
|
|
@ -207,7 +207,7 @@ public class AutocompleteController extends VitroAjaxController {
|
|||
|
||||
String stemParam = (String) request.getParameter("stem");
|
||||
boolean stem = "true".equals(stemParam);
|
||||
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||
String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED;
|
||||
|
||||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
|
||||
|
|
|
@ -229,7 +229,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
Document document = searcherForRequest.doc(scoreDoc.doc);
|
||||
Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc);
|
||||
|
||||
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED) + " score: " +scoreDoc.score);
|
||||
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED) + " score: " +scoreDoc.score);
|
||||
log.debug("Scoring of the doc explained " + explanation.toString());
|
||||
log.debug("Explanation's description "+ explanation.getDescription());
|
||||
log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT));
|
||||
|
@ -405,7 +405,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
Document doc;
|
||||
try {
|
||||
doc = searcher.doc(topDocs.scoreDocs[i].doc);
|
||||
String name =doc.get(Entity2LuceneDoc.term.NAME_STEMMED);
|
||||
String name =doc.get(Entity2LuceneDoc.term.AC_NAME_STEMMED);
|
||||
if( name != null && name.length() > 0)
|
||||
alphas.add( name.substring(0, 1));
|
||||
} catch (CorruptIndexException e) {
|
||||
|
@ -622,7 +622,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
boolQuery.add( query, BooleanClause.Occur.MUST );
|
||||
boolQuery.add(
|
||||
new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME_STEMMED, alpha+'*')),
|
||||
new WildcardQuery(new Term(Entity2LuceneDoc.term.AC_NAME_STEMMED, alpha+'*')),
|
||||
BooleanClause.Occur.MUST);
|
||||
query = boolQuery;
|
||||
}
|
||||
|
@ -683,8 +683,8 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
|||
// qp.setStemmedToUnstemmed(map);
|
||||
|
||||
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{
|
||||
VitroLuceneTermNames.NAME_STEMMED,
|
||||
VitroLuceneTermNames.NAME_UNSTEMMED,
|
||||
VitroLuceneTermNames.AC_NAME_STEMMED,
|
||||
VitroLuceneTermNames.AC_NAME_UNSTEMMED,
|
||||
VitroLuceneTermNames.RDFTYPE,
|
||||
VitroLuceneTermNames.MONIKER,
|
||||
VitroLuceneTermNames.ALLTEXT,
|
||||
|
|
|
@ -12,6 +12,7 @@ import java.util.Map;
|
|||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -21,8 +22,10 @@ import org.apache.solr.client.solrj.SolrServer;
|
|||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.json.JSONArray;
|
||||
import org.json.JSONObject;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
|
||||
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
|
||||
|
@ -176,7 +179,7 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
|
||||
String stemParam = (String) request.getParameter("stem");
|
||||
boolean stem = "true".equals(stemParam);
|
||||
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||
String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED ;
|
||||
|
||||
BooleanQuery boolQuery = new BooleanQuery();
|
||||
|
||||
|
@ -214,9 +217,18 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
|
||||
//querystr = querystr.toLowerCase();
|
||||
querystr += "*";
|
||||
query = query.setQuery(querystr);
|
||||
// *** It's the df parameter that sets the field to search
|
||||
//String field = VitroLuceneTermNames.LABEL_LOWERCASE;
|
||||
//query = query.setQuery(VitroLuceneTermNames.NAME_LOWERCASE + ":" + querystr);
|
||||
//query.addFilterQuery(VitroLuceneTermNames.NAME_LOWERCASE);
|
||||
//query.setQuery(querystr);
|
||||
|
||||
try {
|
||||
SolrConfig config = new SolrConfig();
|
||||
|
||||
} catch (Exception e) {
|
||||
// TODO Auto-generated catch block
|
||||
log.error(e, e);
|
||||
return null;
|
||||
}
|
||||
|
||||
return query;
|
||||
}
|
||||
|
|
|
@ -73,17 +73,20 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
public static final String CLASSLOCALNAME = "classLocalName";
|
||||
|
||||
// Fields derived from rdfs:label
|
||||
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
|
||||
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming.
|
||||
* Used only in retrieval rather than search. **/
|
||||
public static String NAME_RAW = "nameRaw"; // was NAMERAW
|
||||
|
||||
/** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
|
||||
public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
|
||||
|
||||
/** rdfs:label lowercased, tokenized, stop words, no stemming **/
|
||||
public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED
|
||||
/** rdfs:label lowercased, tokenized, stop words, no stemming.
|
||||
* Used for autocomplete matching on proper names. **/
|
||||
public static String AC_NAME_UNSTEMMED = "acNameUnstemmed"; // was NAMEUNSTEMMED
|
||||
|
||||
/** rdfs:label lowercased, tokenized, stop words, stemmed **/
|
||||
public static String NAME_STEMMED = "nameStemmed"; // was NAME
|
||||
/** rdfs:label lowercased, tokenized, stop words, stemmed.
|
||||
* Used for autocomplete matching where stemming is desired (e.g., book titles) **/
|
||||
public static String AC_NAME_STEMMED = "acNameStemmed"; // was NAME
|
||||
|
||||
}
|
||||
|
||||
|
@ -215,11 +218,11 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
nameLowerCase.setBoost(NAME_BOOST);
|
||||
doc.add(nameLowerCase);
|
||||
|
||||
Field nameUnstemmed = new Field(term.NAME_UNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||
Field nameUnstemmed = new Field(term.AC_NAME_UNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||
nameUnstemmed.setBoost(NAME_BOOST);
|
||||
doc.add(nameUnstemmed);
|
||||
|
||||
Field nameStemmed = new Field(term.NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||
Field nameStemmed = new Field(term.AC_NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||
nameStemmed.setBoost(NAME_BOOST);
|
||||
doc.add(nameStemmed);
|
||||
|
||||
|
|
|
@ -7,8 +7,8 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
|
|||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_UNSTEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
||||
|
||||
import java.io.File;
|
||||
|
@ -246,8 +246,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
|
||||
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
||||
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(NAME_UNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
||||
analyzer.addAnalyzer(AC_NAME_UNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(AC_NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
||||
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
||||
|
|
Loading…
Add table
Reference in a new issue