NIHVIVO-2459 Solr field definitions for autocomplete
This commit is contained in:
parent
eac9e5dca1
commit
ad98e7723c
7 changed files with 56 additions and 29 deletions
|
@ -224,7 +224,7 @@
|
||||||
words="stopwords.txt"
|
words="stopwords.txt"
|
||||||
enablePositionIncrements="true"
|
enablePositionIncrements="true"
|
||||||
/>
|
/>
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
<filter class="solr.SnowballPorterFilterFactory" language="English" protected="protwords.txt"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
|
@ -242,6 +242,20 @@
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldType>
|
</fieldType>
|
||||||
|
|
||||||
|
<!-- Like text, but without synonyms and stemming. Good for autocomplete matching of proper names, where we want to remove
|
||||||
|
stop words but not stem. -->
|
||||||
|
<fieldType name="textUnstemmed" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer>
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.StopFilterFactory"
|
||||||
|
ignoreCase="true"
|
||||||
|
words="stopwords.txt"
|
||||||
|
enablePositionIncrements="true"
|
||||||
|
/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
|
||||||
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
|
||||||
|
@ -423,8 +437,6 @@
|
||||||
|
|
||||||
<!-- **************************** Vitro Fields *************************** -->
|
<!-- **************************** Vitro Fields *************************** -->
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
|
||||||
|
|
||||||
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
|
||||||
|
@ -434,10 +446,10 @@
|
||||||
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
|
||||||
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
|
||||||
<field name="nameRaw" type="string" indexed="true" stored="true" multiValued="true"/>
|
<field name="nameRaw" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
<!-- RY Not sure if we need to store nameLowercase -->
|
<!-- RY Not sure if we need to store nameLowercase. Is it ever displayed? -->
|
||||||
<field name="nameLowercase" type="string" indexed="true" stored="true" multiValued="true"/>
|
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
|
||||||
<field name="nameUnstemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
<field name="acNameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
|
||||||
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
<field name="acNameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
|
||||||
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>
|
||||||
|
|
|
@ -377,7 +377,7 @@ public class JSONReconcileServlet extends VitroHttpServlet {
|
||||||
|
|
||||||
String stemParam = (String) request.getParameter("stem");
|
String stemParam = (String) request.getParameter("stem");
|
||||||
boolean stem = "true".equals(stemParam);
|
boolean stem = "true".equals(stemParam);
|
||||||
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
|
String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED;
|
||||||
|
|
||||||
BooleanQuery boolQuery = new BooleanQuery();
|
BooleanQuery boolQuery = new BooleanQuery();
|
||||||
|
|
||||||
|
|
|
@ -207,7 +207,7 @@ public class AutocompleteController extends VitroAjaxController {
|
||||||
|
|
||||||
String stemParam = (String) request.getParameter("stem");
|
String stemParam = (String) request.getParameter("stem");
|
||||||
boolean stem = "true".equals(stemParam);
|
boolean stem = "true".equals(stemParam);
|
||||||
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
|
String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED;
|
||||||
|
|
||||||
BooleanQuery boolQuery = new BooleanQuery();
|
BooleanQuery boolQuery = new BooleanQuery();
|
||||||
|
|
||||||
|
|
|
@ -229,7 +229,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
Document document = searcherForRequest.doc(scoreDoc.doc);
|
Document document = searcherForRequest.doc(scoreDoc.doc);
|
||||||
Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc);
|
Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc);
|
||||||
|
|
||||||
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED) + " score: " +scoreDoc.score);
|
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED) + " score: " +scoreDoc.score);
|
||||||
log.debug("Scoring of the doc explained " + explanation.toString());
|
log.debug("Scoring of the doc explained " + explanation.toString());
|
||||||
log.debug("Explanation's description "+ explanation.getDescription());
|
log.debug("Explanation's description "+ explanation.getDescription());
|
||||||
log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT));
|
log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT));
|
||||||
|
@ -405,7 +405,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
Document doc;
|
Document doc;
|
||||||
try {
|
try {
|
||||||
doc = searcher.doc(topDocs.scoreDocs[i].doc);
|
doc = searcher.doc(topDocs.scoreDocs[i].doc);
|
||||||
String name =doc.get(Entity2LuceneDoc.term.NAME_STEMMED);
|
String name =doc.get(Entity2LuceneDoc.term.AC_NAME_STEMMED);
|
||||||
if( name != null && name.length() > 0)
|
if( name != null && name.length() > 0)
|
||||||
alphas.add( name.substring(0, 1));
|
alphas.add( name.substring(0, 1));
|
||||||
} catch (CorruptIndexException e) {
|
} catch (CorruptIndexException e) {
|
||||||
|
@ -622,7 +622,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
BooleanQuery boolQuery = new BooleanQuery();
|
BooleanQuery boolQuery = new BooleanQuery();
|
||||||
boolQuery.add( query, BooleanClause.Occur.MUST );
|
boolQuery.add( query, BooleanClause.Occur.MUST );
|
||||||
boolQuery.add(
|
boolQuery.add(
|
||||||
new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME_STEMMED, alpha+'*')),
|
new WildcardQuery(new Term(Entity2LuceneDoc.term.AC_NAME_STEMMED, alpha+'*')),
|
||||||
BooleanClause.Occur.MUST);
|
BooleanClause.Occur.MUST);
|
||||||
query = boolQuery;
|
query = boolQuery;
|
||||||
}
|
}
|
||||||
|
@ -683,8 +683,8 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
|
||||||
// qp.setStemmedToUnstemmed(map);
|
// qp.setStemmedToUnstemmed(map);
|
||||||
|
|
||||||
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{
|
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{
|
||||||
VitroLuceneTermNames.NAME_STEMMED,
|
VitroLuceneTermNames.AC_NAME_STEMMED,
|
||||||
VitroLuceneTermNames.NAME_UNSTEMMED,
|
VitroLuceneTermNames.AC_NAME_UNSTEMMED,
|
||||||
VitroLuceneTermNames.RDFTYPE,
|
VitroLuceneTermNames.RDFTYPE,
|
||||||
VitroLuceneTermNames.MONIKER,
|
VitroLuceneTermNames.MONIKER,
|
||||||
VitroLuceneTermNames.ALLTEXT,
|
VitroLuceneTermNames.ALLTEXT,
|
||||||
|
|
|
@ -12,6 +12,7 @@ import java.util.Map;
|
||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -21,8 +22,10 @@ import org.apache.solr.client.solrj.SolrServer;
|
||||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||||
import org.apache.solr.common.SolrDocument;
|
import org.apache.solr.common.SolrDocument;
|
||||||
import org.apache.solr.common.SolrDocumentList;
|
import org.apache.solr.common.SolrDocumentList;
|
||||||
|
import org.apache.solr.core.SolrConfig;
|
||||||
import org.json.JSONArray;
|
import org.json.JSONArray;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
|
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
|
||||||
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
|
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
|
||||||
|
@ -176,7 +179,7 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
||||||
|
|
||||||
String stemParam = (String) request.getParameter("stem");
|
String stemParam = (String) request.getParameter("stem");
|
||||||
boolean stem = "true".equals(stemParam);
|
boolean stem = "true".equals(stemParam);
|
||||||
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
|
String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED ;
|
||||||
|
|
||||||
BooleanQuery boolQuery = new BooleanQuery();
|
BooleanQuery boolQuery = new BooleanQuery();
|
||||||
|
|
||||||
|
@ -214,9 +217,18 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
||||||
|
|
||||||
//querystr = querystr.toLowerCase();
|
//querystr = querystr.toLowerCase();
|
||||||
querystr += "*";
|
querystr += "*";
|
||||||
query = query.setQuery(querystr);
|
//query = query.setQuery(VitroLuceneTermNames.NAME_LOWERCASE + ":" + querystr);
|
||||||
// *** It's the df parameter that sets the field to search
|
//query.addFilterQuery(VitroLuceneTermNames.NAME_LOWERCASE);
|
||||||
//String field = VitroLuceneTermNames.LABEL_LOWERCASE;
|
//query.setQuery(querystr);
|
||||||
|
|
||||||
|
try {
|
||||||
|
SolrConfig config = new SolrConfig();
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
// TODO Auto-generated catch block
|
||||||
|
log.error(e, e);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,17 +73,20 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
public static final String CLASSLOCALNAME = "classLocalName";
|
public static final String CLASSLOCALNAME = "classLocalName";
|
||||||
|
|
||||||
// Fields derived from rdfs:label
|
// Fields derived from rdfs:label
|
||||||
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
|
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming.
|
||||||
|
* Used only in retrieval rather than search. **/
|
||||||
public static String NAME_RAW = "nameRaw"; // was NAMERAW
|
public static String NAME_RAW = "nameRaw"; // was NAMERAW
|
||||||
|
|
||||||
/** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
|
/** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
|
||||||
public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
|
public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
|
||||||
|
|
||||||
/** rdfs:label lowercased, tokenized, stop words, no stemming **/
|
/** rdfs:label lowercased, tokenized, stop words, no stemming.
|
||||||
public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED
|
* Used for autocomplete matching on proper names. **/
|
||||||
|
public static String AC_NAME_UNSTEMMED = "acNameUnstemmed"; // was NAMEUNSTEMMED
|
||||||
|
|
||||||
/** rdfs:label lowercased, tokenized, stop words, stemmed **/
|
/** rdfs:label lowercased, tokenized, stop words, stemmed.
|
||||||
public static String NAME_STEMMED = "nameStemmed"; // was NAME
|
* Used for autocomplete matching where stemming is desired (e.g., book titles). **/
|
||||||
|
public static String AC_NAME_STEMMED = "acNameStemmed"; // was NAME
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -215,11 +218,11 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
||||||
nameLowerCase.setBoost(NAME_BOOST);
|
nameLowerCase.setBoost(NAME_BOOST);
|
||||||
doc.add(nameLowerCase);
|
doc.add(nameLowerCase);
|
||||||
|
|
||||||
Field nameUnstemmed = new Field(term.NAME_UNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
Field nameUnstemmed = new Field(term.AC_NAME_UNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||||
nameUnstemmed.setBoost(NAME_BOOST);
|
nameUnstemmed.setBoost(NAME_BOOST);
|
||||||
doc.add(nameUnstemmed);
|
doc.add(nameUnstemmed);
|
||||||
|
|
||||||
Field nameStemmed = new Field(term.NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
Field nameStemmed = new Field(term.AC_NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||||
nameStemmed.setBoost(NAME_BOOST);
|
nameStemmed.setBoost(NAME_BOOST);
|
||||||
doc.add(nameStemmed);
|
doc.add(nameStemmed);
|
||||||
|
|
||||||
|
|
|
@ -7,8 +7,8 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_UNSTEMMED;
|
||||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
@ -246,8 +246,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
||||||
|
|
||||||
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
|
||||||
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||||
analyzer.addAnalyzer(NAME_UNSTEMMED, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(AC_NAME_UNSTEMMED, new HtmlLowerStopAnalyzer());
|
||||||
analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
analyzer.addAnalyzer(AC_NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
||||||
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
||||||
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
||||||
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
||||||
|
|
Loading…
Add table
Reference in a new issue