NIHVIVO-2459 Define edgeNGram field for autocomplete on people names. NIHVIVO-2437 Refactoring in search controllers to remove outdated references to Lucene.

This commit is contained in:
ryounes 2011-06-27 20:54:15 +00:00
parent cfcc73d83a
commit 15f5fba80b
13 changed files with 155 additions and 107 deletions

View file

@ -258,15 +258,35 @@
<!-- Like text, but without synonyms and stemming. Good for autocomplete matching of proper names, where we want to remove <!-- Like text, but without synonyms and stemming. Good for autocomplete matching of proper names, where we want to remove
stop words but not stem. --> stop words but not stem. -->
<fieldType name="textUnstemmed" class="solr.TextField" positionIncrementGap="100"> <fieldType name="text_unstemmed" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory" <filter class="solr.StopFilterFactory"
ignoreCase="true" ignoreCase="true"
words="stopwords.txt" words="stopwords.txt"
enablePositionIncrements="true" enablePositionIncrements="true" />
/> <filter class="solr.WordDelimiterFilterFactory"
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/> generateWordParts="1"
generateNumberParts="1"
catenateWords="0"
catenateNumbers="0"
catenateAll="0"
splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="stopwords.txt"
enablePositionIncrements="true" />
<!-- <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1"
generateNumberParts="1"
catenateWords="0"
catenateNumbers="0"
catenateAll="0"
splitOnCaseChange="1"/> -->
<filter class="solr.LowerCaseFilterFactory"/> <filter class="solr.LowerCaseFilterFactory"/>
</analyzer> </analyzer>
</fieldType> </fieldType>
@ -444,6 +464,18 @@
--> -->
<fieldtype name="geohash" class="solr.GeoHashField"/> <fieldtype name="geohash" class="solr.GeoHashField"/>
<!-- Untokenized autocomplete type (intended for person names).
Index side: the whole field value is kept as one token, lowercased, and then
expanded into front-edge n-grams of 2 to 25 characters.
Query side: the input is kept as one token and lowercased only; no n-gram filter
is applied, so the typed text is compared as a single term against the indexed grams.
NOTE(review): input shorter than minGramSize (2) or longer than maxGramSize (25)
presumably matches nothing; confirm against the EdgeNGramFilterFactory documentation. -->
<fieldtype name="edgengram_untokenized" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
<filter class="solr.EdgeNGramFilterFactory" minGramSize="2" maxGramSize="25" side="front"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldtype>
</types> </types>
@ -489,8 +521,14 @@
<field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/> <field name="nameLowercase" type="lowercase" indexed="true" stored="true" multiValued="true"/>
<!-- A sortable version of nameLowercase --> <!-- A sortable version of nameLowercase -->
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" /> <field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" />
<field name="nameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/> <field name="nameUnstemmed" type="text_unstemmed" indexed="true" stored="false" multiValued="true"/>
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/> <field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<!-- Untokenized autocomplete on name (suitable for person names) -->
<field name="acNameUntokenized" type="edgengram_untokenized" indexed="true" stored="false" multiValued="true" />
<!--
<field name="acNameTokenized" type="edgengram_tokenized" indexed="true" stored="false" multiValued="true" />
<field name="acNameStemmed" type="edgengram_stemmed" indexed="true" stored="false" multiValued="true" />
-->
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/> <field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/> <field name="NAME_PHONETIC" type ="phonetic" indexed="true" stored="false" multiValued="true"/>

View file

@ -709,7 +709,7 @@
<lst name="defaults"> <lst name="defaults">
<str name="defType">edismax</str> <str name="defType">edismax</str>
<str name="qf">nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo</str> <str name="qf">nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo</str>
<str name="pf">targetInfo nameStemmed nameUnstemmed</str> <str name="pf">targetInfo</str>
<str name="echoParams">explicit</str> <str name="echoParams">explicit</str>
<str name="ps">2</str> <str name="ps">2</str>
<str name="qs">2</str> <str name="qs">2</str>

View file

@ -21,7 +21,7 @@ import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory; import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDF;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup; import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
public class IndividualListRdfController extends VitroHttpServlet { public class IndividualListRdfController extends VitroHttpServlet {
@ -35,13 +35,13 @@ public class IndividualListRdfController extends VitroHttpServlet {
// Make the query // Make the query
String vclassUri = req.getParameter("vclass"); String vclassUri = req.getParameter("vclass");
String queryStr = VitroLuceneTermNames.RDFTYPE + ":\"" + vclassUri + "\""; String queryStr = VitroSearchTermNames.RDFTYPE + ":\"" + vclassUri + "\"";
SolrQuery query = new SolrQuery(queryStr); SolrQuery query = new SolrQuery(queryStr);
query.setStart(0) query.setStart(0)
.setRows(ENTITY_LIST_CONTROLLER_MAX_RESULTS) .setRows(ENTITY_LIST_CONTROLLER_MAX_RESULTS)
.setFields(VitroLuceneTermNames.URI); .setFields(VitroSearchTermNames.URI);
// For now, we're only displaying the url, so no need to sort. // For now, we're only displaying the url, so no need to sort.
//.setSortField(VitroLuceneTermNames.NAME_LOWERCASE_SINGLE_VALUED); //.setSortField(VitroSearchTermNames.NAME_LOWERCASE_SINGLE_VALUED);
// Execute the query // Execute the query
SolrServer solr = SolrSetup.getSolrServer(getServletContext()); SolrServer solr = SolrSetup.getSolrServer(getServletContext());
@ -65,7 +65,7 @@ public class IndividualListRdfController extends VitroHttpServlet {
Model model = ModelFactory.createDefaultModel(); Model model = ModelFactory.createDefaultModel();
for (SolrDocument doc : docs) { for (SolrDocument doc : docs) {
String uri = doc.get(VitroLuceneTermNames.URI).toString(); String uri = doc.get(VitroSearchTermNames.URI).toString();
Resource resource = ResourceFactory.createResource(uri); Resource resource = ResourceFactory.createResource(uri);
RDFNode node = (RDFNode) ResourceFactory.createResource(vclassUri); RDFNode node = (RDFNode) ResourceFactory.createResource(vclassUri);
model.add(resource, RDF.type, node); model.add(resource, RDF.type, node);

View file

@ -39,8 +39,8 @@ import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties; import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao; import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao;
import edu.cornell.mannlib.vitro.webapp.search.SearchException; import edu.cornell.mannlib.vitro.webapp.search.SearchException;
import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory; import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup; import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
@ -359,7 +359,7 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
private Query makeTokenizedNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) { private Query makeTokenizedNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) {
String termName = VitroLuceneTermNames.NAME_STEMMED; String termName = VitroSearchTermNames.NAME_STEMMED;
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
@ -425,7 +425,7 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
if (typeParam != null) { if (typeParam != null) {
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
boolQuery.add( new TermQuery( boolQuery.add( new TermQuery(
new Term(VitroLuceneTermNames.RDFTYPE, new Term(VitroSearchTermNames.RDFTYPE,
typeParam)), typeParam)),
BooleanClause.Occur.MUST); BooleanClause.Occur.MUST);
boolQuery.add(query, BooleanClause.Occur.MUST); boolQuery.add(query, BooleanClause.Occur.MUST);
@ -440,7 +440,7 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
if ( ! StringUtils.isEmpty(pvPair[0]) ) { if ( ! StringUtils.isEmpty(pvPair[0]) ) {
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
boolQuery.add(new TermQuery(new Term( boolQuery.add(new TermQuery(new Term(
VitroLuceneTermNames.RDFTYPE, pvPair[0])), VitroSearchTermNames.RDFTYPE, pvPair[0])),
BooleanClause.Occur.MUST); BooleanClause.Occur.MUST);
boolQuery.add(extraQuery, BooleanClause.Occur.MUST); boolQuery.add(extraQuery, BooleanClause.Occur.MUST);
extraQuery = boolQuery; extraQuery = boolQuery;

View file

@ -5,10 +5,8 @@ package edu.cornell.mannlib.vitro.webapp.controller;
import java.io.IOException; import java.io.IOException;
import java.io.PrintWriter; import java.io.PrintWriter;
import java.io.Writer; import java.io.Writer;
import java.net.URLEncoder;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Enumeration;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.ListIterator; import java.util.ListIterator;
@ -34,11 +32,8 @@ import edu.cornell.mannlib.vitro.webapp.beans.DataProperty;
import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.beans.VClassGroup; import edu.cornell.mannlib.vitro.webapp.beans.VClassGroup;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.SolrIndividualListController.PageRecord;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.SolrIndividualListController; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.SolrIndividualListController;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder;
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
import edu.cornell.mannlib.vitro.webapp.dao.jena.VClassGroupCache; import edu.cornell.mannlib.vitro.webapp.dao.jena.VClassGroupCache;
import edu.cornell.mannlib.vitro.webapp.edit.n3editing.configuration.EditConfiguration; import edu.cornell.mannlib.vitro.webapp.edit.n3editing.configuration.EditConfiguration;
@ -220,7 +215,6 @@ public class SolrJsonServlet extends VitroHttpServlet {
throw new Exception("parameter vclassId URI parameter expected "); throw new Exception("parameter vclassId URI parameter expected ");
} }
vclassIds = Arrays.asList(vitroClassIdStr); vclassIds = Arrays.asList(vitroClassIdStr);
//rObj = getLuceneIndividualsByVClass(vclass.getURI(),req, getServletContext());
rObj = getSolrIndividualsByVClasses(vclassIds,req, getServletContext()); rObj = getSolrIndividualsByVClasses(vclassIds,req, getServletContext());
}catch(Exception ex){ }catch(Exception ex){
errorMessage = ex.toString(); errorMessage = ex.toString();
@ -271,7 +265,7 @@ public class SolrJsonServlet extends VitroHttpServlet {
vreq.getWebappDaoFactory().getIndividualDao(), vreq.getWebappDaoFactory().getIndividualDao(),
context); context);
} catch(Exception ex) { } catch(Exception ex) {
log.error("Error in retrieval of Lucene results for VClass " + vclassURIs.toString(), ex); log.error("Error in retrieval of search results for VClass " + vclassURIs.toString(), ex);
} }
return map; return map;

View file

@ -15,8 +15,6 @@ import javax.servlet.ServletException;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.search.Query;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
@ -31,7 +29,7 @@ import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.Exc
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.TemplateResponseValues; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.TemplateResponseValues;
import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao; import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup; import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individual.ListedIndividualTemplateModel; import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individual.ListedIndividualTemplateModel;
import freemarker.ext.beans.BeansWrapper; import freemarker.ext.beans.BeansWrapper;
@ -166,7 +164,7 @@ public class SolrIndividualListController extends FreemarkerHttpServlet {
//Pulling out common code that is used for both single (regular) vclass query and multiple (intersection) query //Pulling out common code that is used for both single (regular) vclass query and multiple (intersection) query
public static Map<String,Object> getResultsForVClasses(List<String> vclassURIs, int page, String alpha, IndividualDao indDao, ServletContext context) public static Map<String,Object> getResultsForVClasses(List<String> vclassURIs, int page, String alpha, IndividualDao indDao, ServletContext context)
throws CorruptIndexException, IOException, ServletException{ throws IOException, ServletException{
Map<String,Object> rvMap = new HashMap<String,Object>(); Map<String,Object> rvMap = new HashMap<String,Object>();
try{ try{
SolrQuery query = getQuery(vclassURIs, alpha, page); SolrQuery query = getQuery(vclassURIs, alpha, page);
@ -181,10 +179,10 @@ public class SolrIndividualListController extends FreemarkerHttpServlet {
} }
public static Map<String,Object> getResultsForVClass(String vclassURI, int page, String alpha, IndividualDao indDao, ServletContext context) public static Map<String,Object> getResultsForVClass(String vclassURI, int page, String alpha, IndividualDao indDao, ServletContext context)
throws CorruptIndexException, IOException, ServletException{ throws IOException, ServletException{
Map<String,Object> rvMap = new HashMap<String,Object>(); Map<String,Object> rvMap = new HashMap<String,Object>();
try{ try{
//make lucene query for this rdf:type //make query for this rdf:type
List<String> classUris = new ArrayList<String>(); List<String> classUris = new ArrayList<String>();
classUris.add(vclassURI); classUris.add(vclassURI);
SolrQuery query = getQuery(classUris, alpha, page); SolrQuery query = getQuery(classUris, alpha, page);
@ -199,11 +197,10 @@ public class SolrIndividualListController extends FreemarkerHttpServlet {
} }
public static Map<String,Object> getResultsForVClassIntersections(List<String> vclassURIs, int page, String alpha, IndividualDao indDao, ServletContext context) public static Map<String,Object> getResultsForVClassIntersections(List<String> vclassURIs, int page, String alpha, IndividualDao indDao, ServletContext context)
throws CorruptIndexException, IOException, ServletException{ throws IOException, ServletException{
Map<String,Object> rvMap = new HashMap<String,Object>(); Map<String,Object> rvMap = new HashMap<String,Object>();
try{ try{
//make lucene query for multiple rdf types // make query for multiple rdf types
//change to solr
SolrQuery query = getQuery(vclassURIs, alpha, page); SolrQuery query = getQuery(vclassURIs, alpha, page);
//get results corresponding to this query //get results corresponding to this query
rvMap = getResultsForVClassQuery(query, page, alpha, indDao, context); rvMap = getResultsForVClassQuery(query, page, alpha, indDao, context);
@ -221,7 +218,7 @@ public class SolrIndividualListController extends FreemarkerHttpServlet {
* into a DAO or similar object. * into a DAO or similar object.
*/ */
public static Map<String,Object> getResultsForVClassQuery(SolrQuery query, int page, String alpha, IndividualDao indDao, ServletContext context) public static Map<String,Object> getResultsForVClassQuery(SolrQuery query, int page, String alpha, IndividualDao indDao, ServletContext context)
throws CorruptIndexException, IOException, ServletException { throws IOException, ServletException {
Map<String,Object> rvMap = new HashMap<String,Object>(); Map<String,Object> rvMap = new HashMap<String,Object>();
SolrServer solr = SolrSetup.getSolrServer(context); SolrServer solr = SolrSetup.getSolrServer(context);
QueryResponse response = null; QueryResponse response = null;
@ -249,7 +246,7 @@ public class SolrIndividualListController extends FreemarkerHttpServlet {
List<Individual> individuals = new ArrayList<Individual>(); List<Individual> individuals = new ArrayList<Individual>();
for (SolrDocument doc : docs) { for (SolrDocument doc : docs) {
String uri = doc.get(VitroLuceneTermNames.URI).toString(); String uri = doc.get(VitroSearchTermNames.URI).toString();
Individual individual = indDao.getIndividualByURI( uri ); Individual individual = indDao.getIndividualByURI( uri );
if (individual != null) { if (individual != null) {
individuals.add(individual); individuals.add(individual);
@ -286,7 +283,7 @@ public class SolrIndividualListController extends FreemarkerHttpServlet {
try{ try{
//query term for rdf:type - multiple types possible //query term for rdf:type - multiple types possible
for(String vclassUri: vclassUris) { for(String vclassUri: vclassUris) {
queryTypes.add(VitroLuceneTermNames.RDFTYPE + ":\"" + vclassUri + "\" "); queryTypes.add(VitroSearchTermNames.RDFTYPE + ":\"" + vclassUri + "\" ");
} }
if(queryTypes.size() > 1) { if(queryTypes.size() > 1) {
@ -299,14 +296,14 @@ public class SolrIndividualListController extends FreemarkerHttpServlet {
// Add alpha filter if it is needed // Add alpha filter if it is needed
if ( alpha != null && !"".equals(alpha) && alpha.length() == 1) { if ( alpha != null && !"".equals(alpha) && alpha.length() == 1) {
queryText += VitroLuceneTermNames.NAME_LOWERCASE + ":" + alpha.toLowerCase() + "*"; queryText += VitroSearchTermNames.NAME_LOWERCASE + ":" + alpha.toLowerCase() + "*";
} }
SolrQuery query = new SolrQuery(queryText); SolrQuery query = new SolrQuery(queryText);
log.debug("Query text is " + queryText); log.debug("Query text is " + queryText);
int start = (page-1)*INDIVIDUALS_PER_PAGE; int start = (page-1)*INDIVIDUALS_PER_PAGE;
query.setStart(start) query.setStart(start)
.setRows(INDIVIDUALS_PER_PAGE) .setRows(INDIVIDUALS_PER_PAGE)
.setSortField(VitroLuceneTermNames.NAME_LOWERCASE_SINGLE_VALUED, SolrQuery.ORDER.asc); .setSortField(VitroSearchTermNames.NAME_LOWERCASE_SINGLE_VALUED, SolrQuery.ORDER.asc);
return query; return query;
} catch (Exception ex){ } catch (Exception ex){
log.error(ex,ex); log.error(ex,ex);

View file

@ -2,7 +2,7 @@
package edu.cornell.mannlib.vitro.webapp.search; package edu.cornell.mannlib.vitro.webapp.search;
public class VitroTermNames { public class VitroSearchTermNames {
/** Id of entity, vclass or tab */ /** Id of entity, vclass or tab */
public static String URI = "URI"; public static String URI = "URI";
/** lucene document id */ /** lucene document id */
@ -46,16 +46,23 @@ public class VitroTermNames {
// Fields derived from rdfs:label // Fields derived from rdfs:label
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/ /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
public static String NAME_RAW = "nameRaw"; // was NAMERAW public static String NAME_RAW = "nameRaw"; //
/** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/ /** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE public static String NAME_LOWERCASE = "nameLowercase"; //
/** Same as NAME_LOWERCASE, but single-valued so it's sortable. **/
// RY Need to control how indexing selects which of multiple values to copy.
public static String NAME_LOWERCASE_SINGLE_VALUED = "nameLowercaseSingleValued";
/** rdfs:label lowercased, tokenized, stop words, no stemming **/ /** rdfs:label lowercased, tokenized, stop words, no stemming **/
public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED public static String NAME_UNSTEMMED = "nameUnstemmed";
/** rdfs:label lowercased, tokenized, stop words, stemmed **/ /** rdfs:label lowercased, tokenized, stop words, stemmed **/
public static String NAME_STEMMED = "nameStemmed"; // was NAME public static String NAME_STEMMED = "nameStemmed";
/** rdfs:label lowercased, untokenized, edge-n-gram-filtered for autocomplete on people names **/
public static String AC_NAME_UNTOKENIZED = "acNameUntokenized";
/** field for beta values of all documents **/ /** field for beta values of all documents **/
public static final String BETA = "BETA"; public static final String BETA = "BETA";

View file

@ -61,7 +61,7 @@ public class IndexController extends FreemarkerHttpServlet {
return new ExceptionResponseValues(Template.ERROR_MESSAGE.toString(), body, e); return new ExceptionResponseValues(Template.ERROR_MESSAGE.toString(), body, e);
} }
body.put("message","Rebuilding of index started."); body.put("message","Rebuild of search index started. A message will be written to the vivo log when indexing is complete.");
return new TemplateResponseValues(Template.MESSAGE.toString(), body); return new TemplateResponseValues(Template.MESSAGE.toString(), body);
} }
} }

View file

@ -16,11 +16,9 @@ import javax.servlet.http.HttpServletResponse;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.lucene.search.BooleanQuery;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrDocumentList;
import org.json.JSONArray; import org.json.JSONArray;
@ -30,7 +28,7 @@ import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers; import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.controller.ajax.VitroAjaxController; import edu.cornell.mannlib.vitro.webapp.controller.ajax.VitroAjaxController;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup; import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
/** /**
@ -102,10 +100,10 @@ public class SolrAutocompleteController extends VitroAjaxController {
List<SearchResult> results = new ArrayList<SearchResult>(); List<SearchResult> results = new ArrayList<SearchResult>();
for (SolrDocument doc : docs) { for (SolrDocument doc : docs) {
try{ try{
String uri = doc.get(VitroLuceneTermNames.URI).toString(); String uri = doc.get(VitroSearchTermNames.URI).toString();
// VitroLuceneTermNames.NAME_RAW is a multivalued field, so doc.get() returns a list // VitroSearchTermNames.NAME_RAW is a multivalued field, so doc.get() returns a list
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
String name = ((List<String>) doc.get(VitroLuceneTermNames.NAME_RAW)).get(0); String name = ((List<String>) doc.get(VitroSearchTermNames.NAME_RAW)).get(0);
SearchResult result = new SearchResult(name, uri); SearchResult result = new SearchResult(name, uri);
results.add(result); results.add(result);
} catch(Exception e){ } catch(Exception e){
@ -149,13 +147,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
// Filter by type // Filter by type
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE); String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
if (typeParam != null) { if (typeParam != null) {
query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\""); query.addFilterQuery(VitroSearchTermNames.RDFTYPE + ":\"" + typeParam + "\"");
} }
query.setFields(VitroLuceneTermNames.NAME_RAW, VitroLuceneTermNames.URI); // fields to retrieve query.setFields(VitroSearchTermNames.NAME_RAW, VitroSearchTermNames.URI); // fields to retrieve
// Can't sort on multivalued field, so we sort the results in Java when we get them. // Can't sort on multivalued field, so we sort the results in Java when we get them.
// query.setSortField(VitroLuceneTermNames.NAME_LOWERCASE, SolrQuery.ORDER.asc); // query.setSortField(VitroSearchTermNames.NAME_LOWERCASE, SolrQuery.ORDER.asc);
return query; return query;
} }
@ -188,7 +186,7 @@ public class SolrAutocompleteController extends VitroAjaxController {
// String stemParam = (String) request.getParameter("stem"); // String stemParam = (String) request.getParameter("stem");
// boolean stem = "true".equals(stemParam); // boolean stem = "true".equals(stemParam);
// String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED ; // String termName = stem ? VitroSearchTermNames.AC_NAME_STEMMED : VitroSearchTermNames.AC_NAME_UNSTEMMED ;
// // Use the query parser to analyze the search term the same way the indexed text was analyzed. // // Use the query parser to analyze the search term the same way the indexed text was analyzed.
// // For example, text is lowercased, and function words are stripped out. // // For example, text is lowercased, and function words are stripped out.
@ -217,7 +215,20 @@ public class SolrAutocompleteController extends VitroAjaxController {
// log.warn(e, e); // log.warn(e, e);
// } // }
setUntokenizedQuery(query, queryStr); //setUntokenizedQuery(query, queryStr);
String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam);
String termName = stem ? VitroSearchTermNames.NAME_STEMMED : VitroSearchTermNames.NAME_UNSTEMMED;
// We have to lowercase manually, because Solr doesn't do text analysis on wildcard queries
queryStr = queryStr.toLowerCase();
// Solr wants whitespace to be escaped with a backslash
// Better: replace \s+
queryStr = queryStr.replaceAll(" ", "\\\\ ");
queryStr = termName + ":" + queryStr + "*";
query.setQuery(queryStr);
} }
private void setUntokenizedQuery(SolrQuery query, String queryStr) { private void setUntokenizedQuery(SolrQuery query, String queryStr) {
@ -227,7 +238,7 @@ public class SolrAutocompleteController extends VitroAjaxController {
// Solr wants whitespace to be escaped with a backslash // Solr wants whitespace to be escaped with a backslash
// Better: replace \s+ // Better: replace \s+
queryStr = queryStr.replaceAll(" ", "\\\\ "); queryStr = queryStr.replaceAll(" ", "\\\\ ");
queryStr = VitroLuceneTermNames.NAME_LOWERCASE + ":" + queryStr + "*"; queryStr = VitroSearchTermNames.NAME_LOWERCASE + ":" + queryStr + "*";
query.setQuery(queryStr); query.setQuery(queryStr);
} }

View file

@ -45,11 +45,10 @@ import edu.cornell.mannlib.vitro.webapp.dao.VClassDao;
import edu.cornell.mannlib.vitro.webapp.dao.VClassGroupDao; import edu.cornell.mannlib.vitro.webapp.dao.VClassGroupDao;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.search.SearchException; import edu.cornell.mannlib.vitro.webapp.search.SearchException;
import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter; import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery; import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory; import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup; import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup; import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel; import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
@ -230,7 +229,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
for(int i = startIndex; i < lastHitToShow; i++){ for(int i = startIndex; i < lastHitToShow; i++){
try { try {
SolrDocument doc = docs.get(i); SolrDocument doc = docs.get(i);
String uri = doc.get(VitroLuceneTermNames.URI).toString(); String uri = doc.get(VitroSearchTermNames.URI).toString();
log.debug("Retrieving individual with uri "+ uri); log.debug("Retrieving individual with uri "+ uri);
Individual ent = new IndividualImpl(); Individual ent = new IndividualImpl();
ent.setURI(uri); ent.setURI(uri);
@ -351,7 +350,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
for(int i = 0; i < maxHits && n > grpsFound ;i++){ for(int i = 0; i < maxHits && n > grpsFound ;i++){
try{ try{
SolrDocument doc = docs.get(i); SolrDocument doc = docs.get(i);
Collection<Object> grps = doc.getFieldValues(VitroTermNames.CLASSGROUP_URI); Collection<Object> grps = doc.getFieldValues(VitroSearchTermNames.CLASSGROUP_URI);
if (grps != null) { if (grps != null) {
for (Object o : grps) { for (Object o : grps) {
String groupUri = o.toString(); String groupUri = o.toString();
@ -418,7 +417,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
HashSet<String> typesInHits = new HashSet<String>(); HashSet<String> typesInHits = new HashSet<String>();
for (SolrDocument doc : docs) { for (SolrDocument doc : docs) {
try { try {
Collection<Object> types = doc.getFieldValues(VitroLuceneTermNames.RDFTYPE); Collection<Object> types = doc.getFieldValues(VitroSearchTermNames.RDFTYPE);
if (types != null) { if (types != null) {
for (Object o : types) { for (Object o : types) {
String typeUri = o.toString(); String typeUri = o.toString();
@ -446,7 +445,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
if ( ! StringUtils.isBlank(classgroupParam) ) { if ( ! StringUtils.isBlank(classgroupParam) ) {
log.debug("Firing classgroup query "); log.debug("Firing classgroup query ");
log.debug("request.getParameter(classgroup) is "+ classgroupParam); log.debug("request.getParameter(classgroup) is "+ classgroupParam);
query.addFilterQuery(VitroLuceneTermNames.CLASSGROUP_URI + ":\"" + classgroupParam + "\""); query.addFilterQuery(VitroSearchTermNames.CLASSGROUP_URI + ":\"" + classgroupParam + "\"");
} }
// rdf:type filtering // rdf:type filtering
@ -454,7 +453,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
if ( ! StringUtils.isBlank(typeParam) ) { if ( ! StringUtils.isBlank(typeParam) ) {
log.debug("Firing type query "); log.debug("Firing type query ");
log.debug("request.getParameter(type) is "+ typeParam); log.debug("request.getParameter(type) is "+ typeParam);
query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\""); query.addFilterQuery(VitroSearchTermNames.RDFTYPE + ":\"" + typeParam + "\"");
} }
//query.setQuery(queryText); //query.setQuery(queryText);

View file

@ -36,7 +36,7 @@ import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntModel;
import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
public class CalculateParameters implements DocumentModifier { public class CalculateParameters implements DocumentModifier {
@ -67,15 +67,15 @@ public class CalculateParameters implements DocumentModifier {
private static Log log = LogFactory.getLog(CalculateParameters.class); private static Log log = LogFactory.getLog(CalculateParameters.class);
private static final String[] fieldsToAddBetaTo = { private static final String[] fieldsToAddBetaTo = {
VitroTermNames.NAME_RAW, VitroSearchTermNames.NAME_RAW,
VitroTermNames.NAME_LOWERCASE, VitroSearchTermNames.NAME_LOWERCASE,
VitroTermNames.NAME_UNSTEMMED, VitroSearchTermNames.NAME_UNSTEMMED,
VitroTermNames.NAME_STEMMED VitroSearchTermNames.NAME_STEMMED
}; };
private static final String[] fieldsToMultiplyBetaBy = { private static final String[] fieldsToMultiplyBetaBy = {
VitroTermNames.ALLTEXT, VitroSearchTermNames.ALLTEXT,
VitroTermNames.ALLTEXTUNSTEMMED, VitroSearchTermNames.ALLTEXTUNSTEMMED,
}; };
public CalculateParameters(Dataset dataset){ public CalculateParameters(Dataset dataset){
@ -298,7 +298,7 @@ public class CalculateParameters implements DocumentModifier {
f.addValue(info.toString(),getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST); f.addValue(info.toString(),getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);
} }
SolrInputField f = doc.getField(VitroTermNames.targetInfo); SolrInputField f = doc.getField(VitroSearchTermNames.targetInfo);
f.addValue(adjInfo[1],f.getBoost()); f.addValue(adjInfo[1],f.getBoost());
doc.setDocumentBoost(getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST); doc.setDocumentBoost(getBeta(uri)*phi*IndividualToSolrDocument.ALL_TEXT_BOOST);

View file

@ -26,7 +26,7 @@ import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.shared.Lock; import com.hp.hpl.jena.shared.Lock;
import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
public class ContextNodeFields implements DocumentModifier{ public class ContextNodeFields implements DocumentModifier{
@ -107,8 +107,8 @@ public class ContextNodeFields implements DocumentModifier{
log.debug("retrieving context node values.."); log.debug("retrieving context node values..");
SolrInputField field = doc.getField(VitroTermNames.ALLTEXT); SolrInputField field = doc.getField(VitroSearchTermNames.ALLTEXT);
SolrInputField targetField = doc.getField(VitroTermNames.targetInfo); SolrInputField targetField = doc.getField(VitroSearchTermNames.targetInfo);
StringBuffer objectProperties = new StringBuffer(); StringBuffer objectProperties = new StringBuffer();

View file

@ -22,7 +22,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException; import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.search.beans.ClassProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ClassProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
@ -30,7 +30,7 @@ public class IndividualToSolrDocument {
public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName()); public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName());
public static VitroTermNames term = new VitroTermNames(); public static VitroSearchTermNames term = new VitroSearchTermNames();
private static String entClassName = Individual.class.getName(); private static String entClassName = Individual.class.getName();
@ -195,16 +195,18 @@ public class IndividualToSolrDocument {
if(documentModifiers == null || documentModifiers.isEmpty()){ if(documentModifiers == null || documentModifiers.isEmpty()){
doc.addField(term.NAME_RAW, value, NAME_BOOST); doc.addField(term.NAME_RAW, value, NAME_BOOST);
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST); doc.addField(term.NAME_LOWERCASE, value, NAME_BOOST);
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST); doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
doc.addField(term.NAME_STEMMED, value, NAME_BOOST); doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST); doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
doc.addField(term.AC_NAME_UNTOKENIZED, value);
}else{ }else{
doc.addField(term.NAME_RAW, value); doc.addField(term.NAME_RAW, value);
doc.addField(term.NAME_LOWERCASE, value.toLowerCase()); doc.addField(term.NAME_LOWERCASE, value);
doc.addField(term.NAME_UNSTEMMED, value); doc.addField(term.NAME_UNSTEMMED, value);
doc.addField(term.NAME_STEMMED, value); doc.addField(term.NAME_STEMMED, value);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST); doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
doc.addField(term.AC_NAME_UNTOKENIZED, value);
} }
@ -289,7 +291,7 @@ public class IndividualToSolrDocument {
Individual ent = null; Individual ent = null;
if( result != null && result instanceof Document){ if( result != null && result instanceof Document){
Document hit = (Document) result; Document hit = (Document) result;
String id = hit.get(term.URI); String id = hit.get(VitroSearchTermNames.URI);
ent = new IndividualImpl(); ent = new IndividualImpl();
ent.setURI(id); ent.setURI(id);
} }
@ -297,29 +299,29 @@ public class IndividualToSolrDocument {
} }
private void fillContextNodes(){ private void fillContextNodes(){
this.contextNodeClassNames.add("Role"); IndividualToSolrDocument.contextNodeClassNames.add("Role");
this.contextNodeClassNames.add("AttendeeRole"); IndividualToSolrDocument.contextNodeClassNames.add("AttendeeRole");
this.contextNodeClassNames.add("ClinicalRole"); IndividualToSolrDocument.contextNodeClassNames.add("ClinicalRole");
this.contextNodeClassNames.add("LeaderRole"); IndividualToSolrDocument.contextNodeClassNames.add("LeaderRole");
this.contextNodeClassNames.add("MemberRole"); IndividualToSolrDocument.contextNodeClassNames.add("MemberRole");
this.contextNodeClassNames.add("OutreachProviderRole"); IndividualToSolrDocument.contextNodeClassNames.add("OutreachProviderRole");
this.contextNodeClassNames.add("PresenterRole"); IndividualToSolrDocument.contextNodeClassNames.add("PresenterRole");
this.contextNodeClassNames.add("ResearcherRole"); IndividualToSolrDocument.contextNodeClassNames.add("ResearcherRole");
this.contextNodeClassNames.add("InvestigatorRole"); IndividualToSolrDocument.contextNodeClassNames.add("InvestigatorRole");
this.contextNodeClassNames.add("CoPrincipalInvestigatorRole"); IndividualToSolrDocument.contextNodeClassNames.add("CoPrincipalInvestigatorRole");
this.contextNodeClassNames.add("PrincipalInvestigatorRole"); IndividualToSolrDocument.contextNodeClassNames.add("PrincipalInvestigatorRole");
this.contextNodeClassNames.add("ServiceProviderRole"); IndividualToSolrDocument.contextNodeClassNames.add("ServiceProviderRole");
this.contextNodeClassNames.add("TeacherRole"); IndividualToSolrDocument.contextNodeClassNames.add("TeacherRole");
this.contextNodeClassNames.add("Position"); IndividualToSolrDocument.contextNodeClassNames.add("Position");
this.contextNodeClassNames.add("FacultyAdministrativePosition"); IndividualToSolrDocument.contextNodeClassNames.add("FacultyAdministrativePosition");
this.contextNodeClassNames.add("FacultyPosition"); IndividualToSolrDocument.contextNodeClassNames.add("FacultyPosition");
this.contextNodeClassNames.add("LibrarianPosition"); IndividualToSolrDocument.contextNodeClassNames.add("LibrarianPosition");
this.contextNodeClassNames.add("Non-AcademicPosition"); IndividualToSolrDocument.contextNodeClassNames.add("Non-AcademicPosition");
this.contextNodeClassNames.add("Non-FacultyAcademicPosition"); IndividualToSolrDocument.contextNodeClassNames.add("Non-FacultyAcademicPosition");
this.contextNodeClassNames.add("PostdoctoralPosition"); IndividualToSolrDocument.contextNodeClassNames.add("PostdoctoralPosition");
this.contextNodeClassNames.add("AdvisingRelationship"); IndividualToSolrDocument.contextNodeClassNames.add("AdvisingRelationship");
this.contextNodeClassNames.add("Authorship"); IndividualToSolrDocument.contextNodeClassNames.add("Authorship");
this.contextNodeClassNames.add("AcademicDegree"); IndividualToSolrDocument.contextNodeClassNames.add("AcademicDegree");
} }