NIHVIVO-2459 Add two edgeNGram field definitions for autocomplete, and run the autocomplete search against the untokenized field.

This commit is contained in:
ryounes 2011-06-28 16:57:47 +00:00
parent 179d2b80d4
commit a7c271a9bd
4 changed files with 102 additions and 105 deletions

View file

@ -64,6 +64,13 @@ public class VitroSearchTermNames {
/** rdfs:label lowercased, untokenized, edge-n-gram-filtered for autocomplete on people names **/
public static String AC_NAME_UNTOKENIZED = "acNameUntokenized";
/** rdfs:label lowercased, tokenized, stop-word-filtered, stemmed, and edge-n-gram-filtered for autocomplete
* on non-person labels such as book titles and grant names **/
public static String AC_NAME_STEMMED = "acNameStemmed";
/* There is currently no use case for an autocomplete search field that is tokenized but not stemmed.
public static String AC_NAME_TOKENIZED = "acNameTokenized"; */
/** field for beta values of all documents **/
public static final String BETA = "BETA";
public static final String PHI = "PHI";

View file

@ -142,7 +142,7 @@ public class SolrAutocompleteController extends VitroAjaxController {
query.setStart(0)
.setRows(DEFAULT_MAX_HIT_COUNT);
setQuery(query, queryStr, vreq);
setNameQuery(query, queryStr, vreq);
// Filter by type
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
@ -158,7 +158,7 @@ public class SolrAutocompleteController extends VitroAjaxController {
return query;
}
private void setQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
private void setNameQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
if (StringUtils.isBlank(queryStr)) {
log.error("No query string");
@ -171,13 +171,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
// query will not be stemmed. So we don't look at the stem parameter until we get to
// setTokenizedNameQuery().
if (tokenize) {
setTokenizedQuery(query, queryStr, request);
setTokenizedNameQuery(query, queryStr, request);
} else {
setUntokenizedQuery(query, queryStr);
setUntokenizedNameQuery(query, queryStr);
}
}
private void setTokenizedQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
private void setTokenizedNameQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
// RY 5/18/2011 For now, just doing untokenized query, due to the interactions of wildcard
// query and stemming described below. Need to find a way to do this in Solr.
@ -215,30 +215,15 @@ public class SolrAutocompleteController extends VitroAjaxController {
// log.warn(e, e);
// }
//setUntokenizedQuery(query, queryStr);
String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam);
String termName = stem ? VitroSearchTermNames.NAME_STEMMED : VitroSearchTermNames.NAME_UNSTEMMED;
// We have to lowercase manually, because Solr doesn't do text analysis on wildcard queries
queryStr = queryStr.toLowerCase();
// Solr wants whitespace to be escaped with a backslash
// Better: replace \s+
queryStr = queryStr.replaceAll(" ", "\\\\ ");
queryStr = termName + ":" + queryStr + "*";
query.setQuery(queryStr);
setUntokenizedNameQuery(query, queryStr);
}
private void setUntokenizedQuery(SolrQuery query, String queryStr) {
private void setUntokenizedNameQuery(SolrQuery query, String queryStr) {
// We have to lowercase manually, because Solr doesn't do text analysis on wildcard queries
queryStr = queryStr.toLowerCase();
// Solr wants whitespace to be escaped with a backslash
// Better: replace \s+
queryStr = queryStr.replaceAll(" ", "\\\\ ");
queryStr = VitroSearchTermNames.NAME_LOWERCASE + ":" + queryStr + "*";
queryStr = queryStr.replaceAll("\\s+", "\\\\ ");
queryStr = VitroSearchTermNames.AC_NAME_UNTOKENIZED + ":" + queryStr;
query.setQuery(queryStr);
}

View file

@ -175,47 +175,50 @@ public class IndividualToSolrDocument {
String t=null;
addUri = new StringBuffer();
addUri.append("");
List<ObjectPropertyStatement> objectPropertyStatements = ind.getObjectPropertyStatements();
if (objectPropertyStatements != null) {
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
while (objectPropertyStmtIter.hasNext()) {
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
continue;
try {
objectNames.append(" ");
objectNames.append(((t=objectPropertyStmt.getObject().getName()) == null)?"":t);
addUri.append(" ");
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
} catch (Exception e) {
List<ObjectPropertyStatement> objectPropertyStatements = ind.getObjectPropertyStatements();
if (objectPropertyStatements != null) {
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
while (objectPropertyStmtIter.hasNext()) {
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
continue;
try {
objectNames.append(" ");
objectNames.append(((t=objectPropertyStmt.getObject().getName()) == null)?"":t);
addUri.append(" ");
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
} catch (Exception e) {
log.debug("could not index name of related object: " + e.getMessage());
}
}
}
}
}
}
if(documentModifiers == null || documentModifiers.isEmpty()){
if(documentModifiers == null || documentModifiers.isEmpty()){
doc.addField(term.NAME_RAW, value, NAME_BOOST);
doc.addField(term.NAME_LOWERCASE, value, NAME_BOOST);
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
doc.addField(term.NAME_UNSTEMMED, value, NAME_BOOST);
doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
doc.addField(term.AC_NAME_UNTOKENIZED, value);
}else{
doc.addField(term.AC_NAME_STEMMED, value);
}else{
doc.addField(term.NAME_RAW, value);
doc.addField(term.NAME_LOWERCASE, value);
doc.addField(term.NAME_UNSTEMMED, value);
doc.addField(term.NAME_STEMMED, value);
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
doc.addField(term.AC_NAME_UNTOKENIZED, value);
}
doc.addField(term.AC_NAME_UNTOKENIZED, value);
doc.addField(term.AC_NAME_STEMMED, value);
}
long tMoniker = System.currentTimeMillis();
if(documentModifiers == null || documentModifiers.isEmpty()){
//boost for entity
if(ind.getSearchBoost() != null && ind.getSearchBoost() != 0)
doc.setDocumentBoost(ind.getSearchBoost());
//boost for entity
if(ind.getSearchBoost() != null && ind.getSearchBoost() != 0) {
doc.setDocumentBoost(ind.getSearchBoost());
}
}
//thumbnail