NIHVIVO-2459 Two edgeNGram field definitions for autocomplete. Untokenized autocomplete search.
This commit is contained in:
parent
179d2b80d4
commit
a7c271a9bd
4 changed files with 102 additions and 105 deletions
|
@ -64,6 +64,13 @@ public class VitroSearchTermNames {
|
|||
/** rdfs:label lowercased, untokenized, edge-n-gram-filtered for autocomplete on people names **/
|
||||
public static String AC_NAME_UNTOKENIZED = "acNameUntokenized";
|
||||
|
||||
/** rdfs:label lowercased, tokenized, stop words, stemmed, edge-n-gram-filtered for autocomplete
|
||||
* on non-person labels such as book titles and grant names **/
|
||||
public static String AC_NAME_STEMMED = "acNameStemmed";
|
||||
|
||||
/* There is currently no use case for an autocomplete search field that is tokenized but not stemmed.
|
||||
public static String AC_NAME_TOKENIZED = "acNameTokenized"; */
|
||||
|
||||
/** field for beta values of all documents **/
|
||||
public static final String BETA = "BETA";
|
||||
public static final String PHI = "PHI";
|
||||
|
|
|
@ -142,7 +142,7 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
query.setStart(0)
|
||||
.setRows(DEFAULT_MAX_HIT_COUNT);
|
||||
|
||||
setQuery(query, queryStr, vreq);
|
||||
setNameQuery(query, queryStr, vreq);
|
||||
|
||||
// Filter by type
|
||||
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
|
||||
|
@ -158,7 +158,7 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
return query;
|
||||
}
|
||||
|
||||
private void setQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
|
||||
private void setNameQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
|
||||
|
||||
if (StringUtils.isBlank(queryStr)) {
|
||||
log.error("No query string");
|
||||
|
@ -171,13 +171,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
// query will not be stemmed. So we don't look at the stem parameter until we get to
|
||||
// setTokenizedNameQuery().
|
||||
if (tokenize) {
|
||||
setTokenizedQuery(query, queryStr, request);
|
||||
setTokenizedNameQuery(query, queryStr, request);
|
||||
} else {
|
||||
setUntokenizedQuery(query, queryStr);
|
||||
setUntokenizedNameQuery(query, queryStr);
|
||||
}
|
||||
}
|
||||
|
||||
private void setTokenizedQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
|
||||
private void setTokenizedNameQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
|
||||
|
||||
// RY 5/18/2011 For now, just doing untokenized query, due to the interactions of wildcard
|
||||
// query and stemming described below. Need to find a way to do this in Solr.
|
||||
|
@ -215,30 +215,15 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
// log.warn(e, e);
|
||||
// }
|
||||
|
||||
//setUntokenizedQuery(query, queryStr);
|
||||
|
||||
String stemParam = (String) request.getParameter("stem");
|
||||
boolean stem = "true".equals(stemParam);
|
||||
String termName = stem ? VitroSearchTermNames.NAME_STEMMED : VitroSearchTermNames.NAME_UNSTEMMED;
|
||||
|
||||
// We have to lowercase manually, because Solr doesn't do text analysis on wildcard queries
|
||||
queryStr = queryStr.toLowerCase();
|
||||
// Solr wants whitespace to be escaped with a backslash
|
||||
// Better: replace \s+
|
||||
queryStr = queryStr.replaceAll(" ", "\\\\ ");
|
||||
queryStr = termName + ":" + queryStr + "*";
|
||||
query.setQuery(queryStr);
|
||||
|
||||
setUntokenizedNameQuery(query, queryStr);
|
||||
}
|
||||
|
||||
private void setUntokenizedQuery(SolrQuery query, String queryStr) {
|
||||
private void setUntokenizedNameQuery(SolrQuery query, String queryStr) {
|
||||
|
||||
// We have to lowercase manually, because Solr doesn't do text analysis on wildcard queries
|
||||
queryStr = queryStr.toLowerCase();
|
||||
// Solr wants whitespace to be escaped with a backslash
|
||||
// Better: replace \s+
|
||||
queryStr = queryStr.replaceAll(" ", "\\\\ ");
|
||||
queryStr = VitroSearchTermNames.NAME_LOWERCASE + ":" + queryStr + "*";
|
||||
queryStr = queryStr.replaceAll("\\s+", "\\\\ ");
|
||||
queryStr = VitroSearchTermNames.AC_NAME_UNTOKENIZED + ":" + queryStr;
|
||||
query.setQuery(queryStr);
|
||||
|
||||
}
|
||||
|
|
|
@ -175,47 +175,50 @@ public class IndividualToSolrDocument {
|
|||
String t=null;
|
||||
addUri = new StringBuffer();
|
||||
addUri.append("");
|
||||
List<ObjectPropertyStatement> objectPropertyStatements = ind.getObjectPropertyStatements();
|
||||
if (objectPropertyStatements != null) {
|
||||
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
||||
while (objectPropertyStmtIter.hasNext()) {
|
||||
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
||||
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
|
||||
continue;
|
||||
try {
|
||||
objectNames.append(" ");
|
||||
objectNames.append(((t=objectPropertyStmt.getObject().getName()) == null)?"":t);
|
||||
addUri.append(" ");
|
||||
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
|
||||
} catch (Exception e) {
|
||||
List<ObjectPropertyStatement> objectPropertyStatements = ind.getObjectPropertyStatements();
|
||||
if (objectPropertyStatements != null) {
|
||||
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
||||
while (objectPropertyStmtIter.hasNext()) {
|
||||
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
||||
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
|
||||
continue;
|
||||
try {
|
||||
objectNames.append(" ");
|
||||
objectNames.append(((t=objectPropertyStmt.getObject().getName()) == null)?"":t);
|
||||
addUri.append(" ");
|
||||
addUri.append(((t=objectPropertyStmt.getObject().getURI()) == null)?"":t);
|
||||
} catch (Exception e) {
|
||||
log.debug("could not index name of related object: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(documentModifiers == null || documentModifiers.isEmpty()){
|
||||
if(documentModifiers == null || documentModifiers.isEmpty()){
|
||||
doc.addField(term.NAME_RAW, value, NAME_BOOST);
|
||||
doc.addField(term.NAME_LOWERCASE, value, NAME_BOOST);
|
||||
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
|
||||
doc.addField(term.NAME_UNSTEMMED, value, NAME_BOOST);
|
||||
doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
|
||||
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
|
||||
doc.addField(term.AC_NAME_UNTOKENIZED, value);
|
||||
}else{
|
||||
doc.addField(term.AC_NAME_STEMMED, value);
|
||||
}else{
|
||||
doc.addField(term.NAME_RAW, value);
|
||||
doc.addField(term.NAME_LOWERCASE, value);
|
||||
doc.addField(term.NAME_UNSTEMMED, value);
|
||||
doc.addField(term.NAME_STEMMED, value);
|
||||
doc.addField(term.NAME_PHONETIC, value, PHONETIC_BOOST);
|
||||
doc.addField(term.AC_NAME_UNTOKENIZED, value);
|
||||
}
|
||||
doc.addField(term.AC_NAME_UNTOKENIZED, value);
|
||||
doc.addField(term.AC_NAME_STEMMED, value);
|
||||
}
|
||||
|
||||
|
||||
long tMoniker = System.currentTimeMillis();
|
||||
|
||||
if(documentModifiers == null || documentModifiers.isEmpty()){
|
||||
//boost for entity
|
||||
if(ind.getSearchBoost() != null && ind.getSearchBoost() != 0)
|
||||
doc.setDocumentBoost(ind.getSearchBoost());
|
||||
//boost for entity
|
||||
if(ind.getSearchBoost() != null && ind.getSearchBoost() != 0) {
|
||||
doc.setDocumentBoost(ind.getSearchBoost());
|
||||
}
|
||||
}
|
||||
|
||||
//thumbnail
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue