NIHVIVO-2459 Improvements to stemmed autocomplete matching. NIHVIVO-2801 Fix error in SolrAutocompleteController.
This commit is contained in:
parent
cebc368738
commit
50b159710b
7 changed files with 97 additions and 36 deletions
|
@ -259,7 +259,7 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" enablePositionIncrements="true" />
|
||||
words="stopwords-name.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"
|
||||
|
@ -458,7 +458,7 @@
|
|||
<analyzer type="index">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" enablePositionIncrements="true" />
|
||||
words="stopwords-name.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"
|
||||
|
@ -470,7 +470,7 @@
|
|||
<analyzer type="query">
|
||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory" ignoreCase="true"
|
||||
words="stopwords.txt" enablePositionIncrements="true" />
|
||||
words="stopwords-name.txt" enablePositionIncrements="true" />
|
||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1"
|
||||
generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0"
|
||||
|
|
38
solr/exampleSolr/conf/stopwords-name.txt
Normal file
38
solr/exampleSolr/conf/stopwords-name.txt
Normal file
|
@ -0,0 +1,38 @@
|
|||
# Standard English stop words taken from Lucene's StopAnalyzer
|
||||
# Stopwords used in autocomplete (label-matching) fields, since the full list in stopwords.txt may be too inclusive. Because labels include things like
|
||||
# book titles, we want a smaller set of stopwords.
|
||||
|
||||
a
|
||||
an
|
||||
and
|
||||
are
|
||||
as
|
||||
at
|
||||
be
|
||||
but
|
||||
by
|
||||
for
|
||||
if
|
||||
in
|
||||
into
|
||||
is
|
||||
it
|
||||
no
|
||||
not
|
||||
of
|
||||
on
|
||||
or
|
||||
s
|
||||
t
|
||||
that
|
||||
the
|
||||
their
|
||||
then
|
||||
there
|
||||
these
|
||||
they
|
||||
this
|
||||
to
|
||||
was
|
||||
will
|
||||
with
|
|
@ -130,9 +130,6 @@ our
|
|||
out
|
||||
over
|
||||
re
|
||||
said
|
||||
same
|
||||
see
|
||||
should
|
||||
since
|
||||
so
|
||||
|
@ -156,13 +153,10 @@ to
|
|||
too
|
||||
under
|
||||
up
|
||||
use
|
||||
very
|
||||
want
|
||||
was
|
||||
way
|
||||
we
|
||||
well
|
||||
were
|
||||
what
|
||||
when
|
||||
|
|
|
@ -254,7 +254,7 @@ public class SolrIndividualListController extends FreemarkerHttpServlet {
|
|||
if (individual != null) {
|
||||
individualsAdded++;
|
||||
individuals.add(individual);
|
||||
log.debug("Adding individual " + uri + " to individuals for display");
|
||||
log.debug("Adding individual " + uri + " to individual list display");
|
||||
} else {
|
||||
log.debug("No existing individual for search document with uri = " + uri);
|
||||
}
|
||||
|
|
|
@ -53,7 +53,7 @@ public class IndividualJena extends IndividualImpl implements Individual {
|
|||
private OntResource ind = null;
|
||||
private WebappDaoFactoryJena webappDaoFactory = null;
|
||||
private Float _searchBoostJena = null;
|
||||
private boolean retreivedNullRdfsLabel = false;
|
||||
private boolean retrievedNullRdfsLabel = false;
|
||||
|
||||
public IndividualJena(OntResource ind, WebappDaoFactoryJena wadf) {
|
||||
this.ind = ind;
|
||||
|
@ -88,13 +88,13 @@ public class IndividualJena extends IndividualImpl implements Individual {
|
|||
public String getRdfsLabel() {
|
||||
if (this.rdfsLabel != null) {
|
||||
return rdfsLabel;
|
||||
} else if( this.rdfsLabel == null && retreivedNullRdfsLabel ){
|
||||
} else if( this.rdfsLabel == null && retrievedNullRdfsLabel ){
|
||||
return null;
|
||||
} else {
|
||||
ind.getOntModel().enterCriticalSection(Lock.READ);
|
||||
try {
|
||||
this.rdfsLabel = webappDaoFactory.getJenaBaseDao().getLabel(ind);
|
||||
retreivedNullRdfsLabel = this.rdfsLabel == null;
|
||||
retrievedNullRdfsLabel = this.rdfsLabel == null;
|
||||
return this.rdfsLabel;
|
||||
} finally {
|
||||
ind.getOntModel().leaveCriticalSection();
|
||||
|
|
|
@ -99,15 +99,26 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
|
||||
List<SearchResult> results = new ArrayList<SearchResult>();
|
||||
for (SolrDocument doc : docs) {
|
||||
try{
|
||||
try {
|
||||
String uri = doc.get(VitroSearchTermNames.URI).toString();
|
||||
// VitroSearchTermNames.NAME_RAW is a multivalued field, so doc.get() returns a list
|
||||
@SuppressWarnings("unchecked")
|
||||
String name = ((List<String>) doc.get(VitroSearchTermNames.NAME_RAW)).get(0);
|
||||
// RY 7/1/2011
|
||||
// Comment was: VitroSearchTermNames.NAME_RAW is a multivalued field, so doc.get() returns a list.
|
||||
// Changed to: VitroSearchTermNames.NAME_RAW is a multivalued field, so doc.get() could return a list
|
||||
// But in fact: I'm no longer seeing any lists returned for individuals with multiple labels. Not sure
|
||||
// if this is new behavior or what. ???
|
||||
Object nameRaw = doc.get(VitroSearchTermNames.NAME_RAW);
|
||||
String name = null;
|
||||
if (nameRaw instanceof List<?>) {
|
||||
@SuppressWarnings("unchecked")
|
||||
List<String> nameRawList = (List<String>) nameRaw;
|
||||
name = nameRawList.get(0);
|
||||
} else {
|
||||
name = (String) nameRaw;
|
||||
}
|
||||
SearchResult result = new SearchResult(name, uri);
|
||||
results.add(result);
|
||||
} catch(Exception e){
|
||||
log.error("problem getting usable Individuals from search " +
|
||||
log.error("problem getting usable individuals from search " +
|
||||
"hits" + e.getMessage());
|
||||
}
|
||||
}
|
||||
|
@ -195,31 +206,48 @@ public class SolrAutocompleteController extends VitroAjaxController {
|
|||
|
||||
String acTermName = VitroSearchTermNames.AC_NAME_STEMMED;
|
||||
String nonAcTermName = VitroSearchTermNames.NAME_STEMMED;
|
||||
String acQueryStr;
|
||||
|
||||
if (queryStr.endsWith(" ")) {
|
||||
// Solr wants whitespace to be escaped with a backslash
|
||||
queryStr = queryStr.replaceAll("\\s+", "\\\\ ");
|
||||
queryStr = nonAcTermName + ":" + queryStr;
|
||||
acQueryStr = makeTermQuery(nonAcTermName, queryStr, true);
|
||||
} else {
|
||||
int indexOfLastWord = queryStr.lastIndexOf(" ") + 1;
|
||||
String queryStr1 = queryStr.substring(0, indexOfLastWord);
|
||||
String queryStr2 = queryStr.substring(indexOfLastWord);
|
||||
queryStr = nonAcTermName + ":\"" + queryStr1 + "\"+" + acTermName + ":" + queryStr2;
|
||||
List<String> terms = new ArrayList<String>(2);
|
||||
|
||||
String allButLastWord = queryStr.substring(0, indexOfLastWord);
|
||||
if (StringUtils.isNotBlank(allButLastWord)) {
|
||||
terms.add(makeTermQuery(nonAcTermName, allButLastWord, true));
|
||||
}
|
||||
|
||||
String lastWord = queryStr.substring(indexOfLastWord);
|
||||
if (StringUtils.isNotBlank(lastWord)) {
|
||||
terms.add(makeTermQuery(acTermName, lastWord, false));
|
||||
}
|
||||
|
||||
acQueryStr = StringUtils.join(terms, " AND ");
|
||||
}
|
||||
|
||||
log.debug("Tokenized name query string = " + queryStr);
|
||||
query.setQuery(queryStr);
|
||||
log.debug("Tokenized name query string = " + acQueryStr);
|
||||
query.setQuery(acQueryStr);
|
||||
|
||||
}
|
||||
|
||||
private void setUntokenizedNameQuery(SolrQuery query, String queryStr) {
|
||||
|
||||
queryStr = queryStr.trim();
|
||||
// Solr wants whitespace to be escaped with a backslash
|
||||
queryStr = queryStr.replaceAll("\\s+", "\\\\ ");
|
||||
queryStr = VitroSearchTermNames.AC_NAME_UNTOKENIZED + ":" + queryStr;
|
||||
private void setUntokenizedNameQuery(SolrQuery query, String queryStr) {
|
||||
queryStr = queryStr.trim();
|
||||
queryStr = makeTermQuery(VitroSearchTermNames.AC_NAME_UNTOKENIZED, queryStr, true);
|
||||
query.setQuery(queryStr);
|
||||
|
||||
}
|
||||
|
||||
/**
 * Builds a single "field:value" clause for a Solr query string.
 *
 * When mayContainWhitespace is true, each run of whitespace in queryStr is
 * backslash-escaped (via escapeWhitespaceInQueryString) and the result is
 * wrapped in double quotes. When it is false, queryStr is appended unchanged.
 *
 * NOTE(review): no other query-syntax characters (e.g. an embedded double
 * quote or colon) are escaped in this method; confirm that callers sanitize
 * user input before it reaches here.
 *
 * @param term the name of the field to query
 * @param queryStr the text to match against the field
 * @param mayContainWhitespace whether queryStr should be escaped and quoted
 * @return the clause in the form term + ":" + value
 */
private String makeTermQuery(String term, String queryStr, boolean mayContainWhitespace) {
|
||||
if (mayContainWhitespace) {
|
||||
// Escape whitespace, then wrap the whole value in double quotes.
queryStr = "\"" + escapeWhitespaceInQueryString(queryStr) + "\"";
|
||||
}
|
||||
return term + ":" + queryStr;
|
||||
}
|
||||
|
||||
/**
 * Replaces every run of one or more whitespace characters in queryStr with a
 * single backslash followed by a single space. Consecutive whitespace
 * characters are therefore collapsed into one escaped space.
 *
 * @param queryStr the raw query text; must not be null, since replaceAll is
 *                 invoked on it directly
 * @return the query text with whitespace runs escaped
 */
private String escapeWhitespaceInQueryString(String queryStr) {
|
||||
// Solr wants whitespace to be escaped with a backslash
|
||||
// In the replacement string, "\\\\ " is a literal backslash plus a space.
return queryStr.replaceAll("\\s+", "\\\\ ");
|
||||
}
|
||||
|
||||
private void doNoQuery(HttpServletResponse response) throws IOException {
|
||||
|
|
|
@ -178,9 +178,10 @@ public class IndividualToSolrDocument {
|
|||
|
||||
private void addLabel(Individual ind, SolrInputDocument doc) {
|
||||
String value = "";
|
||||
if(ind.getRdfsLabel() != null)
|
||||
value = ind.getRdfsLabel();
|
||||
else{
|
||||
String label = ind.getRdfsLabel();
|
||||
if (label != null) {
|
||||
value = label;
|
||||
} else {
|
||||
value = ind.getLocalName();
|
||||
}
|
||||
doc.addField(term.NAME_RAW, value);
|
||||
|
|
Loading…
Add table
Reference in a new issue