From 50b159710bc3f56dbd26a203b87db4b4eae0bb35 Mon Sep 17 00:00:00 2001 From: ryounes Date: Fri, 1 Jul 2011 16:19:31 +0000 Subject: [PATCH] NIHVIVO-2459 Improvements to stemmed autocomplete matching. NIHVIVO-2801 Fix error in SolrAutocompleteController. --- solr/exampleSolr/conf/schema.xml | 6 +- solr/exampleSolr/conf/stopwords-name.txt | 38 +++++++++++ solr/exampleSolr/conf/stopwords.txt | 6 -- .../SolrIndividualListController.java | 2 +- .../vitro/webapp/dao/jena/IndividualJena.java | 6 +- .../SolrAutocompleteController.java | 68 +++++++++++++------ .../search/solr/IndividualToSolrDocument.java | 7 +- 7 files changed, 97 insertions(+), 36 deletions(-) create mode 100644 solr/exampleSolr/conf/stopwords-name.txt diff --git a/solr/exampleSolr/conf/schema.xml b/solr/exampleSolr/conf/schema.xml index a89fcb66c..6e5bcadce 100644 --- a/solr/exampleSolr/conf/schema.xml +++ b/solr/exampleSolr/conf/schema.xml @@ -259,7 +259,7 @@ + words="stopwords-name.txt" enablePositionIncrements="true" /> + words="stopwords-name.txt" enablePositionIncrements="true" /> + words="stopwords-name.txt" enablePositionIncrements="true" /> ) doc.get(VitroSearchTermNames.NAME_RAW)).get(0); + // RY 7/1/2011 + // Comment was: VitroSearchTermNames.NAME_RAW is a multivalued field, so doc.get() returns a list. + // Changed to: VitroSearchTermNames.NAME_RAW is a multivalued field, so doc.get() could return a list + // But in fact: I'm no longer seeing any lists returned for individuals with multiple labels. Not sure + // if this is new behavior or what. ??? + Object nameRaw = doc.get(VitroSearchTermNames.NAME_RAW); + String name = null; + if (nameRaw instanceof List) { + @SuppressWarnings("unchecked") + List nameRawList = (List) nameRaw; + name = nameRawList.get(0); + } else { + name = (String) nameRaw; + } SearchResult result = new SearchResult(name, uri); results.add(result); } catch(Exception e){ - log.error("problem getting usable Individuals from search " + + log.error("problem getting usable individuals from search " + "hits" + e.getMessage()); } } @@ -195,31 +206,48 @@ public class SolrAutocompleteController extends VitroAjaxController { String acTermName = VitroSearchTermNames.AC_NAME_STEMMED; String nonAcTermName = VitroSearchTermNames.NAME_STEMMED; + String acQueryStr; if (queryStr.endsWith(" ")) { - // Solr wants whitespace to be escaped with a backslash - queryStr = queryStr.replaceAll("\\s+", "\\\\ "); - queryStr = nonAcTermName + ":" + queryStr; + acQueryStr = makeTermQuery(nonAcTermName, queryStr, true); } else { int indexOfLastWord = queryStr.lastIndexOf(" ") + 1; - String queryStr1 = queryStr.substring(0, indexOfLastWord); - String queryStr2 = queryStr.substring(indexOfLastWord); - queryStr = nonAcTermName + ":\"" + queryStr1 + "\"+" + acTermName + ":" + queryStr2; + List terms = new ArrayList(2); + + String allButLastWord = queryStr.substring(0, indexOfLastWord); + if (StringUtils.isNotBlank(allButLastWord)) { + terms.add(makeTermQuery(nonAcTermName, allButLastWord, true)); + } + + String lastWord = queryStr.substring(indexOfLastWord); + if (StringUtils.isNotBlank(lastWord)) { + terms.add(makeTermQuery(acTermName, lastWord, false)); + } + + acQueryStr = StringUtils.join(terms, " AND "); } - log.debug("Tokenized name query string = " + queryStr); - query.setQuery(queryStr); + log.debug("Tokenized name query string = " + acQueryStr); + query.setQuery(acQueryStr); } - private void setUntokenizedNameQuery(SolrQuery query, String queryStr) { - - queryStr = queryStr.trim(); - // Solr wants whitespace to be escaped with a backslash - queryStr = queryStr.replaceAll("\\s+", "\\\\ "); - queryStr = VitroSearchTermNames.AC_NAME_UNTOKENIZED + ":" + queryStr; + private void setUntokenizedNameQuery(SolrQuery query, String queryStr) { + queryStr = queryStr.trim(); + queryStr = makeTermQuery(VitroSearchTermNames.AC_NAME_UNTOKENIZED, queryStr, true); query.setQuery(queryStr); - + } + + private String makeTermQuery(String term, String queryStr, boolean mayContainWhitespace) { + if (mayContainWhitespace) { + queryStr = "\"" + escapeWhitespaceInQueryString(queryStr) + "\""; + } + return term + ":" + queryStr; + } + + private String escapeWhitespaceInQueryString(String queryStr) { + // Solr wants whitespace to be escaped with a backslash + return queryStr.replaceAll("\\s+", "\\\\ "); } private void doNoQuery(HttpServletResponse response) throws IOException { diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java index 58a2e5712..32ff22e75 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java @@ -178,9 +178,10 @@ public class IndividualToSolrDocument { private void addLabel(Individual ind, SolrInputDocument doc) { String value = ""; - if(ind.getRdfsLabel() != null) - value = ind.getRdfsLabel(); - else{ + String label = ind.getRdfsLabel(); + if (label != null) { + value = label; + } else { value = ind.getLocalName(); } doc.addField(term.NAME_RAW, value);