From ad98e7723c8ad6b72d62b3d7bd4988e4f927b6f7 Mon Sep 17 00:00:00 2001 From: ryounes Date: Mon, 16 May 2011 19:16:47 +0000 Subject: [PATCH] NIHVIVO-2459 Solr field definitions for autocomplete --- solr/exampleSolr/conf/schema.xml | 26 ++++++++++++++----- .../controller/JSONReconcileServlet.java | 2 +- .../controller/AutocompleteController.java | 2 +- .../controller/PagedSearchController.java | 10 +++---- .../SolrAutocompleteController.java | 20 +++++++++++--- .../search/lucene/Entity2LuceneDoc.java | 17 +++++++----- .../webapp/search/lucene/LuceneSetup.java | 8 +++--- 7 files changed, 56 insertions(+), 29 deletions(-) diff --git a/solr/exampleSolr/conf/schema.xml b/solr/exampleSolr/conf/schema.xml index de35f58f7..310278693 100644 --- a/solr/exampleSolr/conf/schema.xml +++ b/solr/exampleSolr/conf/schema.xml @@ -224,7 +224,7 @@ words="stopwords.txt" enablePositionIncrements="true" /> - + @@ -242,6 +242,20 @@ + + + + + + + + + @@ -423,8 +437,6 @@ - - @@ -434,10 +446,10 @@ - - - - + + + + diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/JSONReconcileServlet.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/JSONReconcileServlet.java index fe4b9f63b..5f9827518 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/JSONReconcileServlet.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/JSONReconcileServlet.java @@ -377,7 +377,7 @@ public class JSONReconcileServlet extends VitroHttpServlet { String stemParam = (String) request.getParameter("stem"); boolean stem = "true".equals(stemParam); - String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED; + String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED; BooleanQuery boolQuery = new BooleanQuery(); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/AutocompleteController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/AutocompleteController.java index aa37476ab..2e83e231f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/AutocompleteController.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/AutocompleteController.java @@ -207,7 +207,7 @@ public class AutocompleteController extends VitroAjaxController { String stemParam = (String) request.getParameter("stem"); boolean stem = "true".equals(stemParam); - String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED; + String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED; BooleanQuery boolQuery = new BooleanQuery(); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java index 4aa6a5756..482267dd8 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java @@ -229,7 +229,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear Document document = searcherForRequest.doc(scoreDoc.doc); Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc); - log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED) + " score: " +scoreDoc.score); + log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED) + " score: " +scoreDoc.score); log.debug("Scoring of the doc explained " + explanation.toString()); log.debug("Explanation's description "+ explanation.getDescription()); log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT)); @@ -405,7 +405,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear Document doc; try { doc = searcher.doc(topDocs.scoreDocs[i].doc); - String name =doc.get(Entity2LuceneDoc.term.NAME_STEMMED); + String name =doc.get(Entity2LuceneDoc.term.AC_NAME_STEMMED); if( name != null && name.length() > 0) alphas.add( name.substring(0, 1)); } catch (CorruptIndexException e) { @@ -622,7 +622,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear BooleanQuery boolQuery = new BooleanQuery(); boolQuery.add( query, BooleanClause.Occur.MUST ); boolQuery.add( - new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME_STEMMED, alpha+'*')), + new WildcardQuery(new Term(Entity2LuceneDoc.term.AC_NAME_STEMMED, alpha+'*')), BooleanClause.Occur.MUST); query = boolQuery; } @@ -683,8 +683,8 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear // qp.setStemmedToUnstemmed(map); MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{ - VitroLuceneTermNames.NAME_STEMMED, - VitroLuceneTermNames.NAME_UNSTEMMED, + VitroLuceneTermNames.AC_NAME_STEMMED, + VitroLuceneTermNames.AC_NAME_UNSTEMMED, VitroLuceneTermNames.RDFTYPE, VitroLuceneTermNames.MONIKER, VitroLuceneTermNames.ALLTEXT, diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrAutocompleteController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrAutocompleteController.java index 677244105..bed61a8f9 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrAutocompleteController.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrAutocompleteController.java @@ -12,6 +12,7 @@ import java.util.Map; import javax.servlet.ServletException; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import javax.xml.parsers.ParserConfigurationException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -21,8 +22,10 @@ import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; +import org.apache.solr.core.SolrConfig; import org.json.JSONArray; import org.json.JSONObject; +import org.xml.sax.SAXException; import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions; import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers; @@ -176,7 +179,7 @@ public class SolrAutocompleteController extends VitroAjaxController { String stemParam = (String) request.getParameter("stem"); boolean stem = "true".equals(stemParam); - String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED; + String termName = stem ? VitroLuceneTermNames.AC_NAME_STEMMED : VitroLuceneTermNames.AC_NAME_UNSTEMMED ; BooleanQuery boolQuery = new BooleanQuery(); @@ -214,9 +217,18 @@ public class SolrAutocompleteController extends VitroAjaxController { //querystr = querystr.toLowerCase(); querystr += "*"; - query = query.setQuery(querystr); - // *** It's the df parameter that sets the field to search - //String field = VitroLuceneTermNames.LABEL_LOWERCASE; + //query = query.setQuery(VitroLuceneTermNames.NAME_LOWERCASE + ":" + querystr); + //query.addFilterQuery(VitroLuceneTermNames.NAME_LOWERCASE); + //query.setQuery(querystr); + + try { + SolrConfig config = new SolrConfig(); + + } catch (Exception e) { + // TODO Auto-generated catch block + log.error(e, e); + return null; + } return query; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java index 637542f20..30831f197 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java @@ -73,17 +73,20 @@ public class Entity2LuceneDoc implements Obj2DocIface{ public static final String CLASSLOCALNAME = "classLocalName"; // Fields derived from rdfs:label - /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/ + /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming. + * Used only in retrieval rather than search. **/ public static String NAME_RAW = "nameRaw"; // was NAMERAW /** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/ public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE - /** rdfs:label lowercased, tokenized, stop words, no stemming **/ - public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED + /** rdfs:label lowercased, tokenized, stop words, no stemming. + * Used for autocomplete matching on proper names. **/ + public static String AC_NAME_UNSTEMMED = "acNameUnstemmed"; // was NAMEUNSTEMMED - /** rdfs:label lowercased, tokenized, stop words, stemmed **/ - public static String NAME_STEMMED = "nameStemmed"; // was NAME + /** rdfs:label lowercased, tokenized, stop words, stemmed. + * Used for autocomplete matching where stemming is desired (e.g., book titles) **/ + public static String AC_NAME_STEMMED = "acNameStemmed"; // was NAME } @@ -215,11 +218,11 @@ public class Entity2LuceneDoc implements Obj2DocIface{ nameLowerCase.setBoost(NAME_BOOST); doc.add(nameLowerCase); - Field nameUnstemmed = new Field(term.NAME_UNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED); + Field nameUnstemmed = new Field(term.AC_NAME_UNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED); nameUnstemmed.setBoost(NAME_BOOST); doc.add(nameUnstemmed); - Field nameStemmed = new Field(term.NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED); + Field nameStemmed = new Field(term.AC_NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED); nameStemmed.setBoost(NAME_BOOST); doc.add(nameStemmed); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java index 94411f132..149fa4914 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java @@ -7,8 +7,8 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER; -import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED; -import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED; +import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED; +import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_UNSTEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE; import java.io.File; @@ -246,8 +246,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer()); analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer()); - analyzer.addAnalyzer(NAME_UNSTEMMED, new HtmlLowerStopAnalyzer()); - analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer()); + analyzer.addAnalyzer(AC_NAME_UNSTEMMED, new HtmlLowerStopAnalyzer()); + analyzer.addAnalyzer(AC_NAME_STEMMED, new HtmlLowerStopStemAnalyzer()); analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());