diff --git a/solr/exampleSolr/conf/schema.xml b/solr/exampleSolr/conf/schema.xml index 4438e571a..9b69f2189 100644 --- a/solr/exampleSolr/conf/schema.xml +++ b/solr/exampleSolr/conf/schema.xml @@ -486,6 +486,10 @@ + diff --git a/webapp/config/example.deploy.properties b/webapp/config/example.deploy.properties index 2818cb7f6..9148818b3 100644 --- a/webapp/config/example.deploy.properties +++ b/webapp/config/example.deploy.properties @@ -38,10 +38,11 @@ webapp.name = vitro vitro.home.directory = /usr/local/vitro/data # -# SMTP host which the "Contact Us" form can use to send mail. If this is left -# empty, the "Contact Us" form will be disabled. +# SMTP host used to send email, and the email recipient. +# If these are left empty, email is disabled. # -Vitro.smtpHost = +email.smtpHost = +email.replyTo = # # The basic parameters for a MySQL database connection. Change the end of the diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/SolrJsonReconcileServlet.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/SolrJsonReconcileServlet.java index 62f86e445..7c802d054 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/SolrJsonReconcileServlet.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/SolrJsonReconcileServlet.java @@ -15,6 +15,7 @@ import javax.servlet.ServletOutputStream; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; @@ -29,7 +30,6 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.util.Version; import org.json.JSONArray; import org.json.JSONException; @@ 
-53,7 +53,8 @@ import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup; */ public class SolrJsonReconcileServlet extends VitroHttpServlet { - private static String QUERY_PARAMETER_NAME = "term"; + private static final long serialVersionUID = 1L; + private static String QUERY_PARAMETER_NAME = "term"; public static final int MAX_QUERY_LENGTH = 500; private static final Log log = LogFactory.getLog(SolrJsonReconcileServlet.class.getName()); @@ -70,14 +71,14 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet { super.doGet(req, resp); resp.setContentType("application/json"); VitroRequest vreq = new VitroRequest(req); - System.out.println("vreq"); - System.out.println(vreq.getWebappDaoFactory()); + log.debug("vreq"); + log.debug(vreq.getWebappDaoFactory()); try { if (vreq.getParameter("query") != null || vreq.getParameter("queries") != null) { JSONObject qJson = getResult(vreq, req, resp); - System.out.println("result: " + qJson.toString()); + log.debug("result: " + qJson.toString()); String responseStr = (vreq.getParameter("callback") == null) ? 
qJson .toString() : vreq.getParameter("callback") + "(" + qJson.toString() + ")"; @@ -124,8 +125,7 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet { // "q2":{"query":"Dina","type":"http://xmlns.com/foaf/0.1/Person","type_strict":"should"}} String qStr = (String) qObj; queries.add(qStr); - System.out.println(); - System.out.println("query: " + qStr + "\n"); + log.debug("\nquery: " + qStr + "\n"); } try { @@ -158,7 +158,7 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet { } } } catch (JSONException ex) { - System.err.println("JSONReconcileServlet JSONException: " + ex); + log.error("JSONException: " + ex); throw new ServletException("JSONReconcileServlet JSONException: " + ex); } @@ -327,15 +327,15 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet { } } catch (JSONException ex) { - System.err.println("JSONReconcileServlet JSONException: " + ex); + log.error("JSONException: " + ex); throw new ServletException("JSONReconcileServlet JSONException: " + ex); } catch (SearchException ex) { - System.err.println("JSONReconcileServlet SearchException: " + ex); + log.error("SearchException: " + ex); throw new ServletException("JSONReconcileServlet SearchException: " + ex); } catch (IOException ex) { - System.err.println("JSONReconcileServlet IOException: " + ex); + log.error("IOException: " + ex); throw new ServletException("JSONReconcileServlet IOException: " + ex); } @@ -354,30 +354,12 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet { private Query makeReconcileNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) { - /* Original code - String tokenizeParam = (String) request.getParameter("tokenize"); - boolean tokenize = "true".equals(tokenizeParam); - - // Note: Stemming is only relevant if we are tokenizing: an untokenized name - // query will not be stemmed. So we don't look at the stem parameter until we get to - // makeTokenizedNameQuery(). 
- if (tokenize) { - return makeTokenizedNameQuery(querystr, analyzer, request); - } else { - return makeUntokenizedNameQuery(querystr); - } - */ - - // modified code for reconciliation service - request.setAttribute("stem", true); return makeTokenizedNameQuery(querystr, analyzer, request); } private Query makeTokenizedNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) { - - String stemParam = (String) request.getParameter("stem"); - boolean stem = "true".equals(stemParam); - String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED; + + String termName = VitroLuceneTermNames.NAME_STEMMED; BooleanQuery boolQuery = new BooleanQuery(); @@ -407,23 +389,9 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet { } catch (ParseException e) { log.warn(e, e); } - - + return boolQuery; } - - private Query makeUntokenizedNameQuery(String querystr) { - - querystr = querystr.toLowerCase(); - String termName = VitroLuceneTermNames.NAME_LOWERCASE; - BooleanQuery query = new BooleanQuery(); - log.debug("Adding wildcard query on unanalyzed name"); - query.add( - new WildcardQuery(new Term(termName, querystr + "*")), - BooleanClause.Occur.MUST); - - return query; - } private QueryParser getQueryParser(String searchField, Analyzer analyzer){ // searchField indicates which field to search against when there is no term @@ -450,10 +418,8 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet { "query length is " + MAX_QUERY_LENGTH ); return null; } - - + query = makeReconcileNameQuery(querystr, analyzer, request); - // filter by type if (typeParam != null) { @@ -471,7 +437,7 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet { while (it.hasNext()) { String[] pvPair = it.next(); Query extraQuery = makeReconcileNameQuery(pvPair[1], analyzer, request); - if (!"".equals(pvPair[0]) && pvPair[0] != null) { + if ( ! 
StringUtils.isEmpty(pvPair[0]) ) { BooleanQuery boolQuery = new BooleanQuery(); boolQuery.add(new TermQuery(new Term( VitroLuceneTermNames.RDFTYPE, pvPair[0])), diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/SolrJsonServlet.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/SolrJsonServlet.java index 5ed7fb989..405e7ae4f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/SolrJsonServlet.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/SolrJsonServlet.java @@ -53,7 +53,7 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; public class SolrJsonServlet extends VitroHttpServlet { private static final long serialVersionUID = 1L; - private static final Log log = LogFactory.getLog(SolrJsonServlet.class.getName()); + private static final Log log = LogFactory.getLog(SolrJsonServlet.class); private static final int REPLY_SIZE = 256; @Override diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrAutocompleteController.java.txt b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrAutocompleteController.java.txt new file mode 100644 index 000000000..95cab465e --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrAutocompleteController.java.txt @@ -0,0 +1,310 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.search.controller; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import org.apache.commons.lang.StringUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.lucene.search.BooleanQuery; +import 
org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServer; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.client.solrj.response.TermsResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrDocumentList; +import org.json.JSONArray; +import org.json.JSONObject; + +import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions; +import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers; +import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; +import edu.cornell.mannlib.vitro.webapp.controller.ajax.VitroAjaxController; +import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; +import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup; + +/** + * AutocompleteController generates autocomplete content + * through a Solr search. + */ + +// RY Rename to AutocompleteController once the transition to Solr is complete. 
+public class SolrAutocompleteController extends VitroAjaxController {
+
+    private static final long serialVersionUID = 1L;
+    private static final Log log = LogFactory.getLog(SolrAutocompleteController.class);
+
+    /** Request parameter holding the text the user has typed so far. */
+    private static final String PARAM_QUERY = "term";
+    /** Optional request parameter restricting results to one RDF type. */
+    private static final String PARAM_RDFTYPE = "type";
+
+    // NOTE(review): not referenced anywhere in this class; kept because it is
+    // package-visible and may be read from elsewhere - confirm before removing.
+    String NORESULT_MSG = "";
+    private static final int DEFAULT_MAX_HIT_COUNT = 1000;
+
+    public static final int MAX_QUERY_LENGTH = 500;
+
+    @Override
+    protected Actions requiredActions(VitroRequest vreq) {
+        return new Actions(new UseBasicAjaxControllers());
+    }
+
+    /**
+     * Runs the autocomplete search and writes the matches to the response as
+     * a JSON array of {label, uri} objects, sorted by label without regard to
+     * case. An empty array is written when there is no usable query, no hits,
+     * or the search fails.
+     *
+     * @param vreq     the request carrying the search parameters
+     * @param response the response the JSON array is written to
+     * @throws IOException if writing the empty-result reply itself fails
+     */
+    @Override
+    protected void doRequest(VitroRequest vreq, HttpServletResponse response)
+            throws IOException, ServletException {
+
+        try {
+            String qtxt = vreq.getParameter(PARAM_QUERY);
+
+            SolrQuery query = getQuery(qtxt, vreq);
+            if (query == null) {
+                log.debug("query for '" + qtxt + "' is null.");
+                doNoQuery(response);
+                return;
+            }
+            log.debug("query for '" + qtxt + "' is " + query.toString());
+
+            SolrServer solr = SolrSetup.getSolrServer(getServletContext());
+            QueryResponse queryResponse = solr.query(query);
+            if (queryResponse == null) {
+                log.error("Query response for a search was null");
+                doNoSearchResults(response);
+                return;
+            }
+
+            SolrDocumentList docs = queryResponse.getResults();
+            if (docs == null) {
+                log.error("Docs for a search was null");
+                doNoSearchResults(response);
+                return;
+            }
+
+            long hitCount = docs.getNumFound();
+            log.debug("Number of hits = " + hitCount);
+            if (hitCount < 1) {
+                doNoSearchResults(response);
+                return;
+            }
+
+            List<SearchResult> results = new ArrayList<SearchResult>();
+            for (SolrDocument doc : docs) {
+                SearchResult result = toSearchResult(doc);
+                if (result != null) {
+                    results.add(result);
+                }
+            }
+
+            // Solr cannot sort on a multivalued field, so sort here instead.
+            Collections.sort(results);
+
+            JSONArray jsonArray = new JSONArray();
+            for (SearchResult result : results) {
+                jsonArray.put(result.toMap());
+            }
+            response.getWriter().write(jsonArray.toString());
+
+        } catch (Exception e) {
+            // Any failure degrades to "no results"; Errors are left to propagate.
+            log.error(e, e);
+            doSearchError(response);
+        }
+    }
+
+    /**
+     * Converts one Solr hit into a SearchResult.
+     *
+     * @return the result, or null (after logging) when the document has no
+     *         URI or no name and therefore cannot be shown
+     */
+    private SearchResult toSearchResult(SolrDocument doc) {
+        Object uri = doc.getFirstValue(VitroLuceneTermNames.URI);
+        // NAME_RAW is a multivalued field; the first value is used as the label.
+        Object name = doc.getFirstValue(VitroLuceneTermNames.NAME_RAW);
+        if (uri == null || name == null) {
+            log.error("problem getting usable Individuals from search hits: "
+                    + "document is missing its URI or name");
+            return null;
+        }
+        return new SearchResult(name.toString(), uri.toString());
+    }
+
+    /**
+     * Builds the Solr query for the given search text.
+     *
+     * @param queryStr the raw text from the request; may be null
+     * @param vreq     the request, consulted for the type/tokenize/stem parameters
+     * @return the query, or null when the text is missing, blank, or longer
+     *         than MAX_QUERY_LENGTH
+     */
+    private SolrQuery getQuery(String queryStr, VitroRequest vreq) {
+
+        if (queryStr == null) {
+            log.error("There was no parameter '" + PARAM_QUERY
+                    + "' in the request.");
+            return null;
+        }
+        if (StringUtils.isBlank(queryStr)) {
+            // A blank term would otherwise become a match-everything wildcard.
+            log.debug("No query string");
+            return null;
+        }
+        if (queryStr.length() > MAX_QUERY_LENGTH) {
+            log.debug("The search was too long. The maximum "
+                    + "query length is " + MAX_QUERY_LENGTH);
+            return null;
+        }
+
+        SolrQuery query = new SolrQuery();
+        query.setStart(0)
+             .setRows(DEFAULT_MAX_HIT_COUNT);
+
+        setQuery(query, queryStr, vreq);
+
+        // Filter by type. The value comes from the request, so escape it
+        // before placing it inside the quoted phrase.
+        String typeParam = vreq.getParameter(PARAM_RDFTYPE);
+        if (typeParam != null) {
+            query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\""
+                    + escapeForSolr(typeParam) + "\"");
+        }
+
+        // fields to retrieve
+        query.setFields(VitroLuceneTermNames.NAME_RAW, VitroLuceneTermNames.URI);
+
+        return query;
+    }
+
+    /**
+     * Chooses the query style from the "tokenize" and "stem" request
+     * parameters and sets the query string on the SolrQuery accordingly.
+     */
+    private void setQuery(SolrQuery query, String queryStr, HttpServletRequest request) {
+
+        boolean tokenize = "true".equals(request.getParameter("tokenize"));
+        if (!tokenize) {
+            setUntokenizedQuery(query, queryStr);
+            return;
+        }
+
+        // Stemming is only relevant if we are tokenizing: an untokenized name
+        // query will not be stemmed.
+        boolean stem = "true".equals(request.getParameter("stem"));
+        if (stem) {
+            setStemmedQuery(query, queryStr);
+        } else {
+            setTokenizedQuery(query, queryStr);
+        }
+    }
+
+    /**
+     * Sets the query for a tokenized, stemmed search.
+     *
+     * A stemmed Solr query has not been written yet. Until it is, fall back
+     * to the untokenized query so the request still carries a query string.
+     */
+    private void setStemmedQuery(SolrQuery query, String queryStr) {
+        // TODO build a real query against VitroLuceneTermNames.NAME_STEMMED.
+        log.debug("Stemmed query not implemented; using untokenized query");
+        setUntokenizedQuery(query, queryStr);
+    }
+
+    /**
+     * Sets the query for a tokenized, unstemmed search.
+     *
+     * RY 5/18/2011 For now, just doing untokenized query, due to the
+     * interaction of wildcard queries and stemming. A wildcard term such as
+     * name:tales* does not match "tales" when it is indexed as "tale", while
+     * name:tales does. The Lucene version therefore used a disjunction of a
+     * wildcard and a non-wildcard query, e.g. +(name:tales name:tales*), and
+     * skipped the wildcard when the text ended with a space. The same
+     * approach should be taken here once the disjunction can be expressed in
+     * Solr, probably with an explicit "OR" between the terms.
+     */
+    private void setTokenizedQuery(SolrQuery query, String queryStr) {
+        // Pass the raw text through: setUntokenizedQuery does its own
+        // lowercasing, escaping and field prefixing.
+        setUntokenizedQuery(query, queryStr);
+    }
+
+    /**
+     * Sets a prefix (wildcard) query on the lowercased name field.
+     */
+    private void setUntokenizedQuery(SolrQuery query, String queryStr) {
+        // We have to lowercase manually, because Solr doesn't do text analysis
+        // on wildcard queries. Locale.ROOT keeps the result independent of the
+        // server's default locale.
+        String term = escapeForSolr(queryStr.toLowerCase(java.util.Locale.ROOT));
+        query.setQuery(VitroLuceneTermNames.NAME_LOWERCASE + ":" + term + "*");
+    }
+
+    /**
+     * Backslash-escapes whitespace and the characters that are special to the
+     * Solr query parser, so that request text is treated as literal text.
+     */
+    private static String escapeForSolr(String text) {
+        return org.apache.solr.client.solrj.util.ClientUtils.escapeQueryChars(text);
+    }
+
+    private void doNoQuery(HttpServletResponse response) throws IOException {
+        // For now, we are not sending an error message back to the client because
+        // with the default autocomplete configuration it chokes.
+        doNoSearchResults(response);
+    }
+
+    private void doSearchError(HttpServletResponse response) throws IOException {
+        // For now, we are not sending an error message back to the client because
+        // with the default autocomplete configuration it chokes.
+        doNoSearchResults(response);
+    }
+
+    /** Writes an empty JSON array to the response. */
+    private void doNoSearchResults(HttpServletResponse response) throws IOException {
+        response.getWriter().write("[]");
+    }
+
+    /**
+     * One autocomplete suggestion: a display label and the URI it refers to.
+     * Results order by label, ignoring case.
+     */
+    public static class SearchResult implements Comparable<Object> {
+        private final String label;
+        private final String uri;
+
+        SearchResult(String label, String uri) {
+            this.label = label;
+            this.uri = uri;
+        }
+
+        public String getLabel() {
+            return label;
+        }
+
+        /** @return the label as a quoted JSON string */
+        public String getJsonLabel() {
+            return JSONObject.quote(label);
+        }
+
+        public String getUri() {
+            return uri;
+        }
+
+        /** @return the URI as a quoted JSON string */
+        public String getJsonUri() {
+            return JSONObject.quote(uri);
+        }
+
+        /** @return a map with the keys "label" and "uri" */
+        Map<String, String> toMap() {
+            Map<String, String> map = new HashMap<String, String>();
+            map.put("label", label);
+            map.put("uri", uri);
+            return map;
+        }
+
+        /**
+         * Compares labels without regard to case.
+         *
+         * @throws ClassCastException if o is not a SearchResult
+         */
+        public int compareTo(Object o) throws ClassCastException {
+            if ( !(o instanceof SearchResult) ) {
+                throw new ClassCastException("Error in SearchResult.compareTo(): expected SearchResult object.");
+            }
+            SearchResult sr = (SearchResult) o;
+            return label.compareToIgnoreCase(sr.getLabel());
+        }
+    }
+
+}
diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrPagedSearchController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrPagedSearchController.java index 4b7e4e44c..6b92dbea2 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrPagedSearchController.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrPagedSearchController.java @@ -336,7 +336,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet { /** - * Get the class groups represented for the individuals in the topDocs. + * Get the class groups represented for the individuals in the documents. */ private List getClassGroups(VClassGroupDao grpDao, SolrDocumentList docs) { LinkedHashMap grpMap = grpDao.getClassGroupMap();