NIHVIVO-2459, NIHVIVO-2643 Solr versions of autocomplete and json reconcile

This commit is contained in:
ryounes 2011-05-31 14:58:40 +00:00
parent 04edace976
commit 4c93736902
6 changed files with 336 additions and 55 deletions

View file

@ -486,6 +486,10 @@
<field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" /> <field name="nameLowercaseSingleValued" type="lowercase" indexed="true" stored="false" multiValued="false" />
<field name="nameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/> <field name="nameUnstemmed" type="textUnstemmed" indexed="true" stored="false" multiValued="true"/>
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/> <field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<!--
<field name="prefixNameUnstemmed" type="prefixTextUnstemmed" indexed="true" stored="false" multiValued="true"/>
<field name="prefixNameStemmed" type="prefixText" indexed="true" stored="false" multiValued="true"/>
-->
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/> <field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/> <field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/> <field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>

View file

@ -38,10 +38,11 @@ webapp.name = vitro
vitro.home.directory = /usr/local/vitro/data vitro.home.directory = /usr/local/vitro/data
# #
# SMTP host which the "Contact Us" form can use to send mail. If this is left # SMTP host used to send email, and the email recipient.
# empty, the "Contact Us" form will be disabled. # If these are left empty, email is disabled.
# #
Vitro.smtpHost = email.smtpHost = appsmtp.mail.cornell.edu
email.replyTo = rjy7@cornell.edu
# #
# The basic parameters for a MySQL database connection. Change the end of the # The basic parameters for a MySQL database connection. Change the end of the

View file

@ -15,6 +15,7 @@ import javax.servlet.ServletOutputStream;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
@ -29,7 +30,6 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONException; import org.json.JSONException;
@ -53,7 +53,8 @@ import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
*/ */
public class SolrJsonReconcileServlet extends VitroHttpServlet { public class SolrJsonReconcileServlet extends VitroHttpServlet {
private static String QUERY_PARAMETER_NAME = "term"; private static final long serialVersionUID = 1L;
private static String QUERY_PARAMETER_NAME = "term";
public static final int MAX_QUERY_LENGTH = 500; public static final int MAX_QUERY_LENGTH = 500;
private static final Log log = LogFactory.getLog(SolrJsonReconcileServlet.class.getName()); private static final Log log = LogFactory.getLog(SolrJsonReconcileServlet.class.getName());
@ -70,14 +71,14 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
super.doGet(req, resp); super.doGet(req, resp);
resp.setContentType("application/json"); resp.setContentType("application/json");
VitroRequest vreq = new VitroRequest(req); VitroRequest vreq = new VitroRequest(req);
System.out.println("vreq"); log.debug("vreq");
System.out.println(vreq.getWebappDaoFactory()); log.debug(vreq.getWebappDaoFactory());
try { try {
if (vreq.getParameter("query") != null if (vreq.getParameter("query") != null
|| vreq.getParameter("queries") != null) { || vreq.getParameter("queries") != null) {
JSONObject qJson = getResult(vreq, req, resp); JSONObject qJson = getResult(vreq, req, resp);
System.out.println("result: " + qJson.toString()); log.debug("result: " + qJson.toString());
String responseStr = (vreq.getParameter("callback") == null) ? qJson String responseStr = (vreq.getParameter("callback") == null) ? qJson
.toString() : vreq.getParameter("callback") + "(" .toString() : vreq.getParameter("callback") + "("
+ qJson.toString() + ")"; + qJson.toString() + ")";
@ -124,8 +125,7 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
// "q2":{"query":"Dina","type":"http://xmlns.com/foaf/0.1/Person","type_strict":"should"}} // "q2":{"query":"Dina","type":"http://xmlns.com/foaf/0.1/Person","type_strict":"should"}}
String qStr = (String) qObj; String qStr = (String) qObj;
queries.add(qStr); queries.add(qStr);
System.out.println(); log.debug("\nquery: " + qStr + "\n");
System.out.println("query: " + qStr + "\n");
} }
try { try {
@ -158,7 +158,7 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
} }
} }
} catch (JSONException ex) { } catch (JSONException ex) {
System.err.println("JSONReconcileServlet JSONException: " + ex); log.error("JSONException: " + ex);
throw new ServletException("JSONReconcileServlet JSONException: " throw new ServletException("JSONReconcileServlet JSONException: "
+ ex); + ex);
} }
@ -327,15 +327,15 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
} }
} catch (JSONException ex) { } catch (JSONException ex) {
System.err.println("JSONReconcileServlet JSONException: " + ex); log.error("JSONException: " + ex);
throw new ServletException("JSONReconcileServlet JSONException: " throw new ServletException("JSONReconcileServlet JSONException: "
+ ex); + ex);
} catch (SearchException ex) { } catch (SearchException ex) {
System.err.println("JSONReconcileServlet SearchException: " + ex); log.error("SearchException: " + ex);
throw new ServletException("JSONReconcileServlet SearchException: " throw new ServletException("JSONReconcileServlet SearchException: "
+ ex); + ex);
} catch (IOException ex) { } catch (IOException ex) {
System.err.println("JSONReconcileServlet IOException: " + ex); log.error("IOException: " + ex);
throw new ServletException("JSONReconcileServlet IOException: " throw new ServletException("JSONReconcileServlet IOException: "
+ ex); + ex);
} }
@ -354,30 +354,12 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
private Query makeReconcileNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) { private Query makeReconcileNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) {
/* Original code
String tokenizeParam = (String) request.getParameter("tokenize");
boolean tokenize = "true".equals(tokenizeParam);
// Note: Stemming is only relevant if we are tokenizing: an untokenized name
// query will not be stemmed. So we don't look at the stem parameter until we get to
// makeTokenizedNameQuery().
if (tokenize) {
return makeTokenizedNameQuery(querystr, analyzer, request);
} else {
return makeUntokenizedNameQuery(querystr);
}
*/
// modified code for reconciliation service
request.setAttribute("stem", true);
return makeTokenizedNameQuery(querystr, analyzer, request); return makeTokenizedNameQuery(querystr, analyzer, request);
} }
private Query makeTokenizedNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) { private Query makeTokenizedNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) {
String stemParam = (String) request.getParameter("stem"); String termName = VitroLuceneTermNames.NAME_STEMMED;
boolean stem = "true".equals(stemParam);
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
@ -408,23 +390,9 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
log.warn(e, e); log.warn(e, e);
} }
return boolQuery; return boolQuery;
} }
private Query makeUntokenizedNameQuery(String querystr) {
querystr = querystr.toLowerCase();
String termName = VitroLuceneTermNames.NAME_LOWERCASE;
BooleanQuery query = new BooleanQuery();
log.debug("Adding wildcard query on unanalyzed name");
query.add(
new WildcardQuery(new Term(termName, querystr + "*")),
BooleanClause.Occur.MUST);
return query;
}
private QueryParser getQueryParser(String searchField, Analyzer analyzer){ private QueryParser getQueryParser(String searchField, Analyzer analyzer){
// searchField indicates which field to search against when there is no term // searchField indicates which field to search against when there is no term
// indicated in the query string. // indicated in the query string.
@ -451,10 +419,8 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
return null; return null;
} }
query = makeReconcileNameQuery(querystr, analyzer, request); query = makeReconcileNameQuery(querystr, analyzer, request);
// filter by type // filter by type
if (typeParam != null) { if (typeParam != null) {
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
@ -471,7 +437,7 @@ public class SolrJsonReconcileServlet extends VitroHttpServlet {
while (it.hasNext()) { while (it.hasNext()) {
String[] pvPair = it.next(); String[] pvPair = it.next();
Query extraQuery = makeReconcileNameQuery(pvPair[1], analyzer, request); Query extraQuery = makeReconcileNameQuery(pvPair[1], analyzer, request);
if (!"".equals(pvPair[0]) && pvPair[0] != null) { if ( ! StringUtils.isEmpty(pvPair[0]) ) {
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
boolQuery.add(new TermQuery(new Term( boolQuery.add(new TermQuery(new Term(
VitroLuceneTermNames.RDFTYPE, pvPair[0])), VitroLuceneTermNames.RDFTYPE, pvPair[0])),

View file

@ -53,7 +53,7 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
public class SolrJsonServlet extends VitroHttpServlet { public class SolrJsonServlet extends VitroHttpServlet {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
private static final Log log = LogFactory.getLog(SolrJsonServlet.class.getName()); private static final Log log = LogFactory.getLog(SolrJsonServlet.class);
private static final int REPLY_SIZE = 256; private static final int REPLY_SIZE = 256;
@Override @Override

View file

@ -0,0 +1,310 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.controller;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.search.BooleanQuery;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.json.JSONArray;
import org.json.JSONObject;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.controller.ajax.VitroAjaxController;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
/**
 * AutocompleteController generates autocomplete content
 * through a Solr search.
 *
 * <p>The text typed so far is read from the request parameter {@code term};
 * an optional {@code type} parameter restricts the hits to one rdf:type.
 * The response body is a JSON array of objects with the keys {@code label}
 * and {@code uri}, sorted case-insensitively by label. Whenever no usable
 * result can be produced (missing or over-long term, no hits, or an error)
 * the response body is the empty JSON array {@code []}.
 */
// RY Rename to AutocompleteController once the transition to Solr is complete.
public class SolrAutocompleteController extends VitroAjaxController {

    private static final long serialVersionUID = 1L;
    private static final Log log = LogFactory.getLog(SolrAutocompleteController.class);

    //private static final String TEMPLATE_DEFAULT = "autocompleteResults.ftl";

    /** Name of the request parameter holding the text to complete. */
    private static final String PARAM_QUERY = "term";
    /** Name of the optional request parameter holding an rdf:type filter. */
    private static final String PARAM_RDFTYPE = "type";

    String NORESULT_MSG = "";

    /** Maximum number of rows requested from Solr for one autocomplete query. */
    private static final int DEFAULT_MAX_HIT_COUNT = 1000;

    /** Terms longer than this many characters are rejected without searching. */
    public static final int MAX_QUERY_LENGTH = 500;

    @Override
    protected Actions requiredActions(VitroRequest vreq) {
        return new Actions(new UseBasicAjaxControllers());
    }

    /**
     * Runs the autocomplete search and writes the JSON result to the response.
     *
     * @param vreq     the current request; supplies the {@code term} parameter
     *                 and the optional {@code type}, {@code tokenize} and
     *                 {@code stem} parameters
     * @param response the response whose writer receives the JSON array
     * @throws IOException      if writing the response fails
     * @throws ServletException declared by the superclass contract
     */
    @Override
    protected void doRequest(VitroRequest vreq, HttpServletResponse response)
            throws IOException, ServletException {

        try {
            String qtxt = vreq.getParameter(PARAM_QUERY);

            SolrQuery query = getQuery(qtxt, vreq);
            if (query == null) {
                log.debug("query for '" + qtxt + "' is null.");
                doNoQuery(response);
                return;
            }
            log.debug("query for '" + qtxt + "' is " + query.toString());

            SolrServer solr = SolrSetup.getSolrServer(getServletContext());
            QueryResponse queryResponse = solr.query(query);
            if (queryResponse == null) {
                log.error("Query response for a search was null");
                doNoSearchResults(response);
                return;
            }

            SolrDocumentList docs = queryResponse.getResults();
            if (docs == null) {
                log.error("Docs for a search was null");
                doNoSearchResults(response);
                return;
            }

            long hitCount = docs.getNumFound();
            log.debug("Number of hits = " + hitCount);
            if (hitCount < 1) {
                doNoSearchResults(response);
                return;
            }

            List<SearchResult> results = new ArrayList<SearchResult>();
            for (SolrDocument doc : docs) {
                try {
                    // getFirstValue() copes with both single-valued fields and
                    // multivalued fields such as NAME_RAW (which come back as a
                    // collection), so no unchecked cast is needed.
                    Object uri = doc.getFirstValue(VitroLuceneTermNames.URI);
                    Object name = doc.getFirstValue(VitroLuceneTermNames.NAME_RAW);
                    if (uri == null || name == null) {
                        // A hit without a label or URI cannot be shown or sorted; skip it.
                        log.error("problem getting usable Individuals from search hits: "
                                + "document is missing its name or URI");
                        continue;
                    }
                    results.add(new SearchResult(name.toString(), uri.toString()));
                } catch (Exception e) {
                    // Best effort: one unusable document must not discard the other hits.
                    log.error("problem getting usable Individuals from search hits: "
                            + e.getMessage(), e);
                }
            }

            // Solr can't sort on a multivalued field, so the sort happens here.
            Collections.sort(results);

            // map.put("results", results);
            // writeTemplate(TEMPLATE_DEFAULT, map, config, vreq, response);

            JSONArray jsonArray = new JSONArray();
            for (SearchResult result : results) {
                jsonArray.put(result.toMap());
            }
            response.getWriter().write(jsonArray.toString());

        } catch (Throwable e) {
            log.error(e, e);
            doSearchError(response);
        }
    }

    /**
     * Builds the Solr query for the given search text.
     *
     * @param queryStr the value of the {@code term} request parameter, may be null
     * @param vreq     the current request, consulted for the optional parameters
     * @return the query to run, or {@code null} if {@code queryStr} is null or
     *         longer than {@link #MAX_QUERY_LENGTH}
     */
    private SolrQuery getQuery(String queryStr, VitroRequest vreq) {

        if (queryStr == null) {
            log.error("There was no parameter '" + PARAM_QUERY
                    + "' in the request.");
            return null;
        } else if (queryStr.length() > MAX_QUERY_LENGTH) {
            log.debug("The search was too long. The maximum " +
                    "query length is " + MAX_QUERY_LENGTH);
            return null;
        }

        SolrQuery query = new SolrQuery();
        query.setStart(0)
             .setRows(DEFAULT_MAX_HIT_COUNT);

        setQuery(query, queryStr, vreq);

        // Filter by type
        String typeParam = vreq.getParameter(PARAM_RDFTYPE);
        if (typeParam != null) {
            // The value is wrapped in a quoted phrase, so a backslash or a double
            // quote inside it must be escaped or the filter syntax would break.
            String escapedType = typeParam.replace("\\", "\\\\").replace("\"", "\\\"");
            query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + escapedType + "\"");
        }

        query.setFields(VitroLuceneTermNames.NAME_RAW, VitroLuceneTermNames.URI); // fields to retrieve

        // Can't sort on multivalued field, so sort results in Java when we get them
        // query.setSortField(VitroLuceneTermNames.NAME_LOWERCASE, SolrQuery.ORDER.asc);

        return query;
    }

    /**
     * Sets the main query string on {@code query}, choosing the query flavor
     * from the {@code tokenize} and {@code stem} request parameters.
     *
     * @param query    the query object to populate
     * @param queryStr the raw search text
     * @param request  the current request
     */
    private void setQuery(SolrQuery query, String queryStr, HttpServletRequest request) {

        if (StringUtils.isBlank(queryStr)) {
            log.error("No query string");
        }

        boolean tokenize = "true".equals(request.getParameter("tokenize"));
        if (!tokenize) {
            setUntokenizedQuery(query, queryStr);
            return;
        }

        // Stemming is only relevant if we are tokenizing: an untokenized name
        // query will not be stemmed.
        boolean stem = "true".equals(request.getParameter("stem"));
        if (stem) {
            setStemmedQuery(query, queryStr);
        } else {
            setTokenizedQuery(query, queryStr);
        }
    }

    /**
     * Sets the query for a tokenized, stemmed search.
     *
     * <p>A real stemmed query has not been implemented for Solr yet. Until it
     * is, this falls back to the untokenized prefix query so that the request
     * always carries a valid query string instead of none at all.
     */
    private void setStemmedQuery(SolrQuery query, String queryStr) {
        setUntokenizedQuery(query, queryStr);
    }

    /**
     * Sets the query for a tokenized, unstemmed search.
     *
     * <p>RY 5/18/2011 For now, just doing untokenized query, due to the
     * interaction of wildcard queries and stemming: a wildcard query doesn't
     * play well with stemming. Query term name:tales* doesn't match "tales",
     * which is indexed as "tale", while query term name:tales does. Obviously
     * the wildcard is needed for name:tal*, so the only way to get them all to
     * match is a disjunction of wildcard and non-wildcard queries, e.g.
     * +(name:tales name:tales*). Need to find a way to do this in Solr;
     * probably just add an explicit "OR" between the terms.
     *
     * <p>The raw search text is handed to the untokenized query unchanged, so
     * it is lowercased, escaped and prefixed with a field name exactly once.
     */
    private void setTokenizedQuery(SolrQuery query, String queryStr) {
        setUntokenizedQuery(query, queryStr);
    }

    /**
     * Sets a prefix (wildcard) query against the lowercased name field.
     *
     * @param query    the query object to populate
     * @param queryStr the raw search text
     */
    private void setUntokenizedQuery(SolrQuery query, String queryStr) {
        // We have to lowercase manually, because Solr doesn't do text analysis on wildcard queries
        queryStr = queryStr.toLowerCase();
        // Solr wants whitespace to be escaped with a backslash
        // Better: replace \s+
        queryStr = queryStr.replaceAll(" ", "\\\\ ");
        queryStr = VitroLuceneTermNames.NAME_LOWERCASE + ":" + queryStr + "*";
        query.setQuery(queryStr);
    }

    /** Responds to a request that carried no usable query. */
    private void doNoQuery(HttpServletResponse response) throws IOException {
        // For now, we are not sending an error message back to the client because
        // with the default autocomplete configuration it chokes.
        doNoSearchResults(response);
    }

    /** Responds to a request whose search failed. */
    private void doSearchError(HttpServletResponse response) throws IOException {
        // For now, we are not sending an error message back to the client because
        // with the default autocomplete configuration it chokes.
        doNoSearchResults(response);
    }

    /** Writes an empty JSON array to the response. */
    private void doNoSearchResults(HttpServletResponse response) throws IOException {
        response.getWriter().write("[]");
    }

    /**
     * One autocomplete suggestion: a display label and the URI it stands for.
     * Instances order themselves by label, ignoring case.
     */
    public class SearchResult implements Comparable<Object> {
        private String label;
        private String uri;

        SearchResult(String label, String uri) {
            this.label = label;
            this.uri = uri;
        }

        public String getLabel() {
            return label;
        }

        /** @return the label as a quoted JSON string literal */
        public String getJsonLabel() {
            return JSONObject.quote(label);
        }

        public String getUri() {
            return uri;
        }

        /** @return the URI as a quoted JSON string literal */
        public String getJsonUri() {
            return JSONObject.quote(uri);
        }

        /** @return a new map with the keys "label" and "uri" */
        Map<String, String> toMap() {
            Map<String, String> map = new HashMap<String, String>();
            map.put("label", label);
            map.put("uri", uri);
            return map;
        }

        /**
         * Compares labels case-insensitively.
         *
         * @throws ClassCastException if {@code o} is not a SearchResult
         */
        public int compareTo(Object o) throws ClassCastException {
            if ( !(o instanceof SearchResult) ) {
                throw new ClassCastException("Error in SearchResult.compareTo(): expected SearchResult object.");
            }
            SearchResult sr = (SearchResult) o;
            return label.compareToIgnoreCase(sr.getLabel());
        }
    }

}

View file

@ -336,7 +336,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
/** /**
* Get the class groups represented for the individuals in the topDocs. * Get the class groups represented for the individuals in the documents.
*/ */
private List<VClassGroup> getClassGroups(VClassGroupDao grpDao, SolrDocumentList docs) { private List<VClassGroup> getClassGroups(VClassGroupDao grpDao, SolrDocumentList docs) {
LinkedHashMap<String,VClassGroup> grpMap = grpDao.getClassGroupMap(); LinkedHashMap<String,VClassGroup> grpMap = grpDao.getClassGroupMap();