NIHVIVO-2459 Work on SolrAutocompleteController (not working yet). Refactoring VitroLuceneTermNames rdfs:label field names to clarify what the fields are. Restored 1.2 Lucene analysis to name fields.

This commit is contained in:
ryounes 2011-05-13 22:43:47 +00:00
parent 39622d8b2b
commit 702f904e75
11 changed files with 184 additions and 240 deletions

View file

@ -427,16 +427,16 @@
<field name="DocId" type="string" indexed="true" stored="true" required="true" />
<field name="type" type="string" indexed="true" stored="true" omitNorms="ture" multiValued="true"/>
<field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
<field name="name" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="nameunstemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="nameunanalyzed" type="string" indexed="true" stored="false" multiValued="true"/>
<field name="nameraw" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="nameRaw" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="nameLowercase" type="string" indexed="true" stored="false" multiValued="true"/>
<field name="nameUnstemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>

View file

@ -29,6 +29,7 @@ import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.vocabulary.RDF;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
import edu.cornell.mannlib.vitro.webapp.web.ContentType;
@ -74,13 +75,13 @@ public void doGet (HttpServletRequest req, HttpServletResponse res) throws IOExc
String classUri = (String) getServletContext().getAttribute("classuri");
BooleanQuery query = new BooleanQuery();
query.add(
new TermQuery( new Term(Entity2LuceneDoc.term.RDFTYPE, classUri)),
new TermQuery( new Term(VitroLuceneTermNames.RDFTYPE, classUri)),
BooleanClause.Occur.MUST );
IndexSearcher index = LuceneIndexFactory.getIndexSearcher(getServletContext());
TopDocs docs = index.search(query, null,
ENTITY_LIST_CONTROLLER_MAX_RESULTS,
new Sort(Entity2LuceneDoc.term.NAMELOWERCASE));
new Sort(VitroLuceneTermNames.NAME_LOWERCASE));
if( docs == null ){
log.error("Search of lucene index returned null");
@ -97,7 +98,7 @@ public void doGet (HttpServletRequest req, HttpServletResponse res) throws IOExc
if (hit != null) {
Document doc = index.doc(hit.doc);
if (doc != null) {
String uri = doc.getField(Entity2LuceneDoc.term.URI).stringValue();
String uri = doc.getField(VitroLuceneTermNames.URI).stringValue();
resource = ResourceFactory.createResource(uri);
node = (RDFNode) ResourceFactory.createResource(classUri);
model.add(resource, RDF.type, node);

View file

@ -377,7 +377,7 @@ public class JSONReconcileServlet extends VitroHttpServlet {
String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam);
String termName = stem ? VitroLuceneTermNames.NAME : VitroLuceneTermNames.NAMEUNSTEMMED;
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
BooleanQuery boolQuery = new BooleanQuery();
@ -415,7 +415,7 @@ public class JSONReconcileServlet extends VitroHttpServlet {
private Query makeUntokenizedNameQuery(String querystr) {
querystr = querystr.toLowerCase();
String termName = VitroLuceneTermNames.NAMELOWERCASE;
String termName = VitroLuceneTermNames.NAME_LOWERCASE;
BooleanQuery query = new BooleanQuery();
log.debug("Adding wildcard query on unanalyzed name");
query.add(

View file

@ -188,7 +188,7 @@ public class IndividualListController extends FreemarkerHttpServlet {
try{
docs = index.search(query, null,
ENTITY_LIST_CONTROLLER_MAX_RESULTS,
new Sort(Entity2LuceneDoc.term.NAMELOWERCASE));
new Sort(Entity2LuceneDoc.term.NAME_LOWERCASE));
}catch(Throwable th){
log.error("Could not run search. " + th.getMessage());
docs = null;
@ -258,7 +258,7 @@ public class IndividualListController extends FreemarkerHttpServlet {
Query alphaQuery = null;
if( alpha != null && !"".equals(alpha) && alpha.length() == 1){
alphaQuery =
new PrefixQuery(new Term(Entity2LuceneDoc.term.NAMELOWERCASE, alpha.toLowerCase()));
new PrefixQuery(new Term(Entity2LuceneDoc.term.NAME_LOWERCASE, alpha.toLowerCase()));
query.add(alphaQuery,BooleanClause.Occur.MUST);
}

View file

@ -30,8 +30,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;
import org.json.JSONArray;
import com.hp.hpl.jena.sparql.lib.org.json.JSONObject;
import org.json.JSONObject;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
@ -118,7 +117,7 @@ public class AutocompleteController extends VitroAjaxController {
try{
Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc);
String uri = doc.get(VitroLuceneTermNames.URI);
String name = doc.get(VitroLuceneTermNames.NAMERAW);
String name = doc.get(VitroLuceneTermNames.NAME_RAW);
SearchResult result = new SearchResult(name, uri);
results.add(result);
} catch(Exception e){
@ -208,7 +207,7 @@ public class AutocompleteController extends VitroAjaxController {
String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam);
String termName = stem ? VitroLuceneTermNames.NAME : VitroLuceneTermNames.NAMEUNSTEMMED;
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
BooleanQuery boolQuery = new BooleanQuery();
@ -245,7 +244,7 @@ public class AutocompleteController extends VitroAjaxController {
private Query makeUntokenizedNameQuery(String querystr) {
querystr = querystr.toLowerCase();
String termName = VitroLuceneTermNames.NAMELOWERCASE;
String termName = VitroLuceneTermNames.NAME_LOWERCASE;
BooleanQuery query = new BooleanQuery();
log.debug("Adding wildcard query on unanalyzed name");
query.add(

View file

@ -65,6 +65,7 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.CustomSimilarity;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
@ -228,7 +229,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
Document document = searcherForRequest.doc(scoreDoc.doc);
Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc);
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME) + " score: " +scoreDoc.score);
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED) + " score: " +scoreDoc.score);
log.debug("Scoring of the doc explained " + explanation.toString());
log.debug("Explanation's description "+ explanation.getDescription());
log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT));
@ -404,7 +405,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
Document doc;
try {
doc = searcher.doc(topDocs.scoreDocs[i].doc);
String name =doc.get(Entity2LuceneDoc.term.NAME);
// Read the stored raw label. NAME_STEMMED is indexed with Field.Store.NO, so
// doc.get() on it yields null and no first letters would ever be collected.
String name = doc.get(Entity2LuceneDoc.term.NAME_RAW);
if( name != null && name.length() > 0)
alphas.add( name.substring(0, 1));
} catch (CorruptIndexException e) {
@ -621,7 +622,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
BooleanQuery boolQuery = new BooleanQuery();
boolQuery.add( query, BooleanClause.Occur.MUST );
boolQuery.add(
new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME, alpha+'*')),
new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME_STEMMED, alpha+'*')),
BooleanClause.Occur.MUST);
query = boolQuery;
}
@ -682,7 +683,15 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
// qp.setStemmedToUnstemmed(map);
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{
"name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer);
VitroLuceneTermNames.NAME_STEMMED,
VitroLuceneTermNames.NAME_UNSTEMMED,
VitroLuceneTermNames.RDFTYPE,
VitroLuceneTermNames.MONIKER,
VitroLuceneTermNames.ALLTEXT,
VitroLuceneTermNames.ALLTEXTUNSTEMMED,
VitroLuceneTermNames.NAME_RAW,
VitroLuceneTermNames.CLASSLOCALNAME,
VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE }, analyzer);
// QueryParser qp = new QueryParser(Version.LUCENE_29, "name", analyzer);

View file

@ -9,43 +9,34 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.json.JSONArray;
import com.hp.hpl.jena.sparql.lib.org.json.JSONObject;
import org.json.JSONObject;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.controller.ajax.VitroAjaxController;
import edu.cornell.mannlib.vitro.webapp.search.SearchException;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
/**
* AutocompleteController generates autocomplete content
* through a Lucene search.
* through a Solr search.
*/
// RY Rename to AutocompleteController once the transition to Solr is complete.
public class SolrAutocompleteController extends VitroAjaxController {
private static final long serialVersionUID = 1L;
@ -53,10 +44,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
//private static final String TEMPLATE_DEFAULT = "autocompleteResults.ftl";
private static String QUERY_PARAMETER_NAME = "term";
private static final String PARAM_QUERY = "term";
private static final String PARAM_RDFTYPE = "type";
String NORESULT_MSG = "";
private int defaultMaxSearchSize= 1000;
private static final int DEFAULT_MAX_HIT_COUNT = 1000;
public static final int MAX_QUERY_LENGTH = 500;
@Override
protected Actions requiredActions(VitroRequest vreq) {
@ -69,12 +63,9 @@ public class SolrAutocompleteController extends VitroAjaxController {
try {
int maxHitSize = defaultMaxSearchSize;
String qtxt = vreq.getParameter(PARAM_QUERY);
String qtxt = vreq.getParameter(QUERY_PARAMETER_NAME);
Analyzer analyzer = getAnalyzer(getServletContext());
Query query = getQuery(vreq, analyzer, qtxt);
SolrQuery query = getQuery(qtxt, vreq);
if (query == null ) {
log.debug("query for '" + qtxt +"' is null.");
doNoQuery(response);
@ -82,43 +73,35 @@ public class SolrAutocompleteController extends VitroAjaxController {
}
log.debug("query for '" + qtxt +"' is " + query.toString());
IndexSearcher searcherForRequest = LuceneIndexFactory.getIndexSearcher(getServletContext());
SolrServer solr = SolrSetup.getSolrServer(getServletContext());
QueryResponse queryResponse = solr.query(query);
TopDocs topDocs = null;
try{
topDocs = searcherForRequest.search(query,null,maxHitSize);
}catch(Throwable t){
log.error("in first pass at search: " + t);
// this is a hack to deal with odd cases where search and index threads interact
try{
wait(150);
topDocs = searcherForRequest.search(query,null,maxHitSize);
}catch (Exception e){
log.error(e, e);
doNoSearchResults(response);
return;
}
}
if( topDocs == null || topDocs.scoreDocs == null){
log.error("topDocs for a search was null");
if ( queryResponse == null) {
log.error("Query response for a search was null");
doNoSearchResults(response);
return;
}
int hitsLength = topDocs.scoreDocs.length;
if ( hitsLength < 1 ){
SolrDocumentList docs = queryResponse.getResults();
if ( docs == null) {
log.error("Docs for a search was null");
doNoSearchResults(response);
return;
}
long hitCount = docs.getNumFound();
log.debug("Number of hits = " + hitCount);
if ( hitCount < 1 ) {
doNoSearchResults(response);
return;
}
log.debug("found "+hitsLength+" hits");
List<SearchResult> results = new ArrayList<SearchResult>();
for(int i=0; i<topDocs.scoreDocs.length ;i++){
for (SolrDocument doc : docs) {
try{
Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc);
String uri = doc.get(VitroLuceneTermNames.URI);
String name = doc.get(VitroLuceneTermNames.NAMERAW);
String uri = doc.get(VitroLuceneTermNames.URI).toString();
String name = doc.get(VitroLuceneTermNames.NAME_RAW).toString();
SearchResult result = new SearchResult(name, uri);
results.add(result);
} catch(Exception e){
@ -138,143 +121,115 @@ public class SolrAutocompleteController extends VitroAjaxController {
}
response.getWriter().write(jsonArray.toString());
} catch (Throwable e) {
log.error(e, e);
doSearchError(response);
}
}
private Analyzer getAnalyzer(ServletContext servletContext) throws SearchException {
Object obj = servletContext.getAttribute(LuceneSetup.ANALYZER);
if( obj == null || !(obj instanceof Analyzer) )
throw new SearchException("Could not get analyzer");
else
return (Analyzer)obj;
}
private SolrQuery getQuery(String querystr, VitroRequest vreq) {
private Query getQuery(VitroRequest vreq, Analyzer analyzer,
String querystr) throws SearchException{
Query query = null;
try {
if( querystr == null){
log.error("There was no Parameter '"+ QUERY_PARAMETER_NAME
if ( querystr == null) {
log.error("There was no parameter '"+ PARAM_QUERY
+"' in the request.");
return null;
}else if( querystr.length() > MAX_QUERY_LENGTH ){
} else if( querystr.length() > MAX_QUERY_LENGTH ) {
log.debug("The search was too long. The maximum " +
"query length is " + MAX_QUERY_LENGTH );
return null;
}
query = makeNameQuery(querystr, analyzer, vreq);
SolrQuery query = new SolrQuery();
query = query.setStart(0);
query = query.setRows(DEFAULT_MAX_HIT_COUNT);
query = setNameQuery(query, querystr, vreq);
// Filter by type
{
BooleanQuery boolQuery = new BooleanQuery();
String typeParam = (String) vreq.getParameter("type");
boolQuery.add( new TermQuery(
new Term(VitroLuceneTermNames.RDFTYPE,
typeParam)),
BooleanClause.Occur.MUST);
boolQuery.add(query, BooleanClause.Occur.MUST);
query = boolQuery;
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
if (typeParam != null) {
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
}
} catch (Exception ex){
throw new SearchException(ex.getMessage());
}
// Set the fields to retrieve **** RY
// query = query.setFields( ... );
return query;
}
private Query makeNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) {
private SolrQuery setNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
String tokenizeParam = (String) request.getParameter("tokenize");
boolean tokenize = "true".equals(tokenizeParam);
// Note: Stemming is only relevant if we are tokenizing: an untokenized name
// query will not be stemmed. So we don't look at the stem parameter until we get to
// makeTokenizedNameQuery().
// setTokenizedNameQuery().
if (tokenize) {
return makeTokenizedNameQuery(querystr, analyzer, request);
return setTokenizedNameQuery(query, querystr, request);
} else {
return makeUntokenizedNameQuery(querystr);
return setUntokenizedNameQuery(query, querystr);
}
}
private Query makeTokenizedNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) {
private SolrQuery setTokenizedNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam);
String termName = stem ? VitroLuceneTermNames.NAME : VitroLuceneTermNames.NAMEUNSTEMMED;
String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
BooleanQuery boolQuery = new BooleanQuery();
// Use the query parser to analyze the search term the same way the indexed text was analyzed.
// For example, text is lowercased, and function words are stripped out.
QueryParser parser = getQueryParser(termName, analyzer);
// The wildcard query doesn't play well with stemming. Query term name:tales* doesn't match
// "tales", which is indexed as "tale", while query term name:tales does. Obviously we need
// the wildcard for name:tal*, so the only way to get them all to match is use a disjunction
// of wildcard and non-wildcard queries. The query will look have only an implicit disjunction
// operator: e.g., +(name:tales name:tales*)
try {
log.debug("Adding non-wildcard query for " + querystr);
Query query = parser.parse(querystr);
boolQuery.add(query, BooleanClause.Occur.SHOULD);
// Prevent ParseException here when adding * after a space.
// If there's a space at the end, we don't need the wildcard query.
if (! querystr.endsWith(" ")) {
log.debug("Adding wildcard query for " + querystr);
Query wildcardQuery = parser.parse(querystr + "*");
boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
}
log.debug("Name query is: " + boolQuery.toString());
} catch (ParseException e) {
log.warn(e, e);
}
return boolQuery;
}
private Query makeUntokenizedNameQuery(String querystr) {
querystr = querystr.toLowerCase();
String termName = VitroLuceneTermNames.NAMELOWERCASE;
BooleanQuery query = new BooleanQuery();
log.debug("Adding wildcard query on unanalyzed name");
query.add(
new WildcardQuery(new Term(termName, querystr + "*")),
BooleanClause.Occur.MUST);
// // Use the query parser to analyze the search term the same way the indexed text was analyzed.
// // For example, text is lowercased, and function words are stripped out.
// QueryParser parser = getQueryParser(termName);
//
// // The wildcard query doesn't play well with stemming. Query term name:tales* doesn't match
// // "tales", which is indexed as "tale", while query term name:tales does. Obviously we need
// // the wildcard for name:tal*, so the only way to get them all to match is use a disjunction
// // of wildcard and non-wildcard queries. The query will have only an implicit disjunction
// // operator: e.g., +(name:tales name:tales*)
// try {
// log.debug("Adding non-wildcard query for " + querystr);
// Query query = parser.parse(querystr);
// boolQuery.add(query, BooleanClause.Occur.SHOULD);
//
// // Prevent ParseException here when adding * after a space.
// // If there's a space at the end, we don't need the wildcard query.
// if (! querystr.endsWith(" ")) {
// log.debug("Adding wildcard query for " + querystr);
// Query wildcardQuery = parser.parse(querystr + "*");
// boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
// }
//
// log.debug("Name query is: " + boolQuery.toString());
// } catch (ParseException e) {
// log.warn(e, e);
// }
return query;
}
private QueryParser getQueryParser(String searchField, Analyzer analyzer){
// searchField indicates which field to search against when there is no term
// indicated in the query string.
// The analyzer is needed so that we use the same analyzer on the search queries as
// was used on the text that was indexed.
QueryParser qp = new QueryParser(Version.LUCENE_29, searchField,analyzer);
//this sets the query parser to AND all of the query terms it finds.
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
return qp;
/**
 * Installs a trailing-wildcard query for the untokenized search text on the
 * given SolrQuery.
 *
 * The text is used exactly as received: the lowercasing step
 * (querystr.toLowerCase()) is currently disabled. The query string names no
 * field; per the original note, it is the Solr "df" parameter that sets the
 * field to search (the commented-out candidate was
 * VitroLuceneTermNames.LABEL_LOWERCASE).
 *
 * @param query    the Solr query to configure
 * @param querystr the search text; "*" is appended to it
 * @return the value returned by SolrQuery.setQuery for the wildcard string
 */
private SolrQuery setUntokenizedNameQuery(SolrQuery query, String querystr) {
    final String wildcardText = querystr + "*";
    return query.setQuery(wildcardText);
}
/**
 * Responds to a request for which no query could be built by delegating to
 * doNoSearchResults.
 *
 * @param response the servlet response passed on to doNoSearchResults
 * @throws IOException if the delegated response handling fails
 */
private void doNoQuery(HttpServletResponse response) throws IOException {
// For now, we are not sending an error message back to the client because
// with the default autocomplete configuration it chokes.
doNoSearchResults(response);
}
/**
 * Responds to a request whose processing threw an error by delegating to
 * doNoSearchResults, so the client receives the same reply as for an empty
 * result.
 *
 * @param response the servlet response passed on to doNoSearchResults
 * @throws IOException if the delegated response handling fails
 */
private void doSearchError(HttpServletResponse response) throws IOException {
// For now, we are not sending an error message back to the client because
// with the default autocomplete configuration it chokes.
doNoSearchResults(response);
}
@ -282,8 +237,6 @@ public class SolrAutocompleteController extends VitroAjaxController {
response.getWriter().write("[]");
}
public static final int MAX_QUERY_LENGTH = 500;
public class SearchResult implements Comparable<Object> {
private String label;
private String uri;

View file

@ -22,10 +22,6 @@ import javax.servlet.http.HttpServletResponse;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.util.Version;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
@ -73,9 +69,8 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
private static final long serialVersionUID = 1L;
private static final Log log = LogFactory.getLog(SolrPagedSearchController.class);
private static final int DEFAULT_HITS_PER_PAGE = 25;
private static final int DEFAULT_MAX_SEARCH_SIZE = 1000;
private static final int DEFAULT_MAX_HIT_COUNT = 1000;
private static final String PARAM_XML_REQUEST = "xml";
private static final String PARAM_START_INDEX = "startIndex";
@ -169,15 +164,15 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
}
log.debug("hitsPerPage is " + hitsPerPage);
int maxHitCount = DEFAULT_MAX_SEARCH_SIZE ;
if( startIndex >= DEFAULT_MAX_SEARCH_SIZE - hitsPerPage )
maxHitCount = startIndex + DEFAULT_MAX_SEARCH_SIZE ;
int maxHitCount = DEFAULT_MAX_HIT_COUNT ;
if( startIndex >= DEFAULT_MAX_HIT_COUNT - hitsPerPage )
maxHitCount = startIndex + DEFAULT_MAX_HIT_COUNT ;
log.debug("maxHitSize is " + maxHitCount);
String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
log.debug("Query text is "+ qtxt); // + " Analyzer is "+ analyzer.toString());
log.debug("Query text is \""+ qtxt + "\"");
SolrQuery query = getQuery(qtxt, maxHitCount, vreq);
@ -440,29 +435,27 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
private SolrQuery getQuery(String queryText, int maxHitCount, VitroRequest vreq) {
SolrQuery query = new SolrQuery(queryText);
//SolrQuery query = new SolrQuery();
//query.setQuery(queryText);
// Solr requires these values, but we don't want them to be the real values for this page
// of results, else the refinement links won't work correctly: each page of results needs to
// show refinement links generated for all results, not just for the results on the current page.
query.setStart(0);
query.setRows(maxHitCount);
query.setStart(0)
.setRows(maxHitCount);
// Classgroup filtering
Object param = vreq.getParameter(PARAM_CLASSGROUP);
if( param != null && !"".equals(param)){
String classgroupParam = (String) vreq.getParameter(PARAM_CLASSGROUP);
if ( ! StringUtils.isBlank(classgroupParam) ) {
log.debug("Firing classgroup query ");
log.debug("request.getParameter(classgroup) is "+ param.toString());
query = query.addFilterQuery(VitroLuceneTermNames.CLASSGROUP_URI + ":\"" + param + "\"");
log.debug("request.getParameter(classgroup) is "+ classgroupParam);
query.addFilterQuery(VitroLuceneTermNames.CLASSGROUP_URI + ":\"" + classgroupParam + "\"");
}
// rdf:type filtering
param = vreq.getParameter(PARAM_RDFTYPE);
if( param != null && !"".equals(param)){
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
if ( ! StringUtils.isBlank(typeParam) ) {
log.debug("Firing type query ");
log.debug("request.getParameter(type) is "+ param.toString());
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + param + "\"");
log.debug("request.getParameter(type) is "+ typeParam);
query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
}
//query.setQuery(queryText);
@ -470,23 +463,6 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
return query;
}
@SuppressWarnings("unused")
private QueryParser getQueryParser(Analyzer analyzer){
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[] {
VitroLuceneTermNames.NAME,
VitroLuceneTermNames.NAMEUNSTEMMED,
VitroLuceneTermNames.RDFTYPE,
VitroLuceneTermNames.ALLTEXT,
VitroLuceneTermNames.ALLTEXTUNSTEMMED,
VitroLuceneTermNames.NAMERAW,
VitroLuceneTermNames.CLASSLOCALNAME,
VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE }, analyzer);
//"name", "nameunstemmed", "type", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer);
return qp;
}
private class VClassGroupSearchLink extends LinkTemplateModel {
VClassGroupSearchLink(String querytext, VClassGroup classgroup) {

View file

@ -45,14 +45,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
public static String CLASSGROUP_URI = "classgroup";
/** Modtime from db */
public static String MODTIME = "modTime";
/** Name of entity, tab or vclass */
public static String NAME = "name";
/** rdfs:label unanalyzed */
public static String NAMELOWERCASE = "nameunanalyzed" ;
/** Name of entity, unstemmed */
public static String NAMEUNSTEMMED = "nameunstemmed";
/** Unaltered name of individual, un-lowercased, un-stemmed, un-tokenized" */
public static String NAMERAW = "nameraw";
/** time of index in msec since epoch */
public static String INDEXEDTIME= "indexedTime";
/** timekey of entity in yyyymmddhhmm */
@ -78,6 +71,20 @@ public class Entity2LuceneDoc implements Obj2DocIface{
public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
/** class names in human readable form of an individual*/
public static final String CLASSLOCALNAME = "classLocalName";
// Fields derived from rdfs:label
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
public static String NAME_RAW = "nameRaw"; // was NAMERAW
/** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
/** rdfs:label lowercased, tokenized, stop words, no stemming **/
public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED
/** rdfs:label lowercased, tokenized, stop words, stemmed **/
public static String NAME_STEMMED = "nameStemmed"; // was NAME
}
private static final Log log = LogFactory.getLog(Entity2LuceneDoc.class.getName());
@ -189,7 +196,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
//java class
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
//Entity Name
// Individual label
if( ent.getRdfsLabel() != null )
value=ent.getRdfsLabel();
else{
@ -198,21 +205,22 @@ public class Entity2LuceneDoc implements Obj2DocIface{
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
value = ent.getLocalName();
}
Field name = new Field(term.NAME, value, Field.Store.YES, Field.Index.ANALYZED);
doc.add( name );
Field nameUn = new Field(term.NAMEUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
nameUn.setBoost(NAME_BOOST);
doc.add( nameUn );
Field labelRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
labelRaw.setBoost(NAME_BOOST);
doc.add(labelRaw);
// BK nameLowercase (formerly nameunanalyzed) is used by IndividualListController
Field nameUnanalyzed = new Field(term.NAMELOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED);
nameUnanalyzed.setBoost(NAME_BOOST);
doc.add( nameUnanalyzed );
// NAME_LOWERCASE must hold the lowercased label. The field is NOT_ANALYZED, so no
// analyzer lowercases it at index time, while callers lowercase their prefix and
// wildcard terms before matching against it.
Field labelLowerCase = new Field(term.NAME_LOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED);
labelLowerCase.setBoost(NAME_BOOST);
doc.add(labelLowerCase);
Field nameRaw = new Field(term.NAMERAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
nameRaw.setBoost(NAME_BOOST);
doc.add(nameRaw);
Field labelUnstemmed = new Field(term.NAME_UNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
labelUnstemmed.setBoost(NAME_BOOST);
doc.add(labelUnstemmed);
Field labelStemmed = new Field(term.NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
labelStemmed.setBoost(NAME_BOOST);
doc.add(labelStemmed);
//Moniker

View file

@ -81,8 +81,7 @@ public class HtmlLowerStopAnalyzer extends Analyzer {
/**
* Processes the input by first converting it to
* lower case, then by eliminating stop words, and
* finally by performing Porter stemming on it.
* lower case, then by eliminating stop words.
*
* @param reader the Reader that
* provides access to the input text

View file

@ -7,8 +7,8 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
import java.io.File;
@ -245,10 +245,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
// analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(NAME, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(NAME_UNSTEMMED, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());