NIHVIVO-2459 Work on SolrAutocompleteController (not working yet). Refactoring VitroLuceneTermNames rdfs:label field names to clarify what the fields are. Restored 1.2 Lucene analysis to name fields.

This commit is contained in:
ryounes 2011-05-13 22:43:47 +00:00
parent 39622d8b2b
commit 702f904e75
11 changed files with 184 additions and 240 deletions

View file

@ -427,16 +427,16 @@
<field name="DocId" type="string" indexed="true" stored="true" required="true" /> <field name="DocId" type="string" indexed="true" stored="true" required="true" />
<field name="type" type="string" indexed="true" stored="true" omitNorms="ture" multiValued="true"/> <field name="type" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
<field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/> <field name="classLocalName" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/> <field name="classLocalNameLowerCase" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/> <field name="classgroup" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/> <field name="PROHIBITED_FROM_TEXT_RESULTS" type="string" indexed="true" stored="false" multiValued="false"/>
<field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/> <field name="URI" type="string" indexed="true" stored="true" multiValued="false" omitNorms="true"/>
<field name="name" type="text" indexed="true" stored="true" multiValued="true"/> <field name="nameRaw" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="nameunstemmed" type="text" indexed="true" stored="false" multiValued="true"/> <field name="nameLowercase" type="string" indexed="true" stored="false" multiValued="true"/>
<field name="nameunanalyzed" type="string" indexed="true" stored="false" multiValued="true"/> <field name="nameUnstemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="nameraw" type="string" indexed="true" stored="true" multiValued="true"/> <field name="nameStemmed" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/> <field name="indexedTime" type="string" indexed="true" stored="true" multiValued="true"/>
<field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/> <field name="ALLTEXT" type="text" indexed="true" stored="false" multiValued="true"/>
<field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/> <field name="ALLTEXTUNSTEMMED" type="textgen" indexed="true" stored="false" multiValued="true"/>

View file

@ -29,6 +29,7 @@ import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDF;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory; import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
import edu.cornell.mannlib.vitro.webapp.web.ContentType; import edu.cornell.mannlib.vitro.webapp.web.ContentType;
@ -74,13 +75,13 @@ public void doGet (HttpServletRequest req, HttpServletResponse res) throws IOExc
String classUri = (String) getServletContext().getAttribute("classuri"); String classUri = (String) getServletContext().getAttribute("classuri");
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery();
query.add( query.add(
new TermQuery( new Term(Entity2LuceneDoc.term.RDFTYPE, classUri)), new TermQuery( new Term(VitroLuceneTermNames.RDFTYPE, classUri)),
BooleanClause.Occur.MUST ); BooleanClause.Occur.MUST );
IndexSearcher index = LuceneIndexFactory.getIndexSearcher(getServletContext()); IndexSearcher index = LuceneIndexFactory.getIndexSearcher(getServletContext());
TopDocs docs = index.search(query, null, TopDocs docs = index.search(query, null,
ENTITY_LIST_CONTROLLER_MAX_RESULTS, ENTITY_LIST_CONTROLLER_MAX_RESULTS,
new Sort(Entity2LuceneDoc.term.NAMELOWERCASE)); new Sort(VitroLuceneTermNames.NAME_LOWERCASE));
if( docs == null ){ if( docs == null ){
log.error("Search of lucene index returned null"); log.error("Search of lucene index returned null");
@ -97,7 +98,7 @@ public void doGet (HttpServletRequest req, HttpServletResponse res) throws IOExc
if (hit != null) { if (hit != null) {
Document doc = index.doc(hit.doc); Document doc = index.doc(hit.doc);
if (doc != null) { if (doc != null) {
String uri = doc.getField(Entity2LuceneDoc.term.URI).stringValue(); String uri = doc.getField(VitroLuceneTermNames.URI).stringValue();
resource = ResourceFactory.createResource(uri); resource = ResourceFactory.createResource(uri);
node = (RDFNode) ResourceFactory.createResource(classUri); node = (RDFNode) ResourceFactory.createResource(classUri);
model.add(resource, RDF.type, node); model.add(resource, RDF.type, node);

View file

@ -377,7 +377,7 @@ public class JSONReconcileServlet extends VitroHttpServlet {
String stemParam = (String) request.getParameter("stem"); String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam); boolean stem = "true".equals(stemParam);
String termName = stem ? VitroLuceneTermNames.NAME : VitroLuceneTermNames.NAMEUNSTEMMED; String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
@ -415,7 +415,7 @@ public class JSONReconcileServlet extends VitroHttpServlet {
private Query makeUntokenizedNameQuery(String querystr) { private Query makeUntokenizedNameQuery(String querystr) {
querystr = querystr.toLowerCase(); querystr = querystr.toLowerCase();
String termName = VitroLuceneTermNames.NAMELOWERCASE; String termName = VitroLuceneTermNames.NAME_LOWERCASE;
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery();
log.debug("Adding wildcard query on unanalyzed name"); log.debug("Adding wildcard query on unanalyzed name");
query.add( query.add(

View file

@ -188,7 +188,7 @@ public class IndividualListController extends FreemarkerHttpServlet {
try{ try{
docs = index.search(query, null, docs = index.search(query, null,
ENTITY_LIST_CONTROLLER_MAX_RESULTS, ENTITY_LIST_CONTROLLER_MAX_RESULTS,
new Sort(Entity2LuceneDoc.term.NAMELOWERCASE)); new Sort(Entity2LuceneDoc.term.NAME_LOWERCASE));
}catch(Throwable th){ }catch(Throwable th){
log.error("Could not run search. " + th.getMessage()); log.error("Could not run search. " + th.getMessage());
docs = null; docs = null;
@ -258,7 +258,7 @@ public class IndividualListController extends FreemarkerHttpServlet {
Query alphaQuery = null; Query alphaQuery = null;
if( alpha != null && !"".equals(alpha) && alpha.length() == 1){ if( alpha != null && !"".equals(alpha) && alpha.length() == 1){
alphaQuery = alphaQuery =
new PrefixQuery(new Term(Entity2LuceneDoc.term.NAMELOWERCASE, alpha.toLowerCase())); new PrefixQuery(new Term(Entity2LuceneDoc.term.NAME_LOWERCASE, alpha.toLowerCase()));
query.add(alphaQuery,BooleanClause.Occur.MUST); query.add(alphaQuery,BooleanClause.Occur.MUST);
} }

View file

@ -30,8 +30,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version; import org.apache.lucene.util.Version;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONObject;
import com.hp.hpl.jena.sparql.lib.org.json.JSONObject;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions; import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers; import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
@ -118,7 +117,7 @@ public class AutocompleteController extends VitroAjaxController {
try{ try{
Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc); Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc);
String uri = doc.get(VitroLuceneTermNames.URI); String uri = doc.get(VitroLuceneTermNames.URI);
String name = doc.get(VitroLuceneTermNames.NAMERAW); String name = doc.get(VitroLuceneTermNames.NAME_RAW);
SearchResult result = new SearchResult(name, uri); SearchResult result = new SearchResult(name, uri);
results.add(result); results.add(result);
} catch(Exception e){ } catch(Exception e){
@ -208,7 +207,7 @@ public class AutocompleteController extends VitroAjaxController {
String stemParam = (String) request.getParameter("stem"); String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam); boolean stem = "true".equals(stemParam);
String termName = stem ? VitroLuceneTermNames.NAME : VitroLuceneTermNames.NAMEUNSTEMMED; String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
@ -245,7 +244,7 @@ public class AutocompleteController extends VitroAjaxController {
private Query makeUntokenizedNameQuery(String querystr) { private Query makeUntokenizedNameQuery(String querystr) {
querystr = querystr.toLowerCase(); querystr = querystr.toLowerCase();
String termName = VitroLuceneTermNames.NAMELOWERCASE; String termName = VitroLuceneTermNames.NAME_LOWERCASE;
BooleanQuery query = new BooleanQuery(); BooleanQuery query = new BooleanQuery();
log.debug("Adding wildcard query on unanalyzed name"); log.debug("Adding wildcard query on unanalyzed name");
query.add( query.add(

View file

@ -65,6 +65,7 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory; import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.CustomSimilarity; import edu.cornell.mannlib.vitro.webapp.search.lucene.CustomSimilarity;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory; import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup; import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel; import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
@ -228,7 +229,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
Document document = searcherForRequest.doc(scoreDoc.doc); Document document = searcherForRequest.doc(scoreDoc.doc);
Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc); Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc);
log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME) + " score: " +scoreDoc.score); log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED) + " score: " +scoreDoc.score);
log.debug("Scoring of the doc explained " + explanation.toString()); log.debug("Scoring of the doc explained " + explanation.toString());
log.debug("Explanation's description "+ explanation.getDescription()); log.debug("Explanation's description "+ explanation.getDescription());
log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT)); log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT));
@ -404,7 +405,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
Document doc; Document doc;
try { try {
doc = searcher.doc(topDocs.scoreDocs[i].doc); doc = searcher.doc(topDocs.scoreDocs[i].doc);
String name =doc.get(Entity2LuceneDoc.term.NAME); String name =doc.get(Entity2LuceneDoc.term.NAME_STEMMED);
if( name != null && name.length() > 0) if( name != null && name.length() > 0)
alphas.add( name.substring(0, 1)); alphas.add( name.substring(0, 1));
} catch (CorruptIndexException e) { } catch (CorruptIndexException e) {
@ -621,7 +622,7 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
boolQuery.add( query, BooleanClause.Occur.MUST ); boolQuery.add( query, BooleanClause.Occur.MUST );
boolQuery.add( boolQuery.add(
new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME, alpha+'*')), new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME_STEMMED, alpha+'*')),
BooleanClause.Occur.MUST); BooleanClause.Occur.MUST);
query = boolQuery; query = boolQuery;
} }
@ -682,7 +683,15 @@ public class PagedSearchController extends FreemarkerHttpServlet implements Sear
// qp.setStemmedToUnstemmed(map); // qp.setStemmedToUnstemmed(map);
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{ MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{
"name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer); VitroLuceneTermNames.NAME_STEMMED,
VitroLuceneTermNames.NAME_UNSTEMMED,
VitroLuceneTermNames.RDFTYPE,
VitroLuceneTermNames.MONIKER,
VitroLuceneTermNames.ALLTEXT,
VitroLuceneTermNames.ALLTEXTUNSTEMMED,
VitroLuceneTermNames.NAME_RAW,
VitroLuceneTermNames.CLASSLOCALNAME,
VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE }, analyzer);
// QueryParser qp = new QueryParser(Version.LUCENE_29, "name", analyzer); // QueryParser qp = new QueryParser(Version.LUCENE_29, "name", analyzer);

View file

@ -9,43 +9,34 @@ import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import javax.servlet.ServletContext;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse; import javax.servlet.http.HttpServletResponse;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.lucene.search.Query; import org.apache.solr.client.solrj.SolrServer;
import org.apache.lucene.search.TermQuery; import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.lucene.search.TopDocs; import org.apache.solr.common.SolrDocument;
import org.apache.lucene.search.WildcardQuery; import org.apache.solr.common.SolrDocumentList;
import org.apache.lucene.util.Version;
import org.json.JSONArray; import org.json.JSONArray;
import org.json.JSONObject;
import com.hp.hpl.jena.sparql.lib.org.json.JSONObject;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions; import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers; import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.controller.ajax.VitroAjaxController; import edu.cornell.mannlib.vitro.webapp.controller.ajax.VitroAjaxController;
import edu.cornell.mannlib.vitro.webapp.search.SearchException;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory; import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
/** /**
* AutocompleteController generates autocomplete content * AutocompleteController generates autocomplete content
* through a Lucene search. * through a Solr search.
*/ */
// RY Rename to AutocompleteController once the transition to Solr is complete.
public class SolrAutocompleteController extends VitroAjaxController { public class SolrAutocompleteController extends VitroAjaxController {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
@ -53,10 +44,13 @@ public class SolrAutocompleteController extends VitroAjaxController {
//private static final String TEMPLATE_DEFAULT = "autocompleteResults.ftl"; //private static final String TEMPLATE_DEFAULT = "autocompleteResults.ftl";
private static String QUERY_PARAMETER_NAME = "term"; private static final String PARAM_QUERY = "term";
private static final String PARAM_RDFTYPE = "type";
String NORESULT_MSG = ""; String NORESULT_MSG = "";
private int defaultMaxSearchSize= 1000; private static final int DEFAULT_MAX_HIT_COUNT = 1000;
public static final int MAX_QUERY_LENGTH = 500;
@Override @Override
protected Actions requiredActions(VitroRequest vreq) { protected Actions requiredActions(VitroRequest vreq) {
@ -69,12 +63,9 @@ public class SolrAutocompleteController extends VitroAjaxController {
try { try {
int maxHitSize = defaultMaxSearchSize; String qtxt = vreq.getParameter(PARAM_QUERY);
String qtxt = vreq.getParameter(QUERY_PARAMETER_NAME); SolrQuery query = getQuery(qtxt, vreq);
Analyzer analyzer = getAnalyzer(getServletContext());
Query query = getQuery(vreq, analyzer, qtxt);
if (query == null ) { if (query == null ) {
log.debug("query for '" + qtxt +"' is null."); log.debug("query for '" + qtxt +"' is null.");
doNoQuery(response); doNoQuery(response);
@ -82,43 +73,35 @@ public class SolrAutocompleteController extends VitroAjaxController {
} }
log.debug("query for '" + qtxt +"' is " + query.toString()); log.debug("query for '" + qtxt +"' is " + query.toString());
IndexSearcher searcherForRequest = LuceneIndexFactory.getIndexSearcher(getServletContext()); SolrServer solr = SolrSetup.getSolrServer(getServletContext());
QueryResponse queryResponse = solr.query(query);
TopDocs topDocs = null; if ( queryResponse == null) {
try{ log.error("Query response for a search was null");
topDocs = searcherForRequest.search(query,null,maxHitSize);
}catch(Throwable t){
log.error("in first pass at search: " + t);
// this is a hack to deal with odd cases where search and index threads interact
try{
wait(150);
topDocs = searcherForRequest.search(query,null,maxHitSize);
}catch (Exception e){
log.error(e, e);
doNoSearchResults(response);
return;
}
}
if( topDocs == null || topDocs.scoreDocs == null){
log.error("topDocs for a search was null");
doNoSearchResults(response); doNoSearchResults(response);
return; return;
} }
int hitsLength = topDocs.scoreDocs.length; SolrDocumentList docs = queryResponse.getResults();
if ( hitsLength < 1 ){
if ( docs == null) {
log.error("Docs for a search was null");
doNoSearchResults(response);
return;
}
long hitCount = docs.getNumFound();
log.debug("Number of hits = " + hitCount);
if ( hitCount < 1 ) {
doNoSearchResults(response); doNoSearchResults(response);
return; return;
} }
log.debug("found "+hitsLength+" hits");
List<SearchResult> results = new ArrayList<SearchResult>(); List<SearchResult> results = new ArrayList<SearchResult>();
for(int i=0; i<topDocs.scoreDocs.length ;i++){ for (SolrDocument doc : docs) {
try{ try{
Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc); String uri = doc.get(VitroLuceneTermNames.URI).toString();
String uri = doc.get(VitroLuceneTermNames.URI); String name = doc.get(VitroLuceneTermNames.NAME_RAW).toString();
String name = doc.get(VitroLuceneTermNames.NAMERAW);
SearchResult result = new SearchResult(name, uri); SearchResult result = new SearchResult(name, uri);
results.add(result); results.add(result);
} catch(Exception e){ } catch(Exception e){
@ -138,143 +121,115 @@ public class SolrAutocompleteController extends VitroAjaxController {
} }
response.getWriter().write(jsonArray.toString()); response.getWriter().write(jsonArray.toString());
} catch (Throwable e) { } catch (Throwable e) {
log.error(e, e); log.error(e, e);
doSearchError(response); doSearchError(response);
} }
} }
private Analyzer getAnalyzer(ServletContext servletContext) throws SearchException { private SolrQuery getQuery(String querystr, VitroRequest vreq) {
Object obj = servletContext.getAttribute(LuceneSetup.ANALYZER);
if( obj == null || !(obj instanceof Analyzer) )
throw new SearchException("Could not get analyzer");
else
return (Analyzer)obj;
}
private Query getQuery(VitroRequest vreq, Analyzer analyzer, if ( querystr == null) {
String querystr) throws SearchException{ log.error("There was no parameter '"+ PARAM_QUERY
+"' in the request.");
Query query = null; return null;
try { } else if( querystr.length() > MAX_QUERY_LENGTH ) {
if( querystr == null){ log.debug("The search was too long. The maximum " +
log.error("There was no Parameter '"+ QUERY_PARAMETER_NAME "query length is " + MAX_QUERY_LENGTH );
+"' in the request."); return null;
return null;
}else if( querystr.length() > MAX_QUERY_LENGTH ){
log.debug("The search was too long. The maximum " +
"query length is " + MAX_QUERY_LENGTH );
return null;
}
query = makeNameQuery(querystr, analyzer, vreq);
// Filter by type
{
BooleanQuery boolQuery = new BooleanQuery();
String typeParam = (String) vreq.getParameter("type");
boolQuery.add( new TermQuery(
new Term(VitroLuceneTermNames.RDFTYPE,
typeParam)),
BooleanClause.Occur.MUST);
boolQuery.add(query, BooleanClause.Occur.MUST);
query = boolQuery;
}
} catch (Exception ex){
throw new SearchException(ex.getMessage());
} }
SolrQuery query = new SolrQuery();
query = query.setStart(0);
query = query.setRows(DEFAULT_MAX_HIT_COUNT);
query = setNameQuery(query, querystr, vreq);
// Filter by type
String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
if (typeParam != null) {
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
}
// Set the fields to retrieve **** RY
// query = query.setFields( ... );
return query; return query;
} }
private Query makeNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) { private SolrQuery setNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
String tokenizeParam = (String) request.getParameter("tokenize"); String tokenizeParam = (String) request.getParameter("tokenize");
boolean tokenize = "true".equals(tokenizeParam); boolean tokenize = "true".equals(tokenizeParam);
// Note: Stemming is only relevant if we are tokenizing: an untokenized name // Note: Stemming is only relevant if we are tokenizing: an untokenized name
// query will not be stemmed. So we don't look at the stem parameter until we get to // query will not be stemmed. So we don't look at the stem parameter until we get to
// makeTokenizedNameQuery(). // setTokenizedNameQuery().
if (tokenize) { if (tokenize) {
return makeTokenizedNameQuery(querystr, analyzer, request); return setTokenizedNameQuery(query, querystr, request);
} else { } else {
return makeUntokenizedNameQuery(querystr); return setUntokenizedNameQuery(query, querystr);
} }
} }
private Query makeTokenizedNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) { private SolrQuery setTokenizedNameQuery(SolrQuery query, String querystr, HttpServletRequest request) {
String stemParam = (String) request.getParameter("stem"); String stemParam = (String) request.getParameter("stem");
boolean stem = "true".equals(stemParam); boolean stem = "true".equals(stemParam);
String termName = stem ? VitroLuceneTermNames.NAME : VitroLuceneTermNames.NAMEUNSTEMMED; String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED;
BooleanQuery boolQuery = new BooleanQuery(); BooleanQuery boolQuery = new BooleanQuery();
// Use the query parser to analyze the search term the same way the indexed text was analyzed. // // Use the query parser to analyze the search term the same way the indexed text was analyzed.
// For example, text is lowercased, and function words are stripped out. // // For example, text is lowercased, and function words are stripped out.
QueryParser parser = getQueryParser(termName, analyzer); // QueryParser parser = getQueryParser(termName);
//
// The wildcard query doesn't play well with stemming. Query term name:tales* doesn't match // // The wildcard query doesn't play well with stemming. Query term name:tales* doesn't match
// "tales", which is indexed as "tale", while query term name:tales does. Obviously we need // // "tales", which is indexed as "tale", while query term name:tales does. Obviously we need
// the wildcard for name:tal*, so the only way to get them all to match is use a disjunction // // the wildcard for name:tal*, so the only way to get them all to match is use a disjunction
// of wildcard and non-wildcard queries. The query will have only an implicit disjunction // // of wildcard and non-wildcard queries. The query will have only an implicit disjunction
// operator: e.g., +(name:tales name:tales*) // // operator: e.g., +(name:tales name:tales*)
try { // try {
log.debug("Adding non-wildcard query for " + querystr); // log.debug("Adding non-wildcard query for " + querystr);
Query query = parser.parse(querystr); // Query query = parser.parse(querystr);
boolQuery.add(query, BooleanClause.Occur.SHOULD); // boolQuery.add(query, BooleanClause.Occur.SHOULD);
//
// Prevent ParseException here when adding * after a space. // // Prevent ParseException here when adding * after a space.
// If there's a space at the end, we don't need the wildcard query. // // If there's a space at the end, we don't need the wildcard query.
if (! querystr.endsWith(" ")) { // if (! querystr.endsWith(" ")) {
log.debug("Adding wildcard query for " + querystr); // log.debug("Adding wildcard query for " + querystr);
Query wildcardQuery = parser.parse(querystr + "*"); // Query wildcardQuery = parser.parse(querystr + "*");
boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD); // boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD);
} // }
//
log.debug("Name query is: " + boolQuery.toString()); // log.debug("Name query is: " + boolQuery.toString());
} catch (ParseException e) { // } catch (ParseException e) {
log.warn(e, e); // log.warn(e, e);
} // }
return boolQuery;
}
private Query makeUntokenizedNameQuery(String querystr) {
querystr = querystr.toLowerCase();
String termName = VitroLuceneTermNames.NAMELOWERCASE;
BooleanQuery query = new BooleanQuery();
log.debug("Adding wildcard query on unanalyzed name");
query.add(
new WildcardQuery(new Term(termName, querystr + "*")),
BooleanClause.Occur.MUST);
return query; return query;
} }
private QueryParser getQueryParser(String searchField, Analyzer analyzer){ private SolrQuery setUntokenizedNameQuery(SolrQuery query, String querystr) {
// searchField indicates which field to search against when there is no term
// indicated in the query string. //querystr = querystr.toLowerCase();
// The analyzer is needed so that we use the same analyzer on the search queries as querystr += "*";
// was used on the text that was indexed. query = query.setQuery(querystr);
QueryParser qp = new QueryParser(Version.LUCENE_29, searchField,analyzer); // *** It's the df parameter that sets the field to search
//this sets the query parser to AND all of the query terms it finds. //String field = VitroLuceneTermNames.LABEL_LOWERCASE;
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
return qp; return query;
} }
private void doNoQuery(HttpServletResponse response) throws IOException { private void doNoQuery(HttpServletResponse response) throws IOException {
// For now, we are not sending an error message back to the client because with the default autocomplete configuration it // For now, we are not sending an error message back to the client because
// chokes. // with the default autocomplete configuration it chokes.
doNoSearchResults(response); doNoSearchResults(response);
} }
private void doSearchError(HttpServletResponse response) throws IOException { private void doSearchError(HttpServletResponse response) throws IOException {
// For now, we are not sending an error message back to the client because with the default autocomplete configuration it // For now, we are not sending an error message back to the client because
// chokes. // with the default autocomplete configuration it chokes.
doNoSearchResults(response); doNoSearchResults(response);
} }
@ -282,8 +237,6 @@ public class SolrAutocompleteController extends VitroAjaxController {
response.getWriter().write("[]"); response.getWriter().write("[]");
} }
public static final int MAX_QUERY_LENGTH = 500;
public class SearchResult implements Comparable<Object> { public class SearchResult implements Comparable<Object> {
private String label; private String label;
private String uri; private String uri;

View file

@ -22,10 +22,6 @@ import javax.servlet.http.HttpServletResponse;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.util.Version;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer; import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
@ -73,9 +69,8 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
private static final long serialVersionUID = 1L; private static final long serialVersionUID = 1L;
private static final Log log = LogFactory.getLog(SolrPagedSearchController.class); private static final Log log = LogFactory.getLog(SolrPagedSearchController.class);
private static final int DEFAULT_HITS_PER_PAGE = 25; private static final int DEFAULT_HITS_PER_PAGE = 25;
private static final int DEFAULT_MAX_SEARCH_SIZE = 1000; private static final int DEFAULT_MAX_HIT_COUNT = 1000;
private static final String PARAM_XML_REQUEST = "xml"; private static final String PARAM_XML_REQUEST = "xml";
private static final String PARAM_START_INDEX = "startIndex"; private static final String PARAM_START_INDEX = "startIndex";
@ -169,15 +164,15 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
} }
log.debug("hitsPerPage is " + hitsPerPage); log.debug("hitsPerPage is " + hitsPerPage);
int maxHitCount = DEFAULT_MAX_SEARCH_SIZE ; int maxHitCount = DEFAULT_MAX_HIT_COUNT ;
if( startIndex >= DEFAULT_MAX_SEARCH_SIZE - hitsPerPage ) if( startIndex >= DEFAULT_MAX_HIT_COUNT - hitsPerPage )
maxHitCount = startIndex + DEFAULT_MAX_SEARCH_SIZE ; maxHitCount = startIndex + DEFAULT_MAX_HIT_COUNT ;
log.debug("maxHitSize is " + maxHitCount); log.debug("maxHitSize is " + maxHitCount);
String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME); String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
log.debug("Query text is "+ qtxt); // + " Analyzer is "+ analyzer.toString()); log.debug("Query text is \""+ qtxt + "\"");
SolrQuery query = getQuery(qtxt, maxHitCount, vreq); SolrQuery query = getQuery(qtxt, maxHitCount, vreq);
@ -440,29 +435,27 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
private SolrQuery getQuery(String queryText, int maxHitCount, VitroRequest vreq) { private SolrQuery getQuery(String queryText, int maxHitCount, VitroRequest vreq) {
SolrQuery query = new SolrQuery(queryText); SolrQuery query = new SolrQuery(queryText);
//SolrQuery query = new SolrQuery();
//query.setQuery(queryText);
// Solr requires these values, but we don't want them to be the real values for this page // Solr requires these values, but we don't want them to be the real values for this page
// of results, else the refinement links won't work correctly: each page of results needs to // of results, else the refinement links won't work correctly: each page of results needs to
// show refinement links generated for all results, not just for the results on the current page. // show refinement links generated for all results, not just for the results on the current page.
query.setStart(0); query.setStart(0)
query.setRows(maxHitCount); .setRows(maxHitCount);
// Classgroup filtering // Classgroup filtering
Object param = vreq.getParameter(PARAM_CLASSGROUP); String classgroupParam = (String) vreq.getParameter(PARAM_CLASSGROUP);
if( param != null && !"".equals(param)){ if ( ! StringUtils.isBlank(classgroupParam) ) {
log.debug("Firing classgroup query "); log.debug("Firing classgroup query ");
log.debug("request.getParameter(classgroup) is "+ param.toString()); log.debug("request.getParameter(classgroup) is "+ classgroupParam);
query = query.addFilterQuery(VitroLuceneTermNames.CLASSGROUP_URI + ":\"" + param + "\""); query.addFilterQuery(VitroLuceneTermNames.CLASSGROUP_URI + ":\"" + classgroupParam + "\"");
} }
// rdf:type filtering // rdf:type filtering
param = vreq.getParameter(PARAM_RDFTYPE); String typeParam = (String) vreq.getParameter(PARAM_RDFTYPE);
if( param != null && !"".equals(param)){ if ( ! StringUtils.isBlank(typeParam) ) {
log.debug("Firing type query "); log.debug("Firing type query ");
log.debug("request.getParameter(type) is "+ param.toString()); log.debug("request.getParameter(type) is "+ typeParam);
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + param + "\""); query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + typeParam + "\"");
} }
//query.setQuery(queryText); //query.setQuery(queryText);
@ -470,23 +463,6 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
return query; return query;
} }
/**
 * Creates a Lucene multi-field query parser for the search index.
 *
 * The parser is constructed for {@code Version.LUCENE_29} over the
 * {@code VitroLuceneTermNames} fields listed below, using the supplied
 * analyzer.
 *
 * @param analyzer the analyzer handed to the parser
 * @return a {@code MultiFieldQueryParser} over the listed index fields
 */
@SuppressWarnings("unused")
private QueryParser getQueryParser(Analyzer analyzer){
    // Per the original inline note, these constants correspond to the index fields:
    // "name", "nameunstemmed", "type", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw",
    // "classLocalName", "classLocalNameLowerCase"
    String[] searchFields = new String[] {
        VitroLuceneTermNames.NAME,
        VitroLuceneTermNames.NAMEUNSTEMMED,
        VitroLuceneTermNames.RDFTYPE,
        VitroLuceneTermNames.ALLTEXT,
        VitroLuceneTermNames.ALLTEXTUNSTEMMED,
        VitroLuceneTermNames.NAMERAW,
        VitroLuceneTermNames.CLASSLOCALNAME,
        VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE
    };
    return new MultiFieldQueryParser(Version.LUCENE_29, searchFields, analyzer);
}
private class VClassGroupSearchLink extends LinkTemplateModel { private class VClassGroupSearchLink extends LinkTemplateModel {
VClassGroupSearchLink(String querytext, VClassGroup classgroup) { VClassGroupSearchLink(String querytext, VClassGroup classgroup) {

View file

@ -45,14 +45,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
public static String CLASSGROUP_URI = "classgroup"; public static String CLASSGROUP_URI = "classgroup";
/** Modtime from db */ /** Modtime from db */
public static String MODTIME = "modTime"; public static String MODTIME = "modTime";
/** Name of entity, tab or vclass */
public static String NAME = "name";
/** rdfs:label unanalyzed */
public static String NAMELOWERCASE = "nameunanalyzed" ;
/** Name of entity, unstemmed */
public static String NAMEUNSTEMMED = "nameunstemmed";
/** Unaltered name of individual, un-lowercased, un-stemmed, un-tokenized */
public static String NAMERAW = "nameraw";
/** time of index in msec since epoch */ /** time of index in msec since epoch */
public static String INDEXEDTIME= "indexedTime"; public static String INDEXEDTIME= "indexedTime";
/** timekey of entity in yyyymmddhhmm */ /** timekey of entity in yyyymmddhhmm */
@ -78,6 +71,20 @@ public class Entity2LuceneDoc implements Obj2DocIface{
public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase"; public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
/** class names in human readable form of an individual*/ /** class names in human readable form of an individual*/
public static final String CLASSLOCALNAME = "classLocalName"; public static final String CLASSLOCALNAME = "classLocalName";
// Fields derived from rdfs:label
/** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
public static String NAME_RAW = "nameRaw"; // was NAMERAW
/** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
/** rdfs:label lowercased, tokenized, stop words, no stemming **/
public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED
/** rdfs:label lowercased, tokenized, stop words, stemmed **/
public static String NAME_STEMMED = "nameStemmed"; // was NAME
} }
private static final Log log = LogFactory.getLog(Entity2LuceneDoc.class.getName()); private static final Log log = LogFactory.getLog(Entity2LuceneDoc.class.getName());
@ -189,7 +196,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
//java class //java class
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
//Entity Name // Individual label
if( ent.getRdfsLabel() != null ) if( ent.getRdfsLabel() != null )
value=ent.getRdfsLabel(); value=ent.getRdfsLabel();
else{ else{
@ -198,21 +205,22 @@ public class Entity2LuceneDoc implements Obj2DocIface{
log.debug("Using local name for individual with rdfs:label " + ent.getURI()); log.debug("Using local name for individual with rdfs:label " + ent.getURI());
value = ent.getLocalName(); value = ent.getLocalName();
} }
Field name = new Field(term.NAME, value, Field.Store.YES, Field.Index.ANALYZED);
doc.add( name );
Field nameUn = new Field(term.NAMEUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED); Field labelRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
nameUn.setBoost(NAME_BOOST); labelRaw.setBoost(NAME_BOOST);
doc.add( nameUn ); doc.add(labelRaw);
// BK nameunanalyzed is used by IndividualListController Field labelLowerCase = new Field(term.NAME_LOWERCASE, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
Field nameUnanalyzed = new Field(term.NAMELOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED); labelLowerCase.setBoost(NAME_BOOST);
nameUnanalyzed.setBoost(NAME_BOOST); doc.add(labelLowerCase);
doc.add( nameUnanalyzed );
Field nameRaw = new Field(term.NAMERAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED); Field labelUnstemmed = new Field(term.NAME_UNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
nameRaw.setBoost(NAME_BOOST); labelUnstemmed.setBoost(NAME_BOOST);
doc.add(nameRaw); doc.add(labelUnstemmed);
Field labelStemmed = new Field(term.NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
labelStemmed.setBoost(NAME_BOOST);
doc.add(labelStemmed);
//Moniker //Moniker

View file

@ -81,8 +81,7 @@ public class HtmlLowerStopAnalyzer extends Analyzer {
/** /**
* Processes the input by first converting it to * Processes the input by first converting it to
* lower case, then by eliminating stop words, and * lower case, then by eliminating stop words.
* finally by performing Porter stemming on it.
* *
* @param reader the Reader that * @param reader the Reader that
* provides access to the input text * provides access to the input text

View file

@ -7,8 +7,8 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAME;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.MONIKER;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAMEUNSTEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.NAME_UNSTEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
import java.io.File; import java.io.File;
@ -245,10 +245,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.LUCENE_29)); PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer()); analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer());
// analyzer.addAnalyzer(NAME, new HtmlLowerStopStemAnalyzer());
analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer()); analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(NAMEUNSTEMMED, new HtmlLowerStopAnalyzer()); analyzer.addAnalyzer(NAME_UNSTEMMED, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(NAME, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer()); analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());