NIHVIVO-2437 Filtered search based on refinement links. Change default boolean operator from OR to AND. Unrelated code cleanup in ApplicationDaoJena.java.

This commit is contained in:
ryounes 2011-05-11 22:08:39 +00:00
parent f41d69753b
commit b8471f62cf
3 changed files with 61 additions and 197 deletions

View file

@ -508,7 +508,7 @@
<defaultSearchField>ALLTEXT</defaultSearchField>
<!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
<solrQueryParser defaultOperator="OR"/>
<solrQueryParser defaultOperator="AND"/>
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field differently,

View file

@ -29,12 +29,12 @@ public class ApplicationDaoJena extends JenaBaseDao implements ApplicationDao {
Integer portalCount = null;
List<String> externallyLinkedNamespaces = null;
ModelChangedListener modelChangedListener = null;
ModelChangedListener externalNamespaceChangeListener = null;
public ApplicationDaoJena(WebappDaoFactoryJena wadf) {
super(wadf);
modelChangedListener = new ExternalNamespacesChangeListener();
getOntModelSelector().getDisplayModel().register(modelChangedListener);
externalNamespaceChangeListener = new ExternalNamespacesChangeListener();
getOntModelSelector().getDisplayModel().register(externalNamespaceChangeListener);
}
private String getApplicationResourceURI() {
@ -117,18 +117,19 @@ public class ApplicationDaoJena extends JenaBaseDao implements ApplicationDao {
}
public void close() {
if (modelChangedListener != null) {
getOntModelSelector().getDisplayModel().unregister(modelChangedListener);
if (externalNamespaceChangeListener != null) {
getOntModelSelector().getDisplayModel().unregister(externalNamespaceChangeListener);
}
}
private static final boolean CLEAR_CACHE = true;
@Override
public synchronized List<String> getExternallyLinkedNamespaces() {
return getExternallyLinkedNamespaces(!CLEAR_CACHE);
}
public synchronized List<String> getExternallyLinkedNamespaces(boolean clearCache) {
private synchronized List<String> getExternallyLinkedNamespaces(boolean clearCache) {
if (clearCache || externallyLinkedNamespaces == null) {
externallyLinkedNamespaces = new ArrayList<String>();
OntModel ontModel = getOntModelSelector().getDisplayModel();

View file

@ -15,7 +15,6 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
@ -27,9 +26,9 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.util.Version;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
@ -53,7 +52,7 @@ import edu.cornell.mannlib.vitro.webapp.search.SearchException;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
@ -77,11 +76,9 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
private static final String XML_REQUEST_PARAM = "xml";
private static final int DEFAULT_HITS_PER_PAGE = 25;
private static final int DEFAULT_MAX_SEARCH_SIZE = 1000;
private static final float QUERY_BOOST = 2.0F;
protected static final Map<Format,Map<Result,String>> templateTable;
//private IndexSearcher searcher = null;
protected enum Format {
HTML, XML;
}
@ -173,20 +170,10 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
// RY *** Analyzer
//Analyzer analyzer = getAnalyzer(getServletContext());
log.debug("Query text: " + qtxt);
log.debug("Query text is "+ qtxt); // + " Analyzer is "+ analyzer.toString());
SolrQuery query = getQuery(qtxt, maxHitCount, vreq);
// We need Solr to send us the entire result set back, otherwise we may not get all the
// right refinement links. For example, if all individuals on pg 1 are Persons, and on
// pg 2 there are FacultyMembers, we need to show the FacultyMember refinement link on
// both pages.
//parameters.set("start", startIndex);
//parameters.set("rows", hitsPerPage);
// ** For xml requested, add version=2.2 for xml version
// is that enough, or do we also have to add wt param?
@ -196,55 +183,6 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
try {
response = solr.query(query);
//log.debug("Query text is "+ qtxt + " Analyzer is "+ analyzer.toString());
// Query query = null;
// try {
// query = getQuery(vreq, portalFlag, analyzer, qtxt);
// log.debug("query for '" + qtxt +"' is " + query.toString());
// } catch (ParseException e) {
// return doBadQuery(portal, qtxt,format);
// }
//IndexSearcher searcherForRequest = LuceneIndexFactory.getIndexSearcher(getServletContext());
/* using the CustomSimilarity to override effects such as
* 1) rarity of a term doesn't affect the document score.
* 2) number of instances of a query term in the matched document doesn't affect the document score
* 3) field length doesn't affect the document score
*
* 3/29/2011 bk392
*/
// CustomSimilarity customSimilarity = new CustomSimilarity();
// searcherForRequest.setSimilarity(customSimilarity);
//
// TopDocs topDocs = null;
// try{
// log.debug("Searching for query term in the Index with maxHitSize "+ maxHitSize);
// log.debug("Query is "+ query.toString());
//
// //sets the query boost for the query. the lucene docs matching this query term
// //are multiplied by QUERY_BOOST to get their total score
// //query.setBoost(QUERY_BOOST);
//
// topDocs = searcherForRequest.search(query,null,maxHitSize);
//
// log.debug("Total hits for the query are "+ topDocs.totalHits);
// for(ScoreDoc scoreDoc : topDocs.scoreDocs){
//
// Document document = searcherForRequest.doc(scoreDoc.doc);
// Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc);
//
// log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME) + " score: " +scoreDoc.score);
// log.debug("Scoring of the doc explained " + explanation.toString());
// log.debug("Explanation's description "+ explanation.getDescription());
// log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT));
// log.debug("ALLTEXTUNSTEMMED: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED));
//
//
// }
//
} catch (Throwable t) {
log.error("in first pass at search: " + t);
// this is a hack to deal with odd cases where search and index threads interact
@ -292,7 +230,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
for(int i = startIndex; i < lastHitToShow; i++){
try {
SolrDocument doc = docs.get(i);
String uri = doc.get(Entity2LuceneDoc.term.URI).toString();
String uri = doc.get(VitroLuceneTermNames.URI).toString();
log.debug("Retrieving individual with uri "+ uri);
Individual ent = new IndividualImpl();
ent.setURI(uri);
@ -327,10 +265,10 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
}
String typeParam = vreq.getParameter("type");
boolean typeFiltereRequested = false;
boolean typeFilterRequested = false;
if (!StringUtils.isEmpty(typeParam)) {
VClass type = vclassDao.getVClassByURI(typeParam);
typeFiltereRequested = true;
typeFilterRequested = true;
if (type != null && type.getName() != null)
body.put("typeName", type.getName());
}
@ -338,7 +276,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
/* Add classgroup and type refinement links to body */
if( wasHtmlRequested ){
// Search request includes no classgroup and no type, so add classgroup search refinement links.
if ( !classGroupFilterRequested && !typeFiltereRequested ) {
if ( !classGroupFilterRequested && !typeFilterRequested ) {
List<VClassGroup> classgroups = getClassGroups(grpDao, docs);
List<VClassGroupSearchLink> classGroupLinks = new ArrayList<VClassGroupSearchLink>(classgroups.size());
for (VClassGroup vcg : classgroups) {
@ -350,7 +288,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
// Search request is for a classgroup, so add rdf:type search refinement links
// but try to filter out classes that are subclasses
} else if ( classGroupFilterRequested && !typeFiltereRequested ) {
} else if ( classGroupFilterRequested && !typeFilterRequested ) {
List<VClass> vClasses = getVClasses(vclassDao, docs);
List<VClassSearchLink> vClassLinks = new ArrayList<VClassSearchLink>(vClasses.size());
for (VClass vc : vClasses) {
@ -413,7 +351,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
for(int i=0; i<hitCount && n > grpsFound ;i++){
try{
SolrDocument doc = docs.get(i);
Collection<Object> grps = doc.getFieldValues(Entity2LuceneDoc.term.CLASSGROUP_URI);
Collection<Object> grps = doc.getFieldValues(VitroLuceneTermNames.CLASSGROUP_URI);
if (grps != null) {
for (Object o : grps) {
String groupUri = o.toString();
@ -448,7 +386,6 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
return classgroups;
}
private List<VClass> getVClasses(VClassDao vclassDao, SolrDocumentList docs){
HashSet<String> typesInHits = getVClassUrisForHits(docs);
List<VClass> classes = new ArrayList<VClass>(typesInHits.size());
@ -480,7 +417,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
HashSet<String> typesInHits = new HashSet<String>();
for (SolrDocument doc : docs) {
try {
Collection<Object> types = doc.getFieldValues(Entity2LuceneDoc.term.RDFTYPE);
Collection<Object> types = doc.getFieldValues(VitroLuceneTermNames.RDFTYPE);
if (types != null) {
for (Object o : types) {
String typeUri = o.toString();
@ -494,114 +431,51 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
return typesInHits;
}
private Analyzer getAnalyzer(ServletContext servletContext) throws SearchException {
Object obj = servletContext.getAttribute(LuceneSetup.ANALYZER);
if( obj == null || !(obj instanceof Analyzer) )
throw new SearchException("Could not get analyzer");
else
return (Analyzer)obj;
}
// private Query getQuery(VitroRequest request,
// Analyzer analyzer, String querystr ) throws SearchException, ParseException {
// Query query = null;
// try{
// //String querystr = request.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
// if( querystr == null){
// log.error("There was no Parameter '"+VitroQuery.QUERY_PARAMETER_NAME
// +"' in the request.");
// return null;
// }else if( querystr.length() > MAX_QUERY_LENGTH ){
// log.debug("The search was too long. The maximum " +
// "query length is " + MAX_QUERY_LENGTH );
// return null;
// }
//
// log.debug("Parsing query using QueryParser ");
//
// QueryParser parser = getQueryParser(analyzer);
// query = parser.parse(querystr);
//
// //check if this is classgroup filtered
// Object param = request.getParameter("classgroup");
// if( param != null && !"".equals(param)){
//
// log.debug("Firing classgroup query ");
// log.debug("request.getParameter(classgroup) is "+ param.toString());
//
// BooleanQuery boolQuery = new BooleanQuery();
// boolQuery.add( query, BooleanClause.Occur.MUST);
// boolQuery.add( new TermQuery(
// new Term(Entity2LuceneDoc.term.CLASSGROUP_URI,
// (String)param)),
// BooleanClause.Occur.MUST);
// query = boolQuery;
// }
//
// //check if this is rdf:type filtered
// param = request.getParameter("type");
// if( param != null && !"".equals(param)){
// log.debug("Firing type query ");
// log.debug("request.getParameter(type) is "+ param.toString());
//
// BooleanQuery boolQuery = new BooleanQuery();
// boolQuery.add( query, BooleanClause.Occur.MUST);
// boolQuery.add( new TermQuery(
// new Term(Entity2LuceneDoc.term.RDFTYPE,
// (String)param)),
// BooleanClause.Occur.MUST);
// query = boolQuery;
// }
//
// log.debug("Query: " + query);
//
// } catch (ParseException e) {
// throw new ParseException(e.getMessage());
// } catch (Exception ex){
// throw new SearchException(ex.getMessage());
// }
//
// return query;
// }
private SolrQuery getQuery(String queryText, int maxHitCount, VitroRequest vreq) {
SolrQuery query = new SolrQuery(queryText);
//SolrQuery query = new SolrQuery();
//query.setQuery(queryText);
// Solr requires these values, but we don't want them to be the real values for this page
// of results, else the refinement links won't work correctly: if only Persons are on pg 1,
// but there are FacultyMembers on pg 2, we still need to show the FacultyMember refinement
// link on pg 1, which won't happen if we have retrieved only one page of results from Solr.
// of results, else the refinement links won't work correctly: each page of results needs to
// show refinement links generated for all results, not just for the results on the current page.
query.setStart(0);
query.setRows(maxHitCount);
// RY Add other stuff here
// classgroups, type params
// Classgroup filtering
Object param = vreq.getParameter("classgroup");
if( param != null && !"".equals(param)){
log.debug("Firing classgroup query ");
log.debug("request.getParameter(classgroup) is "+ param.toString());
query = query.addFilterQuery(VitroLuceneTermNames.CLASSGROUP_URI + ":\"" + param + "\"");
}
// rdf:type filtering
param = vreq.getParameter("type");
if( param != null && !"".equals(param)){
log.debug("Firing type query ");
log.debug("request.getParameter(type) is "+ param.toString());
query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + param + "\"");
}
//query.setQuery(queryText);
log.debug("Query = " + query.toString());
return query;
}
@SuppressWarnings("static-access")
@SuppressWarnings("unused")
private QueryParser getQueryParser(Analyzer analyzer){
//defaultSearchField indicates which field search against when there is no term
//indicated in the query string.
//The analyzer is needed so that we use the same analyzer on the search queries as
//was used on the text that was indexed.
//QueryParser qp = new QueryParser("NAME",analyzer);
//this sets the query parser to AND all of the query terms it finds.
//set up the map of stemmed field names -> unstemmed field names
// HashMap<String,String> map = new HashMap<String, String>();
// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
// qp.setStemmedToUnstemmed(map);
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{
"name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer);
// QueryParser qp = new QueryParser(Version.LUCENE_29, "name", analyzer);
//AND_OPERATOR returns documents even if the terms in the query lie in different fields.
//The only requirement is that they exist in a single document.
//qp.setDefaultOperator(QueryParser.AND_OPERATOR);
MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[] {
VitroLuceneTermNames.NAME,
VitroLuceneTermNames.NAMEUNSTEMMED,
VitroLuceneTermNames.RDFTYPE,
VitroLuceneTermNames.ALLTEXT,
VitroLuceneTermNames.ALLTEXTUNSTEMMED,
VitroLuceneTermNames.NAMERAW,
VitroLuceneTermNames.CLASSLOCALNAME,
VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE }, analyzer);
//"name", "nameunstemmed", "type", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer);
return qp;
}
@ -648,13 +522,11 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
private String getPreviousPageLink(int startIndex, int hitsPerPage, String baseUrl, ParamMap params) {
params.put("startIndex", String.valueOf(startIndex-hitsPerPage));
//return new PagingLink("Previous", baseUrl, params);
return UrlBuilder.getUrl(baseUrl, params);
}
private String getNextPageLink(int startIndex, int hitsPerPage, String baseUrl, ParamMap params) {
params.put("startIndex", String.valueOf(startIndex+hitsPerPage));
//return new PagingLink("Next", baseUrl, params);
return UrlBuilder.getUrl(baseUrl, params);
}
@ -681,12 +553,12 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
return new ExceptionResponseValues(getTemplate(f,Result.ERROR), body, e);
}
private TemplateResponseValues doBadQuery(ApplicationBean appBean, String query, Format f) {
Map<String, Object> body = new HashMap<String, Object>();
body.put("title", "Search " + appBean.getApplicationName());
body.put("query", query);
return new TemplateResponseValues(getTemplate(f,Result.BAD_QUERY), body);
}
// private TemplateResponseValues doBadQuery(ApplicationBean appBean, String query, Format f) {
// Map<String, Object> body = new HashMap<String, Object>();
// body.put("title", "Search " + appBean.getApplicationName());
// body.put("query", query);
// return new TemplateResponseValues(getTemplate(f,Result.BAD_QUERY), body);
// }
private TemplateResponseValues doFailedSearch(String message, String querytext, Format f) {
Map<String, Object> body = new HashMap<String, Object>();
@ -747,32 +619,22 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
return rv;
}
@SuppressWarnings("unchecked")
@SuppressWarnings({ "unchecked", "unused" })
private HashSet<String> getDataPropertyBlacklist(){
HashSet<String>dpBlacklist = (HashSet<String>)
getServletContext().getAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST);
return dpBlacklist;
}
@SuppressWarnings("unchecked")
@SuppressWarnings({ "unchecked", "unused" })
private HashSet<String> getObjectPropertyBlacklist(){
HashSet<String>opBlacklist = (HashSet<String>)
getServletContext().getAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST);
return opBlacklist;
}
private final String defaultSearchField = "ALLTEXT";
public static final int MAX_QUERY_LENGTH = 500;
/**
* Need to accept notification from indexer that the index has been changed.
*/
// public void close() {
// searcher = null;
// }
public VitroHighlighter getHighlighter(VitroQuery q) {
throw new Error("PagedSearchController.getHighlighter() is unimplemented");
}
@ -781,6 +643,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
throw new Error("PagedSearchController.getQueryFactory() is unimplemented");
}
@SuppressWarnings("rawtypes")
public List search(VitroQuery query) throws SearchException {
throw new Error("PagedSearchController.search() is unimplemented");
}