NIHVIVO-2437 Filtered search based on refinement links. Change default boolean operator from OR to AND. Unrelated code cleanup in ApplicationDaoJena.java.

2011-05-11 22:08:39 +00:00 · 2011-05-11 22:08:39 +00:00 · b8471f62cf
commit b8471f62cf
parent f41d69753b
3 changed files with 61 additions and 197 deletions
--- a/solr/exampleSolr/conf/schema.xml
+++ b/solr/exampleSolr/conf/schema.xml
@ -508,7 +508,7 @@
 <defaultSearchField>ALLTEXT</defaultSearchField>

 <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
- <solrQueryParser defaultOperator="OR"/>
+ <solrQueryParser defaultOperator="AND"/>

  <!-- copyField commands copy one field to another at the time a document
        is added to the index.  It's used either to index the same field differently,
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ApplicationDaoJena.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ApplicationDaoJena.java
@ -29,12 +29,12 @@ public class ApplicationDaoJena extends JenaBaseDao implements ApplicationDao {
    
 	Integer portalCount = null;
 	List<String> externallyLinkedNamespaces = null;
-    ModelChangedListener modelChangedListener = null;
+    ModelChangedListener externalNamespaceChangeListener = null;
 	
    public ApplicationDaoJena(WebappDaoFactoryJena wadf) {
        super(wadf);
-        modelChangedListener = new ExternalNamespacesChangeListener();
-        getOntModelSelector().getDisplayModel().register(modelChangedListener);
+        externalNamespaceChangeListener = new ExternalNamespacesChangeListener();
+        getOntModelSelector().getDisplayModel().register(externalNamespaceChangeListener);
    }
    
    private String getApplicationResourceURI() {
@ -117,18 +117,19 @@ public class ApplicationDaoJena extends JenaBaseDao implements ApplicationDao {
    }
    
    public void close() {
-        if (modelChangedListener != null) {
-            getOntModelSelector().getDisplayModel().unregister(modelChangedListener);
+        if (externalNamespaceChangeListener != null) {
+            getOntModelSelector().getDisplayModel().unregister(externalNamespaceChangeListener);
        }
    }

 	private static final boolean CLEAR_CACHE = true;
 	
+	@Override
 	public synchronized List<String> getExternallyLinkedNamespaces() {
 	    return getExternallyLinkedNamespaces(!CLEAR_CACHE);
 	}

-    public synchronized List<String> getExternallyLinkedNamespaces(boolean clearCache) {
+    private synchronized List<String> getExternallyLinkedNamespaces(boolean clearCache) {
        if (clearCache || externallyLinkedNamespaces == null) {            
            externallyLinkedNamespaces = new ArrayList<String>();
            OntModel ontModel = getOntModelSelector().getDisplayModel();
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrPagedSearchController.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/SolrPagedSearchController.java
@ -15,7 +15,6 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;

-import javax.servlet.ServletContext;
 import javax.servlet.ServletException;
 import javax.servlet.http.HttpServletRequest;
 import javax.servlet.http.HttpServletResponse;
@ -27,9 +26,9 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.queryParser.MultiFieldQueryParser;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.util.Version;
-import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServer;
+import org.apache.solr.client.solrj.response.QueryResponse;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrDocumentList;

@ -53,7 +52,7 @@ import edu.cornell.mannlib.vitro.webapp.search.SearchException;
 import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter;
 import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
 import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
-import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
+import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames;
 import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
 import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
 import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel;
@ -77,11 +76,9 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
    private static final String XML_REQUEST_PARAM = "xml";
    private static final int DEFAULT_HITS_PER_PAGE = 25;
    private static final int DEFAULT_MAX_SEARCH_SIZE = 1000;   
-    private static final float QUERY_BOOST = 2.0F;
    
    protected static final Map<Format,Map<Result,String>> templateTable;

-    //private IndexSearcher searcher = null;
    protected enum Format { 
        HTML, XML; 
    }
@ -173,20 +170,10 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {

            String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
            
-            // RY *** Analyzer
-            //Analyzer analyzer = getAnalyzer(getServletContext());
-            
-            log.debug("Query text: " + qtxt);
+            log.debug("Query text is "+ qtxt); // + " Analyzer is "+ analyzer.toString());

            SolrQuery query = getQuery(qtxt, maxHitCount, vreq);

-            // We need Solr to send us the entire result set back, otherwise we may not get all the
-            // right refinement links. For example, if all individuals on pg 1 are Persons, and on 
-            // pg 2 there are FacultyMembers, we need to show the FacultyMember refinement link on 
-            // both pages.
-            //parameters.set("start", startIndex);
-            //parameters.set("rows", hitsPerPage);
-
            // ** For xml requested, add version=2.2 for xml version 
            // is that enough, or do we also have to add wt param?
            
@ -196,55 +183,6 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
            try {
                response = solr.query(query);

-            
-            //log.debug("Query text is "+ qtxt + " Analyzer is "+ analyzer.toString());
-            
-//            Query query = null;
-//            try {
-//                query = getQuery(vreq, portalFlag, analyzer, qtxt);
-//                log.debug("query for '" + qtxt +"' is " + query.toString());
-//            } catch (ParseException e) {
-//                return doBadQuery(portal, qtxt,format);
-//            } 
-
-            //IndexSearcher searcherForRequest = LuceneIndexFactory.getIndexSearcher(getServletContext());
-                                                
-            /* using the CustomSimilarity to override effects such as 
-             * 1) rarity of a term doesn't affect the document score.
-             * 2) number of instances of a query term in the matched document doesn't affect the document score
-             * 3) field length doesn't affect the document score 
-             * 
-             * 3/29/2011 bk392
-             */
-//            CustomSimilarity customSimilarity = new CustomSimilarity();
-//            searcherForRequest.setSimilarity(customSimilarity);
-//            
-//            TopDocs topDocs = null;
-//            try{
-//            	log.debug("Searching for query term in the Index with maxHitSize "+ maxHitSize);
-//            	log.debug("Query is "+ query.toString());
-//            	
-//            	//sets the query boost for the query. the lucene docs matching this query term
-//            	//are multiplied by QUERY_BOOST to get their total score
-//            	//query.setBoost(QUERY_BOOST);
-//                
-//            	topDocs = searcherForRequest.search(query,null,maxHitSize);
-//            	
-//            	log.debug("Total hits for the query are "+ topDocs.totalHits);
-//            	for(ScoreDoc scoreDoc : topDocs.scoreDocs){
-//            		
-//            		Document document = searcherForRequest.doc(scoreDoc.doc);
-//            		Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc);
-//            		
-//            		log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME) + " score: " +scoreDoc.score);
-//            		log.debug("Scoring of the doc explained " + explanation.toString());
-//            		log.debug("Explanation's description "+ explanation.getDescription());
-//            		log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT));
-//            		log.debug("ALLTEXTUNSTEMMED: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED));
-//            		
-//            		
-//            	}
-//            	
            } catch (Throwable t) {
                log.error("in first pass at search: " + t);
                // this is a hack to deal with odd cases where search and index threads interact
@ -292,7 +230,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
            for(int i = startIndex; i < lastHitToShow; i++){
                try {                    
                    SolrDocument doc = docs.get(i);
-                    String uri = doc.get(Entity2LuceneDoc.term.URI).toString();
+                    String uri = doc.get(VitroLuceneTermNames.URI).toString();
                    log.debug("Retrieving individual with uri "+ uri);
                    Individual ent = new IndividualImpl();
                    ent.setURI(uri);
@ -327,10 +265,10 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
            }
            
            String typeParam = vreq.getParameter("type");
-            boolean typeFiltereRequested = false;
+            boolean typeFilterRequested = false;
            if (!StringUtils.isEmpty(typeParam)) {
                VClass type = vclassDao.getVClassByURI(typeParam);
-                typeFiltereRequested = true;
+                typeFilterRequested = true;
                if (type != null && type.getName() != null)
                    body.put("typeName", type.getName());
            }
@ -338,7 +276,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
            /* Add classgroup and type refinement links to body */
            if( wasHtmlRequested ){                                
                // Search request includes no classgroup and no type, so add classgroup search refinement links.
-                if ( !classGroupFilterRequested && !typeFiltereRequested ) { 
+                if ( !classGroupFilterRequested && !typeFilterRequested ) { 
                    List<VClassGroup> classgroups = getClassGroups(grpDao, docs);
                    List<VClassGroupSearchLink> classGroupLinks = new ArrayList<VClassGroupSearchLink>(classgroups.size());
                    for (VClassGroup vcg : classgroups) {
@ -350,7 +288,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
     
                // Search request is for a classgroup, so add rdf:type search refinement links
                // but try to filter out classes that are subclasses
-                } else if ( classGroupFilterRequested && !typeFiltereRequested ) {  
+                } else if ( classGroupFilterRequested && !typeFilterRequested ) {  
                    List<VClass> vClasses = getVClasses(vclassDao, docs);
                    List<VClassSearchLink> vClassLinks = new ArrayList<VClassSearchLink>(vClasses.size());
                    for (VClass vc : vClasses) {
@ -413,7 +351,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
        for(int i=0; i<hitCount && n > grpsFound ;i++){
            try{
                SolrDocument doc = docs.get(i);        
-                Collection<Object> grps = doc.getFieldValues(Entity2LuceneDoc.term.CLASSGROUP_URI);     
+                Collection<Object> grps = doc.getFieldValues(VitroLuceneTermNames.CLASSGROUP_URI);     
                if (grps != null) {
                    for (Object o : grps) {                            
                        String groupUri = o.toString();
@ -448,7 +386,6 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
        return classgroups;
    }

-
    private List<VClass> getVClasses(VClassDao vclassDao, SolrDocumentList docs){        
        HashSet<String> typesInHits = getVClassUrisForHits(docs);                                
        List<VClass> classes = new ArrayList<VClass>(typesInHits.size());
@ -480,7 +417,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
        HashSet<String> typesInHits = new HashSet<String>();  
        for (SolrDocument doc : docs) {
            try {
-                Collection<Object> types = doc.getFieldValues(Entity2LuceneDoc.term.RDFTYPE);     
+                Collection<Object> types = doc.getFieldValues(VitroLuceneTermNames.RDFTYPE);     
                if (types != null) {
                    for (Object o : types) {
                        String typeUri = o.toString();
@ -494,114 +431,51 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
        return typesInHits;
    }       

-    private Analyzer getAnalyzer(ServletContext servletContext) throws SearchException {
-        Object obj = servletContext.getAttribute(LuceneSetup.ANALYZER);
-        if( obj == null || !(obj instanceof Analyzer) )
-            throw new SearchException("Could not get analyzer");
-        else
-            return (Analyzer)obj;        
-    }
-
-//    private Query getQuery(VitroRequest request, 
-//                       Analyzer analyzer, String querystr ) throws SearchException, ParseException {
-//        Query query = null;
-//        try{
-//            //String querystr = request.getParameter(VitroQuery.QUERY_PARAMETER_NAME);
-//            if( querystr == null){
-//                log.error("There was no Parameter '"+VitroQuery.QUERY_PARAMETER_NAME            
-//                    +"' in the request.");                
-//                return null;
-//            }else if( querystr.length() > MAX_QUERY_LENGTH ){
-//                log.debug("The search was too long. The maximum " +
-//                        "query length is " + MAX_QUERY_LENGTH );
-//                return null;
-//            }               
-//            
-//            log.debug("Parsing query using QueryParser ");
-//            
-//            QueryParser parser = getQueryParser(analyzer);
-//            query = parser.parse(querystr);
-//            
-//            //check if this is classgroup filtered
-//            Object param = request.getParameter("classgroup");
-//            if( param != null && !"".equals(param)){
-//            	
-//            	log.debug("Firing classgroup query ");
-//                log.debug("request.getParameter(classgroup) is "+ param.toString());
-//
-//                  BooleanQuery boolQuery = new BooleanQuery();
-//                  boolQuery.add( query, BooleanClause.Occur.MUST);
-//                  boolQuery.add(  new TermQuery(
-//                                      new Term(Entity2LuceneDoc.term.CLASSGROUP_URI, 
-//                                              (String)param)),
-//                                  BooleanClause.Occur.MUST);
-//                  query = boolQuery;
-//            }
-//
-//            //check if this is rdf:type filtered
-//            param = request.getParameter("type");
-//            if(  param != null && !"".equals(param)){                         
-//            	log.debug("Firing type query ");
-//            	log.debug("request.getParameter(type) is "+ param.toString());   
-//                
-//            	BooleanQuery boolQuery = new BooleanQuery();
-//                boolQuery.add( query, BooleanClause.Occur.MUST);
-//                boolQuery.add(  new TermQuery(
-//                                    new Term(Entity2LuceneDoc.term.RDFTYPE, 
-//                                            (String)param)),
-//                                BooleanClause.Occur.MUST);
-//                query = boolQuery;
-//            }
-//            
-//            log.debug("Query: " + query);
-//            
-//        } catch (ParseException e) {
-//            throw new ParseException(e.getMessage());
-//        } catch (Exception ex){
-//            throw new SearchException(ex.getMessage());
-//        }
-//
-//        return query;
-//    }
-    
    private SolrQuery getQuery(String queryText, int maxHitCount, VitroRequest vreq) {
        SolrQuery query = new SolrQuery(queryText);
+        //SolrQuery query = new SolrQuery();
+        //query.setQuery(queryText);
        
        // Solr requires these values, but we don't want them to be the real values for this page
-        // of results, else the refinement links won't work correctly: if only Persons are on pg 1,
-        // but there are FacultyMembers on pg 2, we still need to show the FacultyMember refinement
-        // link on pg 1, which won't happen if we have retrieved only one page of results from Solr.
+        // of results, else the refinement links won't work correctly: each page of results needs to
+        // show refinement links generated for all results, not just for the results on the current page.
        query.setStart(0);
        query.setRows(maxHitCount);

-        // RY Add other stuff here
-        // classgroups, type params
+        // Classgroup filtering
+        Object param = vreq.getParameter("classgroup");
+        if( param != null && !"".equals(param)){           
+            log.debug("Firing classgroup query ");
+            log.debug("request.getParameter(classgroup) is "+ param.toString());
+            query = query.addFilterQuery(VitroLuceneTermNames.CLASSGROUP_URI + ":\"" + param + "\"");
+        }

+        // rdf:type filtering
+        param = vreq.getParameter("type");
+        if(  param != null && !"".equals(param)){                         
+            log.debug("Firing type query ");
+            log.debug("request.getParameter(type) is "+ param.toString());   
+            query = query.addFilterQuery(VitroLuceneTermNames.RDFTYPE + ":\"" + param + "\"");
+        }
+                
+        //query.setQuery(queryText);
+        log.debug("Query = " + query.toString());
        return query;
    }
    
-    @SuppressWarnings("static-access")
+    @SuppressWarnings("unused")
    private QueryParser getQueryParser(Analyzer analyzer){
-        //defaultSearchField indicates which field search against when there is no term
-        //indicated in the query string.
-        //The analyzer is needed so that we use the same analyzer on the search queries as
-        //was used on the text that was indexed.
-    	//QueryParser qp = new QueryParser("NAME",analyzer);
-        //this sets the query parser to AND all of the query terms it finds.
-        //set up the map of stemmed field names -> unstemmed field names
-//        HashMap<String,String> map = new HashMap<String, String>();
-//        map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED);
-//        qp.setStemmedToUnstemmed(map);
-    	
-    	MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{ 
-    				"name", "nameunstemmed", "type", "moniker", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer); 
-    	
-    //	QueryParser qp = new QueryParser(Version.LUCENE_29, "name", analyzer); 
-    	
-    	//AND_OPERATOR returns documents even if the terms in the query lie in different fields.
-    	//The only requirement is that they exist in a single document.
-        //qp.setDefaultOperator(QueryParser.AND_OPERATOR);

+    	MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[] { 
+    	        VitroLuceneTermNames.NAME,
+    	        VitroLuceneTermNames.NAMEUNSTEMMED,
+    	        VitroLuceneTermNames.RDFTYPE,
+    	        VitroLuceneTermNames.ALLTEXT, 
+    	        VitroLuceneTermNames.ALLTEXTUNSTEMMED,
+    	        VitroLuceneTermNames.NAMERAW,
+    	        VitroLuceneTermNames.CLASSLOCALNAME,
+    	        VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE }, analyzer);
+    				//"name", "nameunstemmed", "type", "ALLTEXT", "ALLTEXTUNSTEMMED", "nameraw" , "classLocalName", "classLocalNameLowerCase" }, analyzer); 
    	
    	return qp;
    }
@ -648,13 +522,11 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
    
    private String getPreviousPageLink(int startIndex, int hitsPerPage, String baseUrl, ParamMap params) {
        params.put("startIndex", String.valueOf(startIndex-hitsPerPage));
-        //return new PagingLink("Previous", baseUrl, params);
        return UrlBuilder.getUrl(baseUrl, params);
    }
    
    private String getNextPageLink(int startIndex, int hitsPerPage, String baseUrl, ParamMap params) {
        params.put("startIndex", String.valueOf(startIndex+hitsPerPage));
-        //return new PagingLink("Next", baseUrl, params);
        return UrlBuilder.getUrl(baseUrl, params);
    }
    
@ -681,12 +553,12 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
        return new ExceptionResponseValues(getTemplate(f,Result.ERROR), body, e);
    }
    
-    private TemplateResponseValues doBadQuery(ApplicationBean appBean, String query, Format f) {
-        Map<String, Object> body = new HashMap<String, Object>();
-        body.put("title", "Search " + appBean.getApplicationName());
-        body.put("query", query);
-        return new TemplateResponseValues(getTemplate(f,Result.BAD_QUERY), body);
-    }
+//    private TemplateResponseValues doBadQuery(ApplicationBean appBean, String query, Format f) {
+//        Map<String, Object> body = new HashMap<String, Object>();
+//        body.put("title", "Search " + appBean.getApplicationName());
+//        body.put("query", query);
+//        return new TemplateResponseValues(getTemplate(f,Result.BAD_QUERY), body);
+//    }
    
    private TemplateResponseValues doFailedSearch(String message, String querytext, Format f) {
        Map<String, Object> body = new HashMap<String, Object>();       
@ -747,32 +619,22 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
        return rv;
    }
    
-    @SuppressWarnings("unchecked")
+    @SuppressWarnings({ "unchecked", "unused" })
    private HashSet<String> getDataPropertyBlacklist(){
        HashSet<String>dpBlacklist = (HashSet<String>)
        getServletContext().getAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST);
        return dpBlacklist;        
    }
    
-    @SuppressWarnings("unchecked")
+    @SuppressWarnings({ "unchecked", "unused" })
    private HashSet<String> getObjectPropertyBlacklist(){
        HashSet<String>opBlacklist = (HashSet<String>)
        getServletContext().getAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST);
        return opBlacklist;        
    }
    
-    
-    private final String defaultSearchField = "ALLTEXT";
    public static final int MAX_QUERY_LENGTH = 500;

-    
-    /**
-     * Need to accept notification from indexer that the index has been changed.
-     */
-//    public void close() {
-//        searcher = null;        
-//    }
-
    public VitroHighlighter getHighlighter(VitroQuery q) {
        throw new Error("PagedSearchController.getHighlighter() is unimplemented");
    }
@ -781,6 +643,7 @@ public class SolrPagedSearchController extends FreemarkerHttpServlet {
        throw new Error("PagedSearchController.getQueryFactory() is unimplemented");
    }

+    @SuppressWarnings("rawtypes")
    public List search(VitroQuery query) throws SearchException {
        throw new Error("PagedSearchController.search() is unimplemented");
    }