From 9caf05389842ec2c5e9ba36d0a600ceda123d460 Mon Sep 17 00:00:00 2001 From: briancaruso Date: Tue, 12 Jul 2011 17:57:24 +0000 Subject: [PATCH] Removing several lucene search controllers and classes. Adding QueryExecution close() to PageDaoJena Adding AdditionalURIsToIndex interface and tests. --- .../controller/EntityURLController.java | 160 ---- .../vitro/webapp/controller/JSONServlet.java | 621 ------------- .../freemarker/IndividualListController.java | 511 ----------- .../vitro/webapp/dao/jena/PageDaoJena.java | 229 +++-- .../controller/AutocompleteController.java | 326 ------- .../controller/PagedSearchController.java | 868 ------------------ .../AdditionalURIsForContextNodes.java | 22 + .../indexing/AdditionalURIsToIndex.java | 11 + .../webapp/search/indexing/IndexBuilder.java | 8 +- .../search/lucene/LuceneIndexFactory.java | 126 --- .../webapp/search/lucene/LuceneIndexer.java | 489 ---------- .../webapp/search/lucene/LuceneSearcher.java | 285 ------ .../webapp/search/lucene/LuceneSetup.java | 274 ------ .../webapp/search/lucene/LuceneSetupCJK.java | 205 ----- .../vitro/webapp/search/solr/SolrSetup.java | 8 +- .../pageDataGetter/BrowseDataGetter.java | 4 +- .../utils/pageDataGetter/DataGetterUtils.java | 12 +- .../IndividualsForClassesDataGetter.java | 38 +- .../AdditionalURIsForContextNodesTest.java | 31 + 19 files changed, 237 insertions(+), 3991 deletions(-) delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/controller/EntityURLController.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/controller/JSONServlet.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/controller/freemarker/IndividualListController.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/AutocompleteController.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsForContextNodes.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsToIndex.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexFactory.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSearcher.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java create mode 100644 webapp/test/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsForContextNodesTest.java diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/EntityURLController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/EntityURLController.java deleted file mode 100644 index f925042b4..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/EntityURLController.java +++ /dev/null @@ -1,160 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.controller; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Pattern; - -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; - -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.rdf.model.RDFNode; -import com.hp.hpl.jena.rdf.model.Resource; -import com.hp.hpl.jena.rdf.model.ResourceFactory; -import com.hp.hpl.jena.vocabulary.RDF; - -import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; -import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; -import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory; -import edu.cornell.mannlib.vitro.webapp.web.ContentType; - - - -public class EntityURLController extends VitroHttpServlet { - private static final Log log = LogFactory.getLog(EntityURLController.class.getName()); - public static final int ENTITY_LIST_CONTROLLER_MAX_RESULTS = 30000; - -public void doGet (HttpServletRequest req, HttpServletResponse res) throws IOException, ServletException{ - - String url = req.getRequestURI().substring(req.getContextPath().length()); - ContentType contentType = checkForRequestType(req.getHeader("accept")); - - if(Pattern.compile("^/listrdf/$").matcher(url).matches()){ - String redirectURL = null; - if(contentType!=null){ - if ( RDFXML_MIMETYPE.equals(contentType.getMediaType())) - redirectURL = "/listrdf/listrdf.rdf"; - else if( N3_MIMETYPE.equals(contentType.getMediaType())) - redirectURL = "/listrdf/listrdf.n3"; - else if ( TTL_MIMETYPE.equals(contentType.getMediaType())) - redirectURL = "/listrdf/listrdf.ttl"; - } - else{ - redirectURL = "/listrdf/listrdf.rdf"; - } - - String hn = req.getHeader("Host"); - if (req.isSecure()) { - res.setHeader("Location", res.encodeURL("https://" + hn - + req.getContextPath() + redirectURL)); - log.info("doRedirect by using HTTPS"); - } else { - res.setHeader("Location", res.encodeURL("http://" + hn - + req.getContextPath() + redirectURL)); - log.info("doRedirect by using HTTP"); - } - res.setStatus(res.SC_SEE_OTHER); - return; - } - - String classUri = (String) getServletContext().getAttribute("classuri"); - BooleanQuery query = new BooleanQuery(); - query.add( - new TermQuery( new Term(VitroLuceneTermNames.RDFTYPE, classUri)), - BooleanClause.Occur.MUST ); - - IndexSearcher index = LuceneIndexFactory.getIndexSearcher(getServletContext()); - TopDocs docs = index.search(query, null, - ENTITY_LIST_CONTROLLER_MAX_RESULTS, - new Sort(VitroLuceneTermNames.NAME_LOWERCASE)); - - if( docs == null ){ - log.error("Search of lucene index returned null"); - throw new ServletException("Search of lucene index returned null"); - } - - int ii = 0; - int size = docs.totalHits; - Resource resource = null; - RDFNode node = null; - Model model = ModelFactory.createDefaultModel(); - while( ii < size ){ - ScoreDoc hit = docs.scoreDocs[ii]; - if (hit != null) { - Document doc = index.doc(hit.doc); - if (doc != null) { - String uri = doc.getField(VitroLuceneTermNames.URI).stringValue(); - resource = ResourceFactory.createResource(uri); - node = (RDFNode) ResourceFactory.createResource(classUri); - model.add(resource, RDF.type, node); - } else { - log.warn("no document found for lucene doc id " + hit.doc); - } - } else { - log.debug("hit was null"); - } - ii++; - } - - String format = ""; - if(contentType != null){ - if ( RDFXML_MIMETYPE.equals(contentType.getMediaType())) - format = "RDF/XML"; - else if( N3_MIMETYPE.equals(contentType.getMediaType())) - format = "N3"; - else if ( TTL_MIMETYPE.equals(contentType.getMediaType())) - format ="TTL"; - res.setContentType(contentType.getMediaType()); - } - else{ - res.setContentType(RDFXML_MIMETYPE); - format = "RDF/XML"; - } - model.write(res.getOutputStream(), format); -} -public void doPost (HttpServletRequest req, HttpServletResponse res) throws IOException, ServletException{ - doGet(req,res); -} - -protected ContentType checkForRequestType(String acceptHeader) { - try { - //check the accept header - if (acceptHeader != null) { - List actualContentTypes = new ArrayList(); - actualContentTypes.add(new ContentType( XHTML_MIMETYPE )); - actualContentTypes.add(new ContentType( HTML_MIMETYPE )); - - actualContentTypes.add(new ContentType( RDFXML_MIMETYPE )); - actualContentTypes.add(new ContentType( N3_MIMETYPE )); - actualContentTypes.add(new ContentType( TTL_MIMETYPE )); - - - ContentType best = ContentType.getBestContentType(acceptHeader,actualContentTypes); - if (best!=null && ( - RDFXML_MIMETYPE.equals(best.getMediaType()) || - N3_MIMETYPE.equals(best.getMediaType()) || - TTL_MIMETYPE.equals(best.getMediaType()) )) - return best; - } - } - catch (Throwable th) { - log.error("problem while checking accept header " , th); - } - return null; -} -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/JSONServlet.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/JSONServlet.java deleted file mode 100644 index 809b112e0..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/JSONServlet.java +++ /dev/null @@ -1,621 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.controller; - -import java.io.IOException; -import java.io.PrintWriter; -import java.io.Writer; -import java.net.URLEncoder; -import java.util.Arrays; -import java.util.ArrayList; -import java.util.Enumeration; -import java.util.Iterator; -import java.util.List; -import java.util.ListIterator; -import java.util.Map; - -import javax.servlet.ServletContext; -import javax.servlet.ServletException; -import javax.servlet.ServletOutputStream; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; -import javax.servlet.http.HttpSession; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; - -import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.rdf.model.Literal; - -import edu.cornell.mannlib.vitro.webapp.beans.DataProperty; -import edu.cornell.mannlib.vitro.webapp.beans.Individual; -import edu.cornell.mannlib.vitro.webapp.beans.VClass; -import edu.cornell.mannlib.vitro.webapp.beans.VClassGroup; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.IndividualListController; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.IndividualListController.PageRecord; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder; -import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; -import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; -import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; -import edu.cornell.mannlib.vitro.webapp.dao.jena.VClassGroupCache; -import edu.cornell.mannlib.vitro.webapp.edit.n3editing.configuration.EditConfiguration; -import edu.cornell.mannlib.vitro.webapp.edit.n3editing.configuration.SelectListGenerator; -import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; -import edu.cornell.mannlib.vitro.webapp.utils.pageDataGetter.IndividualsForClassesDataGetter; -import edu.cornell.mannlib.vitro.webapp.utils.pageDataGetter.DataGetterUtils; -import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individual.IndividualTemplateModel; - -/** - * This servlet is for servicing requests for JSON objects/data. - * It could be generalized to get other types of data ex. XML, HTML etc - * @author bdc34 - * - */ -public class JSONServlet extends VitroHttpServlet { - - @Override - protected void doPost(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { - super.doPost(req, resp); - } - - @Override - protected void doGet(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { - super.doGet(req, resp); - VitroRequest vreq = new VitroRequest(req); - - try{ - if(vreq.getParameter("getEntitiesByVClass") != null ){ - if( vreq.getParameter("resultKey") == null) { - getEntitiesByVClass(req, resp); - return; - } else { - getEntitiesByVClassContinuation( req, resp); - return; - } - }else if( vreq.getParameter("getN3EditOptionList") != null ){ - doN3EditOptionList(req,resp); - return; - }else if( vreq.getParameter("getLuceneIndividualsByVClass") != null ){ - getLuceneIndividualsByVClass(req,resp); - return; - }else if( vreq.getParameter("getVClassesForVClassGroup") != null ){ - getVClassesForVClassGroup(req,resp); - return; - } else if( vreq.getParameter("getSolrIndividualsByVClasses") != null ){ - getSolrIndividualsByVClasses(req,resp); - return; - } else if( vreq.getParameter("getDataForPage") != null ){ - getDataForPage(req,resp); - return; - } - }catch(Exception ex){ - log.warn(ex,ex); - } - } - - private void getVClassesForVClassGroup(HttpServletRequest req, HttpServletResponse resp) throws IOException, JSONException { - JSONObject map = new JSONObject(); - VitroRequest vreq = new VitroRequest(req); - String vcgUri = vreq.getParameter("classgroupUri"); - if( vcgUri == null ){ - log.debug("no URI passed for classgroupUri"); - resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - return; - } - - VClassGroupCache vcgc = VClassGroupCache.getVClassGroupCache(getServletContext()); - VClassGroup vcg = vcgc.getGroup(vcgUri); - if( vcg == null ){ - log.debug("Could not find vclassgroup: " + vcgUri); - resp.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); - return; - } - - ArrayList classes = new ArrayList(vcg.size()); - for( VClass vc : vcg){ - JSONObject vcObj = new JSONObject(); - vcObj.put("name", vc.getName()); - vcObj.put("URI", vc.getURI()); - vcObj.put("entityCount", vc.getEntityCount()); - classes.add(vcObj); - } - map.put("classes", classes); - map.put("classGroupName", vcg.getPublicName()); - map.put("classGroupUri", vcg.getURI()); - - resp.setCharacterEncoding("UTF-8"); - resp.setContentType("application/json;charset=UTF-8"); - Writer writer = resp.getWriter(); - writer.write(map.toString()); - } - - private void getLuceneIndividualsByVClass( HttpServletRequest req, HttpServletResponse resp ){ - String errorMessage = null; - JSONObject rObj = null; - try{ - VitroRequest vreq = new VitroRequest(req); - VClass vclass=null; - - - String vitroClassIdStr = vreq.getParameter("vclassId"); - if ( vitroClassIdStr != null && !vitroClassIdStr.isEmpty()){ - vclass = vreq.getWebappDaoFactory().getVClassDao().getVClassByURI(vitroClassIdStr); - if (vclass == null) { - log.debug("Couldn't retrieve vclass "); - throw new Exception (errorMessage = "Class " + vitroClassIdStr + " not found"); - } - }else{ - log.debug("parameter vclassId URI parameter expected "); - throw new Exception("parameter vclassId URI parameter expected "); - } - rObj = getLuceneIndividualsByVClass(vclass.getURI(),req, getServletContext()); - }catch(Exception ex){ - errorMessage = ex.toString(); - log.error(ex,ex); - } - - if( rObj == null ) - rObj = new JSONObject(); - - try{ - resp.setCharacterEncoding("UTF-8"); - resp.setContentType("application/json;charset=UTF-8"); - - if( errorMessage != null ){ - rObj.put("errorMessage", errorMessage); - resp.setStatus(500 /*HttpURLConnection.HTTP_SERVER_ERROR*/); - }else{ - rObj.put("errorMessage", ""); - } - Writer writer = resp.getWriter(); - writer.write(rObj.toString()); - }catch(JSONException jse){ - log.error(jse,jse); - } catch (IOException e) { - log.error(e,e); - } - - } - - //Accepts multiple vclasses and returns individuals which correspond to the intersection of those classes (i.e. have all those types) - private void getSolrIndividualsByVClasses( HttpServletRequest req, HttpServletResponse resp ){ - String errorMessage = null; - JSONObject rObj = null; - try{ - VitroRequest vreq = new VitroRequest(req); - VClass vclass=null; - - //Could have multiple vclass Ids sent in - List vclassIds = new ArrayList(); - String[] vitroClassIdStr = vreq.getParameterValues("vclassId"); - if ( vitroClassIdStr != null && vitroClassIdStr.length > 0){ - for(String vclassId: vitroClassIdStr) { - vclass = vreq.getWebappDaoFactory().getVClassDao().getVClassByURI(vclassId); - if (vclass == null) { - log.debug("Couldn't retrieve vclass "); - throw new Exception (errorMessage = "Class " + vclassId + " not found"); - } - } - }else{ - log.debug("parameter vclassId URI parameter expected "); - throw new Exception("parameter vclassId URI parameter expected "); - } - vclassIds = Arrays.asList(vitroClassIdStr); - //rObj = getLuceneIndividualsByVClass(vclass.getURI(),req, getServletContext()); - rObj = getSolrIndividualsByVClasses(vclassIds,req, getServletContext()); - }catch(Exception ex){ - errorMessage = ex.toString(); - log.error(ex,ex); - } - - if( rObj == null ) - rObj = new JSONObject(); - - try{ - resp.setCharacterEncoding("UTF-8"); - resp.setContentType("application/json;charset=UTF-8"); - - if( errorMessage != null ){ - rObj.put("errorMessage", errorMessage); - resp.setStatus(500 /*HttpURLConnection.HTTP_SERVER_ERROR*/); - }else{ - rObj.put("errorMessage", ""); - } - Writer writer = resp.getWriter(); - writer.write(rObj.toString()); - }catch(JSONException jse){ - log.error(jse,jse); - } catch (IOException e) { - log.error(e,e); - } - } - - public static JSONObject getLuceneIndividualsByVClass(String vclassURI, HttpServletRequest req, ServletContext context) throws Exception { - - VitroRequest vreq = new VitroRequest(req); - Map map = getLuceneVclassResults(vclassURI, vreq, context); - //Last parameter defines whether single or multiple vclasses expected - JSONObject rObj = processVclassResults(map, vreq, context, false); - return rObj; - } - - public static JSONObject getSolrIndividualsByVClasses(List vclassURIs, HttpServletRequest req, ServletContext context) throws Exception { - VitroRequest vreq = new VitroRequest(req); - Map map = getSolrVclassIntersectionResults(vclassURIs, vreq, context); - JSONObject rObj = processVclassResults(map, vreq, context, true); - return rObj; - } - - //Factoring out to allow for results to be processed from query for both lucene and solr - //Map given to process method includes the actual individuals returned from the search - public static JSONObject processVclassResults(Map map, VitroRequest vreq, ServletContext context, boolean multipleVclasses) throws Exception{ - JSONObject rObj = DataGetterUtils.processVclassResultsJSON(map, vreq, multipleVclasses); - return rObj; - } - private static Map getLuceneVclassResults(String vclassURI, VitroRequest vreq, ServletContext context){ - String alpha = IndividualListController.getAlphaParameter(vreq); - int page = IndividualListController.getPageParameter(vreq); - Map map = null; - try { - map = IndividualListController.getResultsForVClass( - vclassURI, - page, - alpha, - vreq.getWebappDaoFactory().getIndividualDao(), - context); - } catch(Exception ex) { - log.error("Error in retrieval of Lucene results for VClass " + vclassURI, ex); - } - - return map; - } - - //Including version for Solr query for Vclass Intersections - private static Map getSolrVclassIntersectionResults(List vclassURIs, VitroRequest vreq, ServletContext context){ - String alpha = IndividualListController.getAlphaParameter(vreq); - int page = IndividualListController.getPageParameter(vreq); - Map map = null; - try { - map = IndividualListController.getResultsForVClassIntersections( - vclassURIs, - page, - alpha, - vreq.getWebappDaoFactory().getIndividualDao(), - context); - } catch(Exception ex) { - log.error("Error in retrieval of Lucene results for VClass " + vclassURIs.toString(), ex); - } - - return map; - } - - - - public static String getVClassName(Individual ind, String moniker, - WebappDaoFactory fullWdf) { - /* so the moniker frequently has a vclass name in it. Try to return - * the vclass name that is the same as the moniker so that the templates - * can detect this. */ - if( (moniker == null || moniker.isEmpty()) ){ - if( ind.getVClass() != null && ind.getVClass().getName() != null ) - return ind.getVClass().getName(); - else - return ""; - } - - List vcList = ind.getVClasses(); - for( VClass vc : vcList){ - if( vc != null && moniker.equals( vc.getName() )) - return moniker; - } - - // if we get here, then we didn't find a moniker that matched a vclass, - // so just return any vclass.name - if( ind.getVClass() != null && ind.getVClass().getName() != null ) - return ind.getVClass().getName(); - else - return ""; - } - - public static String getDataPropertyValue(Individual ind, DataProperty dp, WebappDaoFactory wdf){ - List values = wdf.getDataPropertyStatementDao() - .getDataPropertyValuesForIndividualByProperty(ind, dp); - if( values == null || values.isEmpty() ) - return ""; - else{ - if( values.get(0) != null ) - return values.get(0).getLexicalForm(); - else - return ""; - } - - } - - /** - * Gets an option list for a given EditConfiguration and Field. - * Requires following HTTP query parameters: - * editKey - * field - */ - private void doN3EditOptionList(HttpServletRequest req, HttpServletResponse resp) throws IOException { - log.debug("in doN3EditOptionList()"); - String field = req.getParameter("field"); - if( field == null ){ - log.debug("could not find query parameter 'field' for doN3EditOptionList"); - throw new IllegalArgumentException(" getN3EditOptionList requires parameter 'field'"); - } - - HttpSession sess = req.getSession(false); - EditConfiguration editConfig = EditConfiguration.getConfigFromSession(sess, req); - if( editConfig == null ) { - log.debug("could not find query parameter 'editKey' for doN3EditOptionList"); - throw new IllegalArgumentException(" getN3EditOptionList requires parameter 'editKey'"); - } - - if( log.isDebugEnabled() ) - log.debug(" attempting to get option list for field '" + field + "'"); - - // set ProhibitedFromSearch object so picklist doesn't show - // individuals from classes that should be hidden from list views - OntModel displayOntModel = - (OntModel) getServletConfig().getServletContext() - .getAttribute("displayOntModel"); - if (displayOntModel != null) { - ProhibitedFromSearch pfs = new ProhibitedFromSearch( - DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel); - editConfig.setProhibitedFromSearch(pfs); - } - - Map options = SelectListGenerator.getOptions(editConfig, field, (new VitroRequest(req)).getFullWebappDaoFactory()); - resp.setContentType("application/json"); - ServletOutputStream out = resp.getOutputStream(); - - out.println("["); - for(String key : options.keySet()){ - JSONArray jsonObj = new JSONArray(); - jsonObj.put( options.get(key)); - jsonObj.put( key); - out.println(" " + jsonObj.toString() + ","); - } - out.println("]"); - } - - private void getEntitiesByVClassContinuation(HttpServletRequest req, HttpServletResponse resp) throws ServletException, IOException { - log.debug("in getEntitiesByVClassContinuation()"); - VitroRequest vreq = new VitroRequest(req); - String resKey = vreq.getParameter("resultKey"); - if( resKey == null ) - throw new ServletException("Could not get resultKey"); - HttpSession session = vreq.getSession(); - if( session == null ) - throw new ServletException("there is no session to get the pervious results from"); - List entsInVClass = (List) session.getAttribute(resKey); - if( entsInVClass == null ) - throw new ServletException("Could not find List for resultKey " + resKey); - - List entsToReturn = new ArrayList(REPLY_SIZE); - boolean more = false; - int count = 0; - int size = REPLY_SIZE; - /* we have a large number of items to send back so we need to stash the list in the session scope */ - if( entsInVClass.size() > REPLY_SIZE){ - more = true; - ListIterator entsFromVclass = entsInVClass.listIterator(); - while ( entsFromVclass.hasNext() && count <= REPLY_SIZE ){ - entsToReturn.add( entsFromVclass.next()); - entsFromVclass.remove(); - count++; - } - if( log.isDebugEnabled() ) log.debug("getEntitiesByVClassContinuation(): Creating reply with continue token," + - " sending in this reply: " + count +", remaing to send: " + entsInVClass.size() ); - } else { - //send out reply with no continuation - entsToReturn = entsInVClass; - count = entsToReturn.size(); - session.removeAttribute(resKey); - if( log.isDebugEnabled()) log.debug("getEntitiesByVClassContinuation(): sending " + count + " Ind without continue token"); - } - - //put all the entities on the JSON array - JSONArray ja = individualsToJson( entsToReturn ); - - //put the responseGroup number on the end of the JSON array - if( more ){ - try{ - JSONObject obj = new JSONObject(); - obj.put("resultGroup", "true"); - obj.put("size", count); - - StringBuffer nextUrlStr = req.getRequestURL(); - nextUrlStr.append("?") - .append("getEntitiesByVClass").append( "=1&" ) - .append("resultKey=").append( resKey ); - obj.put("nextUrl", nextUrlStr.toString()); - - ja.put(obj); - }catch(JSONException je ){ - throw new ServletException(je.getMessage()); - } - } - resp.setContentType("application/json"); - ServletOutputStream out = resp.getOutputStream(); - out.print( ja.toString() ); - log.debug("done with getEntitiesByVClassContinuation()"); - } - - - - /** - * Gets a list of entities that are members of the indicated vClass. - * - * If the list is large then we will pass some token indicating that there is more - * to come. The results are sent back in 250 entity blocks. To get all of the - * entities for a VClass just keep requesting lists until there are not more - * continue tokens. - * - * If there are more entities the last item on the returned array will be an object - * with no id property. It will look like this: - * - * {"resultGroup":0, - * "resultKey":"2WEK2306", - * "nextUrl":"http://caruso.mannlib.cornell.edu:8080/vitro/dataservice?getEntitiesByVClass=1&resultKey=2WEK2306&resultGroup=1&vclassId=null", - * "entsInVClass":1752, - * "nextResultGroup":1, - * "standardReplySize":256} - * - */ - private void getEntitiesByVClass(HttpServletRequest req, HttpServletResponse resp) - throws ServletException, IOException{ - log.debug("in getEntitiesByVClass()"); - VitroRequest vreq = new VitroRequest(req); - String vclassURI = vreq.getParameter("vclassURI"); - WebappDaoFactory daos = (new VitroRequest(req)).getFullWebappDaoFactory(); - resp.setCharacterEncoding("UTF-8"); - - // ServletOutputStream doesn't support UTF-8 - PrintWriter out = resp.getWriter(); - resp.getWriter(); - - if( vclassURI == null ){ - log.debug("getEntitiesByVClass(): no value for 'vclassURI' found in the HTTP request"); - out.print( (new JSONArray()).toString() ); return; - } - - VClass vclass = daos.getVClassDao().getVClassByURI( vclassURI ); - if( vclass == null ){ - log.debug("getEntitiesByVClass(): could not find vclass for uri '"+ vclassURI + "'"); - out.print( (new JSONArray()).toString() ); return; - } - - List entsInVClass = daos.getIndividualDao().getIndividualsByVClass( vclass ); - if( entsInVClass == null ){ - log.debug("getEntitiesByVClass(): null List retruned by getIndividualsByVClass() for "+vclassURI); - out.print( (new JSONArray().toString() )); return ; - } - int numberOfEntsInVClass = entsInVClass.size(); - - List entsToReturn = new ArrayList( REPLY_SIZE ); - String requestHash = null; - int count = 0; - boolean more = false; - /* we have a large number of items to send back so we need to stash the list in the session scope */ - if( entsInVClass.size() > REPLY_SIZE){ - more = true; - HttpSession session = vreq.getSession(true); - requestHash = Integer.toString((vclassURI + System.currentTimeMillis()).hashCode()); - session.setAttribute(requestHash, entsInVClass ); - - ListIterator entsFromVclass = entsInVClass.listIterator(); - while ( entsFromVclass.hasNext() && count < REPLY_SIZE ){ - entsToReturn.add( entsFromVclass.next()); - entsFromVclass.remove(); - count++; - } - if( log.isDebugEnabled() ){ log.debug("getEntitiesByVClass(): Creating reply with continue token, found " + numberOfEntsInVClass + " Individuals"); } - }else{ - if( log.isDebugEnabled() ) log.debug("getEntitiesByVClass(): sending " + numberOfEntsInVClass +" Individuals without continue token"); - entsToReturn = entsInVClass; - count = entsToReturn.size(); - } - - - //put all the entities on the JSON array - JSONArray ja = individualsToJson( entsToReturn ); - - //put the responseGroup number on the end of the JSON array - if( more ){ - try{ - JSONObject obj = new JSONObject(); - obj.put("resultGroup", "true"); - obj.put("size", count); - obj.put("total", numberOfEntsInVClass); - - StringBuffer nextUrlStr = req.getRequestURL(); - nextUrlStr.append("?") - .append("getEntitiesByVClass").append( "=1&" ) - .append("resultKey=").append( requestHash ); - obj.put("nextUrl", nextUrlStr.toString()); - - ja.put(obj); - }catch(JSONException je ){ - throw new ServletException("unable to create continuation as JSON: " + je.getMessage()); - } - } - - resp.setContentType("application/json"); - out.print( ja.toString() ); - - log.debug("done with getEntitiesByVClass()"); - - } - - /** - * Gets data based on data getter for page uri and returns in the form of Json objects - * @param req - * @param resp - */ - private void getDataForPage(HttpServletRequest req, HttpServletResponse resp) { - VitroRequest vreq = new VitroRequest(req); - String errorMessage = null; - JSONObject rObj = null; - String pageUri = vreq.getParameter("pageUri"); - if(pageUri != null && !pageUri.isEmpty()) { - ServletContext context = getServletContext(); - Map data = DataGetterUtils.getDataForPage(pageUri, vreq, context); - //Convert to json version based on type of page - if(data != null) { - //Convert to json version based on type of page - rObj = DataGetterUtils.covertDataToJSONForPage(pageUri, data, vreq, context); - } - } - - if( rObj == null ) - rObj = new JSONObject(); - //Send object - try{ - resp.setCharacterEncoding("UTF-8"); - resp.setContentType("application/json;charset=UTF-8"); - - if( errorMessage != null ){ - rObj.put("errorMessage", errorMessage); - resp.setStatus(500 /*HttpURLConnection.HTTP_SERVER_ERROR*/); - }else{ - rObj.put("errorMessage", ""); - } - Writer writer = resp.getWriter(); - writer.write(rObj.toString()); - }catch(JSONException jse){ - log.error(jse,jse); - } catch (IOException e) { - log.error(e,e); - } - - } - - private JSONArray individualsToJson(List individuals) throws ServletException { - JSONArray ja = new JSONArray(); - Iterator it = individuals.iterator(); - try{ - while(it.hasNext()){ - Individual ent = (Individual) it.next(); - JSONObject entJ = new JSONObject(); - entJ.put("name", ent.getName()); - entJ.put("URI", ent.getURI()); - ja.put( entJ ); - } - }catch(JSONException ex){ - throw new ServletException("could not convert list of Individuals into JSON: " + ex); - } - - return ja; - } - - - - private static final int REPLY_SIZE = 256; - - private static final Log log = LogFactory.getLog(JSONServlet.class.getName()); -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/freemarker/IndividualListController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/freemarker/IndividualListController.java deleted file mode 100644 index 1d3cc9b28..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/freemarker/IndividualListController.java +++ /dev/null @@ -1,511 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.controller.freemarker; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import javax.servlet.ServletContext; -import javax.servlet.ServletException; - -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.SolrServer; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.SolrDocumentList; - -import edu.cornell.mannlib.vitro.webapp.beans.Individual; -import edu.cornell.mannlib.vitro.webapp.beans.VClass; -import edu.cornell.mannlib.vitro.webapp.beans.VClassGroup; -import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ExceptionResponseValues; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.TemplateResponseValues; -import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao; -import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; -import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; -import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory; -import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup; -import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individuallist.BaseListedIndividual; -import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individuallist.ListedIndividual; -import freemarker.ext.beans.BeansWrapper; -import freemarker.template.TemplateModel; - -/** - * Generates a list of individuals for display in a template - */ -public class IndividualListController extends FreemarkerHttpServlet { - - private static final long serialVersionUID = 1L; - private static final Log log = LogFactory.getLog(IndividualListController.class.getName()); - - public static final int ENTITY_LIST_CONTROLLER_MAX_RESULTS = 30000; - public static final int INDIVIDUALS_PER_PAGE = 30; - public static final int MAX_PAGES = 40; //must be even - - private static final String TEMPLATE_DEFAULT = "individualList.ftl"; - - @Override - protected ResponseValues processRequest(VitroRequest vreq) { - - String templateName = TEMPLATE_DEFAULT; - Map body = new HashMap(); - String errorMessage = null; - - try { - Object obj = vreq.getAttribute("vclass"); - VClass vclass = null; - if ( obj == null ) { // look for vitroclass id parameter - String vitroClassIdStr = vreq.getParameter("vclassId"); - if ( !StringUtils.isEmpty(vitroClassIdStr)) { - try { - //TODO have to change this so vclass's group and entity count are populated - vclass = vreq.getWebappDaoFactory().getVClassDao().getVClassByURI(vitroClassIdStr); - if (vclass == null) { - log.error("Couldn't retrieve vclass " + vitroClassIdStr); - errorMessage = "Class " + vitroClassIdStr + " not found"; - } - } catch (Exception ex) { - throw new HelpException("IndividualListController: request parameter 'vclassId' must be a URI string."); - } - } - } else if (obj instanceof VClass) { - vclass = (VClass)obj; - } else { - throw new HelpException("IndividualListController: attribute 'vclass' must be of type " - + VClass.class.getName() + "."); - } - - body.put("vclassId", vclass.getURI()); - - if (vclass != null) { - String alpha = getAlphaParameter(vreq); - int page = getPageParameter(vreq); - Map map = getResultsForVClass( - vclass.getURI(), - page, - alpha, - vreq.getWebappDaoFactory().getIndividualDao(), - getServletContext()); - body.putAll(map); - - List inds = (List)map.get("entities"); - List indsTm = new ArrayList(); - for(Individual ind : inds ){ - indsTm.add(new ListedIndividual(ind,vreq)); - } - body.put("individuals", indsTm); - - List wpages = new ArrayList(); - List pages = (List)body.get("pages"); - BeansWrapper wrapper = new BeansWrapper(); - for( PageRecord pr: pages ){ - wpages.add( wrapper.wrap(pr) ); - } - - // Set title and subtitle. Title will be retrieved later in getTitle(). - VClassGroup classGroup = vclass.getGroup(); - String title; - if (classGroup == null) { - title = vclass.getName(); - } else { - title = classGroup.getPublicName(); - body.put("subtitle", vclass.getName()); - } - body.put("title", title); - body.put("rdfUrl", UrlBuilder.getUrl("/listrdf", "vclass", vclass.getURI())); - - } - - } catch (HelpException help){ - errorMessage = "Request attribute 'vclass' or request parameter 'vclassId' must be set before calling. Its value must be a class uri."; - } catch (Throwable e) { - return new ExceptionResponseValues(e); - } - - if (errorMessage != null) { - templateName = Template.ERROR_MESSAGE.toString(); - body.put("errorMessage", errorMessage); - } - - return new TemplateResponseValues(templateName, body); - } - - private class HelpException extends Throwable { - private static final long serialVersionUID = 1L; - - public HelpException(String string) { - super(string); - } - } - - public static String getAlphaParameter(VitroRequest request){ - return request.getParameter("alpha"); - } - - public static int getPageParameter(VitroRequest request) { - String pageStr = request.getParameter("page"); - if( pageStr != null ){ - try{ - return Integer.parseInt(pageStr); - }catch(NumberFormatException nfe){ - log.debug("could not parse page parameter"); - return 1; - } - }else{ - return 1; - } - } - - /** - * This method is now called in a couple of places. It should be refactored - * into a DAO or similar object. - */ - public static Map getResultsForVClass(String vclassURI, int page, String alpha, IndividualDao indDao, ServletContext context) - throws CorruptIndexException, IOException, ServletException{ - Map rvMap = new HashMap(); - try{ - //make lucene query for this rdf:type - List classUris = new ArrayList(); - classUris.add(vclassURI); - Query query = getQuery(classUris, alpha); - rvMap = getResultsForVClassQuery(query, page, alpha, indDao, context); - List individuals = (List) rvMap.get("entities"); - if (individuals == null) - log.debug("entities list is null for vclass " + vclassURI ); - } catch(Throwable th) { - log.error("An error occurred retrieving results for vclass query", th); - } - return rvMap; - } - - /* - * This method includes what was formerly a part of the method above, allowing for refactoring of code - * to use for a different number fo classes - */ - - public static Map getResultsForVClassQuery(Query query, int page, String alpha, IndividualDao indDao, ServletContext context) - throws CorruptIndexException, IOException, ServletException{ - Map rvMap = new HashMap(); - - //execute lucene query for individuals of the specified type - IndexSearcher index = LuceneIndexFactory.getIndexSearcher(context); - TopDocs docs = null; - try{ - docs = index.search(query, null, - ENTITY_LIST_CONTROLLER_MAX_RESULTS, - new Sort(Entity2LuceneDoc.term.NAME_LOWERCASE)); - }catch(Throwable th){ - log.error("Could not run search. " + th.getMessage()); - docs = null; - } - - if( docs == null ) - throw new ServletException("Could not run search in IndividualListController"); - - //get list of individuals for the search results - int size = docs.totalHits; - log.debug("Number of search results: " + size); - - // don't get all the results, only get results for the requestedSize - List individuals = new ArrayList(INDIVIDUALS_PER_PAGE); - int individualsAdded = 0; - int ii = (page-1)*INDIVIDUALS_PER_PAGE; - while( individualsAdded < INDIVIDUALS_PER_PAGE && ii < size ){ - ScoreDoc hit = docs.scoreDocs[ii]; - if (hit != null) { - Document doc = index.doc(hit.doc); - if (doc != null) { - String uri = doc.getField(Entity2LuceneDoc.term.URI).stringValue(); - Individual ind = indDao.getIndividualByURI( uri ); - if( ind != null ){ - individuals.add( ind ); - individualsAdded++; - } - } else { - log.warn("no document found for lucene doc id " + hit.doc); - } - } else { - log.debug("hit was null"); - } - ii++; - } - - rvMap.put("count", size); - - if( size > INDIVIDUALS_PER_PAGE ){ - rvMap.put("showPages", Boolean.TRUE); - List pageRecords = makePagesList(size, INDIVIDUALS_PER_PAGE, page); - rvMap.put("pages", pageRecords); - }else{ - rvMap.put("showPages", Boolean.FALSE); - rvMap.put("pages", Collections.emptyList()); - } - - rvMap.put("alpha",alpha); - - rvMap.put("totalCount", size); - rvMap.put("entities",individuals); - - return rvMap; - } - - - - //Solr based version - public static Map getResultsForVClassQuery(SolrQuery query, int page, String alpha, IndividualDao indDao, ServletContext context) - throws CorruptIndexException, IOException, ServletException{ - Map rvMap = new HashMap(); - //Execute solr query - SolrServer solr = SolrSetup.getSolrServer(context); - QueryResponse response = null; - - try { - response = solr.query(query); - - } catch (Throwable t) { - log.error("in first pass at search: " + t); - // this is a hack to deal with odd cases where search and index threads interact - try{ - //Can't use the method below in a static class? - //wait(150); - response = solr.query(query); - } catch (Exception ex) { - log.error(ex); - //doFailedSearch() - //return doFailedSearch(msg, qtxt, format); - } - } - - - SolrDocumentList docs = response.getResults(); - if( docs == null ) - throw new ServletException("Could not run search in IndividualListController"); - - //get list of individuals for the search results - - int size = new Long(docs.getNumFound()).intValue(); - log.debug("Number of search results: " + size); - - // don't get all the results, only get results for the requestedSize - List individuals = new ArrayList(INDIVIDUALS_PER_PAGE); - int individualsAdded = 0; - int ii = (page-1)*INDIVIDUALS_PER_PAGE; - while( individualsAdded < INDIVIDUALS_PER_PAGE && ii < size ){ - SolrDocument doc = docs.get(ii); - if (doc != null) { - String uri = doc.get(VitroLuceneTermNames.URI).toString(); - log.debug("Retrieving individual with uri "+ uri); - Individual ind = indDao.getIndividualByURI(uri); - if( ind != null ){ - individuals.add( ind ); - individualsAdded++; - } - } else { - log.warn("no document found for lucene doc id " + doc); - } - ii++; - } - - rvMap.put("count", size); - - if( size > INDIVIDUALS_PER_PAGE ){ - rvMap.put("showPages", Boolean.TRUE); - List pageRecords = makePagesList(size, INDIVIDUALS_PER_PAGE, page); - rvMap.put("pages", pageRecords); - }else{ - rvMap.put("showPages", Boolean.FALSE); - rvMap.put("pages", Collections.emptyList()); - } - - rvMap.put("alpha",alpha); - - rvMap.put("totalCount", size); - rvMap.put("entities",individuals); - - return rvMap; - } - - - - public static Map getResultsForVClassIntersections(List vclassURIs, int page, String alpha, IndividualDao indDao, ServletContext context) - throws CorruptIndexException, IOException, ServletException{ - Map rvMap = new HashMap(); - try{ - //make lucene query for multiple rdf types - //change to solr - SolrQuery query = getSolrQuery(vclassURIs, alpha); - //get results corresponding to this query - rvMap = getResultsForVClassQuery(query, page, alpha, indDao, context); - List individuals = (List) rvMap.get("entities"); - if (individuals == null) - log.debug("entities list is null for vclass " + vclassURIs.toString() ); - } catch(Throwable th) { - log.error("Error retrieving individuals corresponding to intersection multiple classes." + vclassURIs.toString(), th); - } - return rvMap; - } - - - /* - * This method creates a query to search for terms with rdf type corresponding to vclass Uri. - * The original version allowed only for one class URI but needed to be extended to enable multiple - * vclass Uris to be passed - */ - - private static BooleanQuery getQuery(ListvclassUris, String alpha){ - BooleanQuery query = new BooleanQuery(); - try{ - //query term for rdf:type - multiple types possible - for(String vclassUri: vclassUris) { - query.add( - new TermQuery( new Term(Entity2LuceneDoc.term.RDFTYPE, vclassUri)), - BooleanClause.Occur.MUST ); - } - //Add alpha filter if it is needed - Query alphaQuery = null; - if( alpha != null && !"".equals(alpha) && alpha.length() == 1){ - alphaQuery = - new PrefixQuery(new Term(Entity2LuceneDoc.term.NAME_LOWERCASE, alpha.toLowerCase())); - query.add(alphaQuery,BooleanClause.Occur.MUST); - } - - log.debug("Query: " + query); - return query; - } catch (Exception ex){ - log.error(ex,ex); - return new BooleanQuery(); - } - } - - //how to extend for solr query - //Alpha handling taken from SolrIndividualListController - private static SolrQuery getSolrQuery(ListvclassUris, String alpha){ - //SolrQuery query = getQuery(qtxt, maxHitCount, vreq); - //SolrServer solr = SolrSetup.getSolrServer(getServletContext()); - SolrQuery query = new SolrQuery(); - - // Solr requires these values, but we don't want them to be the real values for this page - // of results, else the refinement links won't work correctly: each page of results needs to - // show refinement links generated for all results, not just for the results on the current page. - query.setStart(0) - .setRows(1000); - String queryText = ""; - //Query text should be of form: - List queryTypes = new ArrayList(); - try{ - //query term for rdf:type - multiple types possible - for(String vclassUri: vclassUris) { - queryTypes.add(VitroLuceneTermNames.RDFTYPE + ":\"" + vclassUri + "\" "); - - } - - if(queryTypes.size() > 1) { - queryText = StringUtils.join(queryTypes, " AND "); - } else { - if(queryTypes.size() > 0) { - queryText = queryTypes.get(0); - } - } - - // Add alpha filter if it is needed - if ( alpha != null && !"".equals(alpha) && alpha.length() == 1) { - queryText += VitroLuceneTermNames.NAME_LOWERCASE + ":" + alpha.toLowerCase() + "*"; - } - - log.debug("Query text is " + queryText); - query.setQuery(queryText); - log.debug("Query: " + query); - return query; - } catch (Exception ex){ - log.error(ex,ex); - - return new SolrQuery(); - } - - } - - public static List makePagesList( int count, int pageSize, int selectedPage){ - - List records = new ArrayList( MAX_PAGES + 1 ); - int requiredPages = count/pageSize ; - int remainder = count % pageSize ; - if( remainder > 0 ) - requiredPages++; - - if( selectedPage < MAX_PAGES && requiredPages > MAX_PAGES ){ - //the selected pages is within the first maxPages, just show the normal pages up to maxPages. - for(int page = 1; page < requiredPages && page <= MAX_PAGES ; page++ ){ - records.add( new PageRecord( "page=" + page, Integer.toString(page), Integer.toString(page), selectedPage == page ) ); - } - records.add( new PageRecord( "page="+ (MAX_PAGES+1), Integer.toString(MAX_PAGES+1), "more...", false)); - }else if( requiredPages > MAX_PAGES && selectedPage+1 > MAX_PAGES && selectedPage < requiredPages - MAX_PAGES){ - //the selected pages is in the middle of the list of page - int startPage = selectedPage - MAX_PAGES / 2; - int endPage = selectedPage + MAX_PAGES / 2; - for(int page = startPage; page <= endPage ; page++ ){ - records.add( new PageRecord( "page=" + page, Integer.toString(page), Integer.toString(page), selectedPage == page ) ); - } - records.add( new PageRecord( "page="+ endPage+1, Integer.toString(endPage+1), "more...", false)); - }else if ( requiredPages > MAX_PAGES && selectedPage > requiredPages - MAX_PAGES ){ - //the selected page is in the end of the list - int startPage = requiredPages - MAX_PAGES; - double max = Math.ceil(count/pageSize); - for(int page = startPage; page <= max; page++ ){ - records.add( new PageRecord( "page=" + page, Integer.toString(page), Integer.toString(page), selectedPage == page ) ); - } - }else{ - //there are fewer than maxPages pages. - for(int i = 1; i <= requiredPages; i++ ){ - records.add( new PageRecord( "page=" + i, Integer.toString(i), Integer.toString(i), selectedPage == i ) ); - } - } - return records; - } - - public static class PageRecord { - public PageRecord(String param, String index, String text, boolean selected) { - this.param = param; - this.index = index; - this.text = text; - this.selected = selected; - } - public String param; - public String index; - public String text; - public boolean selected=false; - - public String getParam() { - return param; - } - public String getIndex() { - return index; - } - public String getText() { - return text; - } - public boolean getSelected(){ - return selected; - } - } - -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/PageDaoJena.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/PageDaoJena.java index c12587bd9..e3948c1d4 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/PageDaoJena.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/PageDaoJena.java @@ -186,13 +186,21 @@ public class PageDaoJena extends JenaBaseDao implements PageDao { @Override public Map getPageMappings() { Model displayModel = getOntModelSelector().getDisplayModel(); - QueryExecution qexec = QueryExecutionFactory.create( pageQuery, displayModel ); - Map rv = new HashMap(); - ResultSet resultSet = qexec.execSelect(); - while(resultSet.hasNext()){ - QuerySolution soln = resultSet.next(); - rv.put(nodeToString(soln.get("urlMapping")) , nodeToString( soln.get("pageUri") )); + displayModel.enterCriticalSection(false); + try{ + QueryExecution qexec = QueryExecutionFactory.create( pageQuery, displayModel ); + try{ + ResultSet resultSet = qexec.execSelect(); + while(resultSet.hasNext()){ + QuerySolution soln = resultSet.next(); + rv.put(nodeToString(soln.get("urlMapping")) , nodeToString( soln.get("pageUri") )); + } + }finally{ + qexec.close(); + } + }finally{ + displayModel.leaveCriticalSection(); } return rv; } @@ -211,8 +219,11 @@ public class PageDaoJena extends JenaBaseDao implements PageDao { displayModel.enterCriticalSection(false); try{ QueryExecution qexec = QueryExecutionFactory.create(pageQuery,displayModel,initialBindings ); - list = executeQueryToCollection( qexec ); - qexec.close(); + try{ + list = executeQueryToCollection( qexec ); + }finally{ + qexec.close(); + } }finally{ displayModel.leaveCriticalSection(); } @@ -236,12 +247,15 @@ public class PageDaoJena extends JenaBaseDao implements PageDao { displayModel.enterCriticalSection(false); try{ QueryExecution qexec = QueryExecutionFactory.create(pageDataGettersQuery, displayModel, initialBindings); - ResultSet rs = qexec.execSelect(); - while(rs.hasNext()){ - QuerySolution soln = rs.next(); - dataGetters.add( nodeToString( soln.get("dataGetter" ) )); + try{ + ResultSet rs = qexec.execSelect(); + while(rs.hasNext()){ + QuerySolution soln = rs.next(); + dataGetters.add( nodeToString( soln.get("dataGetter" ) )); + } + }finally{ + qexec.close(); } - qexec.close(); }finally{ displayModel.leaveCriticalSection(); } @@ -257,23 +271,31 @@ public class PageDaoJena extends JenaBaseDao implements PageDao { @Override public String getHomePageUri(){ Model displayModel = getOntModelSelector().getDisplayModel(); - QueryExecution qexec = QueryExecutionFactory.create( homePageUriQuery, displayModel ); - List rv = new ArrayList(); - ResultSet resultSet = qexec.execSelect(); - while(resultSet.hasNext()){ - QuerySolution soln = resultSet.next(); - rv.add( nodeToString(soln.get("pageUri")) ); - } - if( rv.size() == 0 ){ - log.error("No display:HomePage defined in display model."); - return null; - } - if( rv.size() > 1 ){ - log.error("More than one display:HomePage defined in display model."); - for( String hp : rv ){ - log.error("home page: " + hp); + displayModel.enterCriticalSection(false); + try{ + QueryExecution qexec = QueryExecutionFactory.create( homePageUriQuery, displayModel ); + try{ + ResultSet resultSet = qexec.execSelect(); + while(resultSet.hasNext()){ + QuerySolution soln = resultSet.next(); + rv.add( nodeToString(soln.get("pageUri")) ); + } + if( rv.size() == 0 ){ + log.error("No display:HomePage defined in display model."); + return null; + } + if( rv.size() > 1 ){ + log.error("More than one display:HomePage defined in display model."); + for( String hp : rv ){ + log.error("home page: " + hp); + } + } + }finally{ + qexec.close(); } + }finally{ + displayModel.leaveCriticalSection(); } return rv.get(0); } @@ -291,22 +313,29 @@ public class PageDaoJena extends JenaBaseDao implements PageDao { initialBindings.add("pageUri", ResourceFactory.createResource(pageUri)); Model displayModel = getOntModelSelector().getDisplayModel(); - QueryExecution qexec = QueryExecutionFactory.create( classGroupPageQuery, displayModel , initialBindings); - - List classGroupsForPage = new ArrayList(); - ResultSet resultSet = qexec.execSelect(); - while(resultSet.hasNext()){ - QuerySolution soln = resultSet.next(); - classGroupsForPage.add( nodeToString(soln.get("classGroup")) ); + try{ + QueryExecution qexec = QueryExecutionFactory.create( classGroupPageQuery, displayModel , initialBindings); + try{ + List classGroupsForPage = new ArrayList(); + ResultSet resultSet = qexec.execSelect(); + while(resultSet.hasNext()){ + QuerySolution soln = resultSet.next(); + classGroupsForPage.add( nodeToString(soln.get("classGroup")) ); + } + if( classGroupsForPage.size() == 0 ){ + log.debug("No classgroup info defined in display model for "+ pageUri); + return null; + } + if( classGroupsForPage.size() > 1 ){ + log.error("More than one display:forClassGroup defined in display model for page " + pageUri); + } + return classGroupsForPage.get(0); + }finally{ + qexec.close(); + } + }finally{ + displayModel.leaveCriticalSection(); } - if( classGroupsForPage.size() == 0 ){ - log.debug("No classgroup info defined in display model for "+ pageUri); - return null; - } - if( classGroupsForPage.size() > 1 ){ - log.error("More than one display:forClassGroup defined in display model for page " + pageUri); - } - return classGroupsForPage.get(0); } /** @@ -318,32 +347,40 @@ public class PageDaoJena extends JenaBaseDao implements PageDao { initialBindings.add("pageUri", ResourceFactory.createResource(pageUri)); Model displayModel = getOntModelSelector().getDisplayModel(); - QueryExecution qexec = QueryExecutionFactory.create( classIntersectionPageQuery, displayModel , initialBindings); - //Assuming unique labels or could use URI itself? - //TODO: Review whether to use labels or URIs - - - ResultSet resultSet = qexec.execSelect(); - while(resultSet.hasNext()){ - QuerySolution soln = resultSet.next(); - //Results format should be ?page hasClassIntersection . intersectsWithClass ?c; intersects With Class ?e. - String intersectionLabel = nodeToString(soln.get("label")); - - //first time encountering label, set up - if(!classIntersectionsMap.containsKey(intersectionLabel)) { - classIntersectionsMap.put(intersectionLabel, new ArrayList()); - } - - List classes = classIntersectionsMap.get(intersectionLabel); - classes.add(nodeToString(soln.get("class"))); - //classIntersections.add( nodeToString(soln.get("classIntersection")) ); + try{ + QueryExecution qexec = QueryExecutionFactory.create( classIntersectionPageQuery, displayModel , initialBindings); + try{ + //Assuming unique labels or could use URI itself? + //TODO: Review whether to use labels or URIs + + + ResultSet resultSet = qexec.execSelect(); + while(resultSet.hasNext()){ + QuerySolution soln = resultSet.next(); + //Results format should be ?page hasClassIntersection . intersectsWithClass ?c; intersects With Class ?e. + String intersectionLabel = nodeToString(soln.get("label")); + + //first time encountering label, set up + if(!classIntersectionsMap.containsKey(intersectionLabel)) { + classIntersectionsMap.put(intersectionLabel, new ArrayList()); + } + + List classes = classIntersectionsMap.get(intersectionLabel); + classes.add(nodeToString(soln.get("class"))); + //classIntersections.add( nodeToString(soln.get("classIntersection")) ); + } + if( classIntersectionsMap.size() == 0 ){ + log.debug("No class intersections info defined in display model for "+ pageUri); + return null; + } + + return classIntersectionsMap; + }finally{ + qexec.close(); + } + }finally{ + displayModel.leaveCriticalSection(); } - if( classIntersectionsMap.size() == 0 ){ - log.debug("No class intersections info defined in display model for "+ pageUri); - return null; - } - - return classIntersectionsMap; } @@ -362,40 +399,50 @@ public class PageDaoJena extends JenaBaseDao implements PageDao { QuerySolutionMap initialBindings = new QuerySolutionMap(); initialBindings.add("pageUri", ResourceFactory.createResource(pageUri)); List classes = new ArrayList(); - Model displayModel = getOntModelSelector().getDisplayModel(); - QueryExecution qexec = QueryExecutionFactory.create( individualsForClassesQuery, displayModel , initialBindings); - HashMap restrictClassesPresentMap = new HashMap(); - List restrictClasses = new ArrayList(); - - ResultSet resultSet = qexec.execSelect(); - while(resultSet.hasNext()){ - QuerySolution soln = resultSet.next(); - String dg = nodeToString(soln.get("dg")); - classes.add(nodeToString(soln.get("class"))); - String restrictClass = nodeToString(soln.get("restrictClass")); - if(!restrictClassesPresentMap.containsKey(restrictClass)) { - restrictClasses.add(restrictClass); - restrictClassesPresentMap.put(restrictClass, "true"); - } - } - if( classes.size() == 0 ){ - log.debug("No classes defined in display model for "+ pageUri); - return null; + Model displayModel = getOntModelSelector().getDisplayModel(); + try{ + QueryExecution qexec = QueryExecutionFactory.create( individualsForClassesQuery, displayModel , initialBindings); + try{ + HashMap restrictClassesPresentMap = new HashMap(); + List restrictClasses = new ArrayList(); + + ResultSet resultSet = qexec.execSelect(); + while(resultSet.hasNext()){ + QuerySolution soln = resultSet.next(); + String dg = nodeToString(soln.get("dg")); + classes.add(nodeToString(soln.get("class"))); + String restrictClass = nodeToString(soln.get("restrictClass")); + if(!restrictClassesPresentMap.containsKey(restrictClass)) { + restrictClasses.add(restrictClass); + restrictClassesPresentMap.put(restrictClass, "true"); + } + } + + if( classes.size() == 0 ){ + log.debug("No classes defined in display model for "+ pageUri); + return null; + } + classesAndRestrictions.put("classes", classes); + classesAndRestrictions.put("restrictClasses", restrictClasses); + return classesAndRestrictions; + }finally{ + qexec.close(); + } + }finally{ + displayModel.leaveCriticalSection(); } - classesAndRestrictions.put("classes", classes); - classesAndRestrictions.put("restrictClasses", restrictClasses); - return classesAndRestrictions; } - /* ****************************************************************************** */ + /* *************************** Utility methods ********************************* */ /** * Converts a sparql query that returns a multiple rows to a list of maps. * The maps will have column names as keys to the values. + * This method will not close qexec. */ protected List> executeQueryToCollection( QueryExecution qexec) { diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/AutocompleteController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/AutocompleteController.java deleted file mode 100644 index 92216eb5e..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/AutocompleteController.java +++ /dev/null @@ -1,326 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.search.controller; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import javax.servlet.ServletContext; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.Term; -import org.apache.lucene.queryParser.ParseException; -import org.apache.lucene.queryParser.QueryParser; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.util.Version; -import org.json.JSONArray; -import org.json.JSONObject; - -import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.Actions; -import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.usepages.UseBasicAjaxControllers; -import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; -import edu.cornell.mannlib.vitro.webapp.controller.ajax.VitroAjaxController; -import edu.cornell.mannlib.vitro.webapp.search.SearchException; -import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; -import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory; - -/** - * AutocompleteController generates autocomplete content - * through a Lucene search. - */ -public class AutocompleteController extends VitroAjaxController { - - private static final long serialVersionUID = 1L; - private static final Log log = LogFactory.getLog(AutocompleteController.class); - - //private static final String TEMPLATE_DEFAULT = "autocompleteResults.ftl"; - - private static String QUERY_PARAMETER_NAME = "term"; - - String NORESULT_MSG = ""; - private int defaultMaxSearchSize= 1000; - - @Override - protected Actions requiredActions(VitroRequest vreq) { - return new Actions(new UseBasicAjaxControllers()); - } - - @Override - protected void doRequest(VitroRequest vreq, HttpServletResponse response) - throws IOException, ServletException { - - try { - - int maxHitSize = defaultMaxSearchSize; - - String qtxt = vreq.getParameter(QUERY_PARAMETER_NAME); - Analyzer analyzer = getAnalyzer(getServletContext()); - - Query query = getQuery(vreq, analyzer, qtxt); - if (query == null ) { - log.debug("query for '" + qtxt +"' is null."); - doNoQuery(response); - return; - } - log.debug("query for '" + qtxt +"' is " + query.toString()); - - IndexSearcher searcherForRequest = LuceneIndexFactory.getIndexSearcher(getServletContext()); - - TopDocs topDocs = null; - try{ - topDocs = searcherForRequest.search(query,null,maxHitSize); - }catch(Throwable t){ - log.error("in first pass at search: " + t); - // this is a hack to deal with odd cases where search and index threads interact - try{ - wait(150); - topDocs = searcherForRequest.search(query,null,maxHitSize); - }catch (Exception e){ - log.error(e, e); - doNoSearchResults(response); - return; - } - } - - if( topDocs == null || topDocs.scoreDocs == null){ - log.error("topDocs for a search was null"); - doNoSearchResults(response); - return; - } - - int hitsLength = topDocs.scoreDocs.length; - if ( hitsLength < 1 ){ - doNoSearchResults(response); - return; - } - log.debug("found "+hitsLength+" hits"); - - List results = new ArrayList(); - for(int i=0; i MAX_QUERY_LENGTH ){ - log.debug("The search was too long. The maximum " + - "query length is " + MAX_QUERY_LENGTH ); - return null; - } - - query = makeNameQuery(querystr, analyzer, vreq); - - // Filter by type - { - BooleanQuery boolQuery = new BooleanQuery(); - String typeParam = (String) vreq.getParameter("type"); - boolQuery.add( new TermQuery( - new Term(VitroLuceneTermNames.RDFTYPE, - typeParam)), - BooleanClause.Occur.MUST); - boolQuery.add(query, BooleanClause.Occur.MUST); - query = boolQuery; - } - - } catch (Exception ex){ - throw new SearchException(ex.getMessage()); - } - - return query; - } - - private Query makeNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) { - - String tokenizeParam = (String) request.getParameter("tokenize"); - boolean tokenize = "true".equals(tokenizeParam); - - // Note: Stemming is only relevant if we are tokenizing: an untokenized name - // query will not be stemmed. So we don't look at the stem parameter until we get to - // makeTokenizedNameQuery(). - if (tokenize) { - return makeTokenizedNameQuery(querystr, analyzer, request); - } else { - return makeUntokenizedNameQuery(querystr); - } - } - - private Query makeTokenizedNameQuery(String querystr, Analyzer analyzer, HttpServletRequest request) { - - String stemParam = (String) request.getParameter("stem"); - boolean stem = "true".equals(stemParam); - String termName = stem ? VitroLuceneTermNames.NAME_STEMMED : VitroLuceneTermNames.NAME_UNSTEMMED; - - BooleanQuery boolQuery = new BooleanQuery(); - - // Use the query parser to analyze the search term the same way the indexed text was analyzed. - // For example, text is lowercased, and function words are stripped out. - QueryParser parser = getQueryParser(termName, analyzer); - - // The wildcard query doesn't play well with stemming. Query term name:tales* doesn't match - // "tales", which is indexed as "tale", while query term name:tales does. Obviously we need - // the wildcard for name:tal*, so the only way to get them all to match is use a disjunction - // of wildcard and non-wildcard queries. The query will look have only an implicit disjunction - // operator: e.g., +(name:tales name:tales*) - try { - log.debug("Adding non-wildcard query for " + querystr); - Query query = parser.parse(querystr); - boolQuery.add(query, BooleanClause.Occur.SHOULD); - - // Prevent ParseException here when adding * after a space. - // If there's a space at the end, we don't need the wildcard query. - if (! querystr.endsWith(" ")) { - log.debug("Adding wildcard query for " + querystr); - Query wildcardQuery = parser.parse(querystr + "*"); - boolQuery.add(wildcardQuery, BooleanClause.Occur.SHOULD); - } - - log.debug("Name query is: " + boolQuery.toString()); - } catch (ParseException e) { - log.warn(e, e); - } - - return boolQuery; - } - - private Query makeUntokenizedNameQuery(String querystr) { - - querystr = querystr.toLowerCase(); - String termName = VitroLuceneTermNames.NAME_LOWERCASE; - BooleanQuery query = new BooleanQuery(); - log.debug("Adding wildcard query on unanalyzed name"); - query.add( - new WildcardQuery(new Term(termName, querystr + "*")), - BooleanClause.Occur.MUST); - - return query; - } - - private QueryParser getQueryParser(String searchField, Analyzer analyzer){ - // searchField indicates which field to search against when there is no term - // indicated in the query string. - // The analyzer is needed so that we use the same analyzer on the search queries as - // was used on the text that was indexed. - QueryParser qp = new QueryParser(Version.LUCENE_29, searchField,analyzer); - //this sets the query parser to AND all of the query terms it finds. - qp.setDefaultOperator(QueryParser.AND_OPERATOR); - return qp; - } - - private void doNoQuery(HttpServletResponse response) throws IOException { - // For now, we are not sending an error message back to the client because with the default autocomplete configuration it - // chokes. - doNoSearchResults(response); - } - - private void doSearchError(HttpServletResponse response) throws IOException { - // For now, we are not sending an error message back to the client because with the default autocomplete configuration it - // chokes. - doNoSearchResults(response); - } - - private void doNoSearchResults(HttpServletResponse response) throws IOException { - response.getWriter().write("[]"); - } - - public static final int MAX_QUERY_LENGTH = 500; - - public class SearchResult implements Comparable { - private String label; - private String uri; - - SearchResult(String label, String uri) { - this.label = label; - this.uri = uri; - } - - public String getLabel() { - return label; - } - - public String getJsonLabel() { - return JSONObject.quote(label); - } - - public String getUri() { - return uri; - } - - public String getJsonUri() { - return JSONObject.quote(uri); - } - - Map toMap() { - Map map = new HashMap(); - map.put("label", label); - map.put("uri", uri); - return map; - } - - public int compareTo(Object o) throws ClassCastException { - if ( !(o instanceof SearchResult) ) { - throw new ClassCastException("Error in SearchResult.compareTo(): expected SearchResult object."); - } - SearchResult sr = (SearchResult) o; - return label.compareToIgnoreCase(sr.getLabel()); - } - } - -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java deleted file mode 100644 index a7155b828..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/PagedSearchController.java +++ /dev/null @@ -1,868 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.search.controller; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import javax.servlet.ServletContext; -import javax.servlet.ServletException; -import javax.servlet.http.HttpServletRequest; -import javax.servlet.http.HttpServletResponse; - -import org.apache.commons.lang.StringUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.Term; -import org.apache.lucene.queryParser.MultiFieldQueryParser; -import org.apache.lucene.queryParser.ParseException; -import org.apache.lucene.queryParser.QueryParser; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.util.Version; - -import edu.cornell.mannlib.vitro.webapp.beans.ApplicationBean; -import edu.cornell.mannlib.vitro.webapp.beans.Individual; -import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl; -import edu.cornell.mannlib.vitro.webapp.beans.VClass; -import edu.cornell.mannlib.vitro.webapp.beans.VClassGroup; -import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.FreemarkerHttpServlet; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder.ParamMap; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ExceptionResponseValues; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.TemplateResponseValues; -import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao; -import edu.cornell.mannlib.vitro.webapp.dao.VClassDao; -import edu.cornell.mannlib.vitro.webapp.dao.VClassGroupDao; -import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; -import edu.cornell.mannlib.vitro.webapp.search.SearchException; -import edu.cornell.mannlib.vitro.webapp.search.beans.Searcher; -import edu.cornell.mannlib.vitro.webapp.search.beans.VitroHighlighter; -import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery; -import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory; -import edu.cornell.mannlib.vitro.webapp.search.lucene.CustomSimilarity; -import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; -import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames; -import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory; -import edu.cornell.mannlib.vitro.webapp.web.templatemodels.LinkTemplateModel; -import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individuallist.BaseListedIndividual; -import freemarker.template.Configuration; - -/** - * PagedSearchController is the new search controller that interacts - * directly with the lucene API and returns paged, relevance ranked results. - * - * @author bdc34 - * - * Rewritten to use Freemarker: rjy7 - * - */ -public class PagedSearchController extends FreemarkerHttpServlet implements Searcher { - - private static final long serialVersionUID = 1L; - private static final Log log = LogFactory.getLog(PagedSearchController.class.getName()); - private static final String XML_REQUEST_PARAM = "xml"; - - private IndexSearcher searcher = null; - private int defaultHitsPerPage = 25; - private int defaultMaxSearchSize= 1000; - - protected static final Map> templateTable; - private static final float QUERY_BOOST = 2.0F; - - protected enum Format{ - HTML, XML; - } - - protected enum Result{ - PAGED, FORM, ERROR, BAD_QUERY - } - - static{ - templateTable = setupTemplateTable(); - } - - /** - * Overriding doGet from FreemarkerHttpController to do a page template (as - * opposed to body template) style output for XML requests. - * - * This follows the pattern in AutocompleteController.java. - */ - @Override - public void doGet(HttpServletRequest request, HttpServletResponse response) - throws IOException, ServletException { - boolean wasXmlRequested = isRequestedFormatXml(request); - if( ! wasXmlRequested ){ - super.doGet(request,response); - }else{ - try { - VitroRequest vreq = new VitroRequest(request); - Configuration config = getConfig(vreq); - ResponseValues rvalues = processRequest(vreq); - - response.setCharacterEncoding("UTF-8"); - response.setContentType("text/xml;charset=UTF-8"); - writeTemplate(rvalues.getTemplateName(), rvalues.getMap(), config, request, response); - } catch (Exception e) { - log.error(e, e); - } - } - } - - @Override - protected ResponseValues processRequest(VitroRequest vreq) { - - log.debug("All parameters present in the request: "+ vreq.getParameterMap().toString()); - - //There may be other non-html formats in the future - Format format = getFormat(vreq); - boolean wasXmlRequested = Format.XML == format; - log.debug("Requested format was " + (wasXmlRequested ? "xml" : "html")); - boolean wasHtmlRequested = ! wasXmlRequested; - - try { - ApplicationBean appBean = vreq.getAppBean(); - - //make sure an IndividualDao is available - if( vreq.getWebappDaoFactory() == null - || vreq.getWebappDaoFactory().getIndividualDao() == null ){ - log.error("Could not get webappDaoFactory or IndividualDao"); - throw new Exception("Could not access model."); - } - IndividualDao iDao = vreq.getWebappDaoFactory().getIndividualDao(); - VClassGroupDao grpDao = vreq.getWebappDaoFactory().getVClassGroupDao(); - VClassDao vclassDao = vreq.getWebappDaoFactory().getVClassDao(); - String alphaFilter = vreq.getParameter("alpha"); - - - log.debug("IndividualDao is " + iDao.toString() + " Public classes in the classgroup are " + grpDao.getPublicGroupsWithVClasses().toString()); - log.debug("VClassDao is "+ vclassDao.toString() ); - - int startIndex = 0; - try{ - startIndex = Integer.parseInt(vreq.getParameter("startIndex")); - }catch (Throwable e) { - startIndex = 0; - } - log.debug("startIndex is " + startIndex); - - int hitsPerPage = defaultHitsPerPage; - try{ - hitsPerPage = Integer.parseInt(vreq.getParameter("hitsPerPage")); - } catch (Throwable e) { - hitsPerPage = defaultHitsPerPage; - } - log.debug("hitsPerPage is " + hitsPerPage); - - int maxHitSize = defaultMaxSearchSize; - if( startIndex >= defaultMaxSearchSize - hitsPerPage ) - maxHitSize = startIndex + defaultMaxSearchSize; - if( alphaFilter != null ){ - maxHitSize = maxHitSize * 2; - hitsPerPage = maxHitSize; - } - log.debug("maxHitSize is " + maxHitSize); - - String qtxt = vreq.getParameter(VitroQuery.QUERY_PARAMETER_NAME); - Analyzer analyzer = getAnalyzer(getServletContext()); - - log.debug("Query text is "+ qtxt + " Analyzer is "+ analyzer.toString()); - - Query query = null; - try { - query = getQuery(vreq, analyzer, qtxt); - log.debug("query for '" + qtxt +"' is " + query.toString()); - } catch (ParseException e) { - return doBadQuery(appBean, qtxt,format); - } - - IndexSearcher searcherForRequest = LuceneIndexFactory.getIndexSearcher(getServletContext()); - - /* using the CustomSimilarity to override effects such as - * 1) rarity of a term doesn't affect the document score. - * 2) number of instances of a query term in the matched document doesn't affect the document score - * 3) field length doesn't affect the document score - * - * 3/29/2011 bk392 - */ - CustomSimilarity customSimilarity = new CustomSimilarity(); - searcherForRequest.setSimilarity(customSimilarity); - - TopDocs topDocs = null; - try{ - log.debug("Searching for query term in the Index with maxHitSize "+ maxHitSize); - log.debug("Query is "+ query.toString()); - - //sets the query boost for the query. the lucene docs matching this query term - //are multiplied by QUERY_BOOST to get their total score - //query.setBoost(QUERY_BOOST); - - topDocs = searcherForRequest.search(query,null,maxHitSize); - - log.debug("Total hits for the query are "+ topDocs.totalHits); - for(ScoreDoc scoreDoc : topDocs.scoreDocs){ - - Document document = searcherForRequest.doc(scoreDoc.doc); - Explanation explanation = searcherForRequest.explain(query, scoreDoc.doc); - - log.debug("Document title: "+ document.get(Entity2LuceneDoc.VitroLuceneTermNames.NAME_STEMMED) + " score: " +scoreDoc.score); - log.debug("Scoring of the doc explained " + explanation.toString()); - log.debug("Explanation's description "+ explanation.getDescription()); - log.debug("ALLTEXT: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXT)); - log.debug("ALLTEXTUNSTEMMED: " + document.get(Entity2LuceneDoc.VitroLuceneTermNames.ALLTEXTUNSTEMMED)); - - - } - - }catch(Throwable t){ - log.error("in first pass at search: " + t); - // this is a hack to deal with odd cases where search and index threads interact - try{ - wait(150); - topDocs = searcherForRequest.search(query,null,maxHitSize); - }catch (Exception ex){ - log.error(ex); - String msg = makeBadSearchMessage(qtxt,ex.getMessage()); - if (msg == null) { - msg = "The search request contained errors."; - } - return doFailedSearch(msg, qtxt,format); - } - } - - if( topDocs == null || topDocs.scoreDocs == null){ - log.error("topDocs for a search was null"); - String msg = "The search request contained errors."; - return doFailedSearch(msg, qtxt,format); - } - - - int hitsLength = topDocs.scoreDocs.length; - log.debug("No. of hits "+ hitsLength); - if ( hitsLength < 1 ){ - return doNoHits(qtxt,format); - } - log.debug("found "+hitsLength+" hits"); - - int lastHitToShow = 0; - if((startIndex + hitsPerPage) > hitsLength ) { - lastHitToShow = hitsLength; - } else { - lastHitToShow = startIndex + hitsPerPage - 1; - } - - List beans = new LinkedList(); - for(int i=startIndex; i= startIndex) && (i <= lastHitToShow) ){ - Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc); - String uri = doc.get(Entity2LuceneDoc.term.URI); - log.debug("Retrieving entity with uri "+ uri); - Individual ent = new IndividualImpl(); - ent.setURI(uri); - ent = iDao.getIndividualByURI(uri); - if(ent!=null) - beans.add(ent); - } - }catch(Exception e){ - log.error("problem getting usable Individuals from search " + - "hits" + e.getMessage()); - } - } - - ParamMap pagingLinkParams = new ParamMap(); - pagingLinkParams.put("querytext", qtxt); - pagingLinkParams.put("hitsPerPage", String.valueOf(hitsPerPage)); - - if( wasXmlRequested ){ - pagingLinkParams.put(XML_REQUEST_PARAM,"1"); - } - - /* Start putting together the data for the templates */ - - Map body = new HashMap(); - - String classGroupParam = vreq.getParameter("classgroup"); - boolean classGroupFilterRequested = false; - if (!StringUtils.isEmpty(classGroupParam)) { - VClassGroup grp = grpDao.getGroupByURI(classGroupParam); - classGroupFilterRequested = true; - if (grp != null && grp.getPublicName() != null) - body.put("classGroupName", grp.getPublicName()); - } - - String typeParam = vreq.getParameter("type"); - boolean typeFiltereRequested = false; - if (!StringUtils.isEmpty(typeParam)) { - VClass type = vclassDao.getVClassByURI(typeParam); - typeFiltereRequested = true; - if (type != null && type.getName() != null) - body.put("typeName", type.getName()); - } - - /* Add classgroup and type refinement links to body */ - if( wasHtmlRequested ){ - // Search request includes no classgroup and no type, so add classgroup search refinement links. - if ( !classGroupFilterRequested && !typeFiltereRequested ) { - List classgroups = getClassGroups(grpDao, topDocs, searcherForRequest); - List classGroupLinks = new ArrayList(classgroups.size()); - for (VClassGroup vcg : classgroups) { - if (vcg.getPublicName() != null) { - classGroupLinks.add(new VClassGroupSearchLink(qtxt, vcg)); - } - } - body.put("classGroupLinks", classGroupLinks); - - // Search request is for a classgroup, so add rdf:type search refinement links - // but try to filter out classes that are subclasses - } else if ( classGroupFilterRequested && !typeFiltereRequested ) { - List vClasses = getVClasses(vclassDao,topDocs,searcherForRequest); - List vClassLinks = new ArrayList(vClasses.size()); - for (VClass vc : vClasses) { - vClassLinks.add(new VClassSearchLink(qtxt, vc)); - } - body.put("classLinks", vClassLinks); - pagingLinkParams.put("classgroup", classGroupParam); - - // This case is never displayed - } else if (!StringUtils.isEmpty(alphaFilter)) { - body.put("alphas", getAlphas(topDocs, searcherForRequest)); - alphaSortIndividuals(beans); - } else { - pagingLinkParams.put("type", typeParam); - } - } - - // Convert search result individuals to template model objects - body.put("individuals", BaseListedIndividual - .getIndividualTemplateModels(beans, vreq)); - - body.put("querytext", qtxt); - body.put("title", qtxt + " - " + appBean.getApplicationName() - + " Search Results"); - - body.put("hitCount",hitsLength); - body.put("startIndex", startIndex); - - body.put("pagingLinks", getPagingLinks(startIndex, hitsPerPage, - hitsLength, maxHitSize, vreq.getServletPath(), - pagingLinkParams)); - - if (startIndex != 0) { - body.put("prevPage", getPreviousPageLink(startIndex, - hitsPerPage, vreq.getServletPath(), pagingLinkParams)); - } - if (startIndex < (hitsLength - hitsPerPage)) { - body.put("nextPage", getNextPageLink(startIndex, hitsPerPage, - vreq.getServletPath(), pagingLinkParams)); - } - - String template = templateTable.get(format).get(Result.PAGED); - - return new TemplateResponseValues(template, body); - } catch (Throwable e) { - return doSearchError(e,format); - } - } - - private void alphaSortIndividuals(List beans) { - Collections.sort(beans, new Comparator< Individual >(){ - public int compare(Individual o1, Individual o2) { - if( o1 == null || o1.getName() == null ) - return 1; - else - return o1.getName().compareTo(o2.getName()); - }}); - } - - private List getAlphas(TopDocs topDocs, IndexSearcher searcher) { - Set alphas = new HashSet(); - for(int i=0;i 0) - alphas.add( name.substring(0, 1)); - } catch (CorruptIndexException e) { - log.debug("Could not get alphas for document",e); - } catch (IOException e) { - log.debug("Could not get alphas for document",e); - } - - } - return new ArrayList(alphas); - } - - /** - * Get the class groups represented for the individuals in the topDocs. - */ - private List getClassGroups(VClassGroupDao grpDao, TopDocs topDocs, - IndexSearcher searcherForRequest) { - LinkedHashMap grpMap = grpDao.getClassGroupMap(); - int n = grpMap.size(); - - HashSet classGroupsInHits = new HashSet(n); - int grpsFound = 0; - - for(int i=0; i grpsFound ;i++){ - try{ - Document doc = searcherForRequest.doc(topDocs.scoreDocs[i].doc); - Field[] grps = doc.getFields(Entity2LuceneDoc.term.CLASSGROUP_URI); - if(grps != null || grps.length > 0){ - for(int j=0;j= n ) - break; - } - } - } - }catch(Exception e){ - log.error("problem getting VClassGroups from search hits " - + e.getMessage()); - } - } - - List classgroupURIs= Collections.list(Collections.enumeration(classGroupsInHits)); - List classgroups = new ArrayList( classgroupURIs.size() ); - for(String cgUri: classgroupURIs){ - if( cgUri != null && ! "".equals(cgUri) ){ - VClassGroup vcg = grpDao.getGroupByURI( cgUri ); - if( vcg == null ){ - log.debug("could not get classgroup for URI " + cgUri); - }else{ - classgroups.add(vcg); - } - } - } - grpDao.sortGroupList(classgroups); - - return classgroups; - } - - private class VClassGroupSearchLink extends LinkTemplateModel { - - VClassGroupSearchLink(String querytext, VClassGroup classgroup) { - super(classgroup.getPublicName(), "/search", "querytext", querytext, "classgroup", classgroup.getURI()); - } - } - - private class VClassSearchLink extends LinkTemplateModel { - - VClassSearchLink(String querytext, VClass type) { - super(type.getName(), "/search", "querytext", querytext, "type", type.getURI()); - } - } - - private List getPagingLinks(int startIndex, int hitsPerPage, int hitsLength, int maxHitSize, String baseUrl, ParamMap params) { - - List pagingLinks = new ArrayList(); - - // No paging links if only one page of results - if (hitsLength <= hitsPerPage) { - return pagingLinks; - } - - for (int i = 0; i < hitsLength; i += hitsPerPage) { - params.put("startIndex", String.valueOf(i)); - if ( i < maxHitSize - hitsPerPage) { - int pageNumber = i/hitsPerPage + 1; - if (i >= startIndex && i < (startIndex + hitsPerPage)) { - pagingLinks.add(new PagingLink(pageNumber)); - } else { - pagingLinks.add(new PagingLink(pageNumber, baseUrl, params)); - } - } else { - pagingLinks.add(new PagingLink("more...", baseUrl, params)); - } - } - - return pagingLinks; - } - - private String getPreviousPageLink(int startIndex, int hitsPerPage, String baseUrl, ParamMap params) { - params.put("startIndex", String.valueOf(startIndex-hitsPerPage)); - //return new PagingLink("Previous", baseUrl, params); - return UrlBuilder.getUrl(baseUrl, params); - } - - private String getNextPageLink(int startIndex, int hitsPerPage, String baseUrl, ParamMap params) { - params.put("startIndex", String.valueOf(startIndex+hitsPerPage)); - //return new PagingLink("Next", baseUrl, params); - return UrlBuilder.getUrl(baseUrl, params); - } - - private class PagingLink extends LinkTemplateModel { - - PagingLink(int pageNumber, String baseUrl, ParamMap params) { - super(String.valueOf(pageNumber), baseUrl, params); - } - - // Constructor for current page item: not a link, so no url value. - PagingLink(int pageNumber) { - setText(String.valueOf(pageNumber)); - } - - // Constructor for "more..." item - PagingLink(String text, String baseUrl, ParamMap params) { - super(text, baseUrl, params); - } - } - - private List getVClasses(VClassDao vclassDao, TopDocs topDocs, - IndexSearcher searherForRequest){ - HashSet typesInHits = getVClassUrisForHits(topDocs,searherForRequest); - List classes = new ArrayList(typesInHits.size()); - - Iterator it = typesInHits.iterator(); - while(it.hasNext()){ - String typeUri = it.next(); - try{ - if( VitroVocabulary.OWL_THING.equals(typeUri)) - continue; - VClass type = vclassDao.getVClassByURI(typeUri); - if( ! type.isAnonymous() && - type.getName() != null && !"".equals(type.getName()) && - type.getGroupURI() != null ) //don't display classes that aren't in classgroups - classes.add(type); - }catch(Exception ex){ - if( log.isDebugEnabled() ) - log.debug("could not add type " + typeUri, ex); - } - } - Collections.sort(classes, new Comparator(){ - public int compare(VClass o1, VClass o2) { - return o1.compareTo(o2); - }}); - return classes; - } - - private HashSet getVClassUrisForHits(TopDocs topDocs, - IndexSearcher searcherForRequest){ - HashSet typesInHits = new HashSet(); - for(int i=0; i MAX_QUERY_LENGTH ){ - log.debug("The search was too long. The maximum " + - "query length is " + MAX_QUERY_LENGTH ); - return null; - } - - log.debug("Parsing query using QueryParser "); - - QueryParser parser = getQueryParser(analyzer); - query = parser.parse(querystr); - - String alpha = request.getParameter("alpha"); - - if( alpha != null && !"".equals(alpha) && alpha.length() == 1){ - - log.debug("Firing alpha query "); - log.debug("request.getParameter(alpha) is " + alpha); - - BooleanQuery boolQuery = new BooleanQuery(); - boolQuery.add( query, BooleanClause.Occur.MUST ); - boolQuery.add( - new WildcardQuery(new Term(Entity2LuceneDoc.term.NAME_STEMMED, alpha+'*')), - BooleanClause.Occur.MUST); - query = boolQuery; - } - - //check if this is classgroup filtered - Object param = request.getParameter("classgroup"); - if( param != null && !"".equals(param)){ - - log.debug("Firing classgroup query "); - log.debug("request.getParameter(classgroup) is "+ param.toString()); - - BooleanQuery boolQuery = new BooleanQuery(); - boolQuery.add( query, BooleanClause.Occur.MUST); - boolQuery.add( new TermQuery( - new Term(Entity2LuceneDoc.term.CLASSGROUP_URI, - (String)param)), - BooleanClause.Occur.MUST); - query = boolQuery; - } - - //check if this is rdf:type filtered - param = request.getParameter("type"); - if( param != null && !"".equals(param)){ - log.debug("Firing type query "); - log.debug("request.getParameter(type) is "+ param.toString()); - - BooleanQuery boolQuery = new BooleanQuery(); - boolQuery.add( query, BooleanClause.Occur.MUST); - boolQuery.add( new TermQuery( - new Term(Entity2LuceneDoc.term.RDFTYPE, - (String)param)), - BooleanClause.Occur.MUST); - query = boolQuery; - } - - log.debug("Query: " + query); - - } catch (ParseException e) { - throw new ParseException(e.getMessage()); - } catch (Exception ex){ - throw new SearchException(ex.getMessage()); - } - - return query; - } - - @SuppressWarnings("static-access") - private QueryParser getQueryParser(Analyzer analyzer){ - //defaultSearchField indicates which field search against when there is no term - //indicated in the query string. - //The analyzer is needed so that we use the same analyzer on the search queries as - //was used on the text that was indexed. - //QueryParser qp = new QueryParser("NAME",analyzer); - //this sets the query parser to AND all of the query terms it finds. - //set up the map of stemmed field names -> unstemmed field names -// HashMap map = new HashMap(); -// map.put(Entity2LuceneDoc.term.ALLTEXT,Entity2LuceneDoc.term.ALLTEXTUNSTEMMED); -// qp.setStemmedToUnstemmed(map); - - MultiFieldQueryParser qp = new MultiFieldQueryParser(Version.LUCENE_29, new String[]{ - VitroLuceneTermNames.NAME_STEMMED, - VitroLuceneTermNames.NAME_UNSTEMMED, - VitroLuceneTermNames.RDFTYPE, - VitroLuceneTermNames.ALLTEXT, - VitroLuceneTermNames.ALLTEXTUNSTEMMED, - VitroLuceneTermNames.NAME_LOWERCASE, - VitroLuceneTermNames.CLASSLOCALNAME, - VitroLuceneTermNames.CLASSLOCALNAMELOWERCASE }, analyzer); - - // QueryParser qp = new QueryParser(Version.LUCENE_29, "name", analyzer); - - //AND_OPERATOR returns documents even if the terms in the query lie in different fields. - //The only requirement is that they exist in a single document. - //qp.setDefaultOperator(QueryParser.AND_OPERATOR); - - - return qp; - } - - private ExceptionResponseValues doSearchError(Throwable e, Format f) { - Map body = new HashMap(); - body.put("message", "Search failed: " + e.getMessage()); - return new ExceptionResponseValues(getTemplate(f,Result.ERROR), body, e); - } - - private TemplateResponseValues doBadQuery(ApplicationBean appBean, String query, Format f) { - Map body = new HashMap(); - body.put("title", "Search " + appBean.getApplicationName()); - body.put("query", query); - return new TemplateResponseValues(getTemplate(f,Result.BAD_QUERY), body); - } - - private TemplateResponseValues doFailedSearch(String message, String querytext, Format f) { - Map body = new HashMap(); - body.put("title", "Search for '" + querytext + "'"); - if ( StringUtils.isEmpty(message) ) { - message = "Search failed."; - } - body.put("message", message); - return new TemplateResponseValues(getTemplate(f,Result.ERROR), body); - } - - private TemplateResponseValues doNoHits(String querytext, Format f) { - Map body = new HashMap(); - body.put("title", "Search for '" + querytext + "'"); - body.put("message", "No matching results."); - return new TemplateResponseValues(getTemplate(f,Result.ERROR), body); - } - - /** - * Makes a message to display to user for a bad search term. - * @param query - * @param exceptionMsg - */ - private String makeBadSearchMessage(String querytext, String exceptionMsg){ - String rv = ""; - try{ - //try to get the column in the search term that is causing the problems - int coli = exceptionMsg.indexOf("column"); - if( coli == -1) return ""; - int numi = exceptionMsg.indexOf(".", coli+7); - if( numi == -1 ) return ""; - String part = exceptionMsg.substring(coli+7,numi ); - int i = Integer.parseInt(part) - 1; - - // figure out where to cut preview and post-view - int errorWindow = 5; - int pre = i - errorWindow; - if (pre < 0) - pre = 0; - int post = i + errorWindow; - if (post > querytext.length()) - post = querytext.length(); - // log.warn("pre: " + pre + " post: " + post + " term len: - // " + term.length()); - - // get part of the search term before the error and after - String before = querytext.substring(pre, i); - String after = ""; - if (post > i) - after = querytext.substring(i + 1, post); - - rv = "The search term had an error near " - + before + "" + querytext.charAt(i) - + "" + after + ""; - } catch (Throwable ex) { - return ""; - } - return rv; - } - - @SuppressWarnings("unchecked") - private HashSet getDataPropertyBlacklist(){ -// HashSetdpBlacklist = (HashSet) -// getServletContext().getAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST); -// return dpBlacklist; - return null; - } - - @SuppressWarnings("unchecked") - private HashSet getObjectPropertyBlacklist(){ -// HashSetopBlacklist = (HashSet) -// getServletContext().getAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST); -// return opBlacklist; - return null; - } - - - private final String defaultSearchField = "ALLTEXT"; - public static final int MAX_QUERY_LENGTH = 500; - - - /** - * Need to accept notification from indexer that the index has been changed. - */ - public void close() { - searcher = null; - } - - public VitroHighlighter getHighlighter(VitroQuery q) { - throw new Error("PagedSearchController.getHighlighter() is unimplemented"); - } - - public VitroQueryFactory getQueryFactory() { - throw new Error("PagedSearchController.getQueryFactory() is unimplemented"); - } - - public List search(VitroQuery query) throws SearchException { - throw new Error("PagedSearchController.search() is unimplemented"); - } - - protected boolean isRequestedFormatXml(HttpServletRequest req){ - if( req != null ){ - String param = req.getParameter(XML_REQUEST_PARAM); - if( param != null && "1".equals(param)){ - return true; - }else{ - return false; - } - }else{ - return false; - } - } - - protected Format getFormat(HttpServletRequest req){ - if( req != null && req.getParameter("xml") != null && "1".equals(req.getParameter("xml"))) - return Format.XML; - else - return Format.HTML; - } - - protected static String getTemplate(Format format, Result result){ - if( format != null && result != null) - return templateTable.get(format).get(result); - else{ - log.error("getTemplate() must not have a null format or result."); - return templateTable.get(Format.HTML).get(Result.ERROR); - } - } - - protected static Map> setupTemplateTable(){ - Map> templateTable = - new HashMap>(); - - HashMap resultsToTemplates = new HashMap(); - - //setup HTML format - resultsToTemplates.put(Result.PAGED, "search-pagedResults.ftl"); - resultsToTemplates.put(Result.FORM, "search-form.ftl"); - resultsToTemplates.put(Result.ERROR, "search-error.ftl"); - resultsToTemplates.put(Result.BAD_QUERY, "search-badQuery.ftl"); - templateTable.put(Format.HTML, Collections.unmodifiableMap(resultsToTemplates)); - - //setup XML format - resultsToTemplates = new HashMap(); - resultsToTemplates.put(Result.PAGED, "search-xmlResults.ftl"); - resultsToTemplates.put(Result.FORM, "search-xmlForm.ftl"); - resultsToTemplates.put(Result.ERROR, "search-xmlError.ftl"); - resultsToTemplates.put(Result.BAD_QUERY, "search-xmlBadQuery.ftl"); - templateTable.put(Format.XML, Collections.unmodifiableMap(resultsToTemplates)); - - return Collections.unmodifiableMap(templateTable); - } -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsForContextNodes.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsForContextNodes.java new file mode 100644 index 000000000..75eca90f8 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsForContextNodes.java @@ -0,0 +1,22 @@ +package edu.cornell.mannlib.vitro.webapp.search.indexing; + +import java.util.Collections; +import java.util.List; + +import com.hp.hpl.jena.ontology.OntModel; + +public class AdditionalURIsForContextNodes implements AdditionalURIsToIndex { + + private OntModel model; + + public AdditionalURIsForContextNodes( OntModel jenaOntModel){ + this.model = jenaOntModel; + } + + @Override + public List findAdditionalURIsToIndex(String uri) { + // TODO Auto-generated method stub + return Collections.emptyList(); + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsToIndex.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsToIndex.java new file mode 100644 index 000000000..fdb8d2dd3 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsToIndex.java @@ -0,0 +1,11 @@ +package edu.cornell.mannlib.vitro.webapp.search.indexing; + +import java.util.List; + +/** + * Interface to use with IndexBuilder to find more URIs to index given a URI. + * + */ +public interface AdditionalURIsToIndex { + List findAdditionalURIsToIndex(String uri); +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java index 1dd1d913e..9deef835d 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java @@ -47,6 +47,7 @@ public class IndexBuilder extends Thread { protected long reindexInterval = 1000 * 60 /* msec */ ; protected int numberOfThreads = 10; + protected List additionalURIsFinders; public static final boolean UPDATE_DOCS = false; public static final boolean NEW_DOCS = true; @@ -56,19 +57,20 @@ public class IndexBuilder extends Thread { public IndexBuilder( ServletContext context, IndexerIface indexer, - WebappDaoFactory wdf){ + WebappDaoFactory wdf, + List additionalURIsFinders){ super("IndexBuilder"); this.indexer = indexer; this.wdf = wdf; this.context = context; - + this.additionalURIsFinders = additionalURIsFinders; this.changedUris = new HashSet(); this.start(); } protected IndexBuilder(){ //for testing only - this( null, null, null); + this( null, null, null, null); } public void setWdf(WebappDaoFactory wdf){ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexFactory.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexFactory.java deleted file mode 100644 index e5a2314e9..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexFactory.java +++ /dev/null @@ -1,126 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.search.lucene; - -import java.io.File; -import java.io.IOException; -import java.util.List; - -import javax.servlet.ServletContext; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; - -import edu.cornell.mannlib.vitro.webapp.search.SearchException; - -public class LuceneIndexFactory { - - IndexSearcher searcher = null; - String baseIndexDirName = null; - - private static final Log log = LogFactory.getLog(LuceneIndexFactory.class.getName()); - - public static final String LUCENE_INDEX_FACTORY= "LuceneIndexFactory"; - - public LuceneIndexFactory(String baseIndexDirName){ - this.baseIndexDirName = baseIndexDirName; - } - - /** - * Get a lucene IndexSearch. This may return null. - */ - public static IndexSearcher getIndexSearcher( ServletContext context){ - return getLuceneIndexFactoryFromContext(context).innerGetIndexSearcher(context); - } - - protected static LuceneIndexFactory getLuceneIndexFactoryFromContext(ServletContext context){ - Object obj = context.getAttribute(LUCENE_INDEX_FACTORY); - if( obj == null ){ - log.error("cannot get LuceneIndexFactory from context. Search is not setup correctly"); - return null; - } - if( ! (obj instanceof LuceneIndexFactory)){ - log.error("LuceneIndexFactory in context was not of correct type. Expected " + LuceneIndexFactory.class.getName() - + " found " + obj.getClass().getName() + " Search is not setup correctly"); - return null; - } - return (LuceneIndexFactory)obj; - } - - - public static LuceneIndexFactory setup(ServletContext context, String baseIndexDirName){ - LuceneIndexFactory lif = (LuceneIndexFactory)context.getAttribute(LuceneIndexFactory.LUCENE_INDEX_FACTORY); - if( lif == null ){ - lif = new LuceneIndexFactory(baseIndexDirName); - context.setAttribute(LuceneIndexFactory.LUCENE_INDEX_FACTORY, lif); - } - return lif; - } - - /** - * This method can be used to force the LuceneIndexFactory to return a new IndexSearcher. - * This will force a re-opening of the search index. - * - * This could be useful if the index was rebult in a different directory on the file system. - */ - public synchronized void forceNewIndexSearcher(){ - log.debug("forcing the re-opening of the search index"); - IndexSearcher oldSearcher = searcher; - - - searcher = null; - } - - protected synchronized void forceClose(){ - log.debug("forcing the closing of the search index"); - try { - if( searcher != null ) - searcher.close(); - } catch (IOException e) { - log.error("could not close lucene searcher: " + e.getMessage()); - } - searcher = null; - } - - private synchronized IndexSearcher innerGetIndexSearcher(ServletContext context) { - if (searcher == null ) { - String liveDir = getLiveIndexDir( context ); - if( liveDir != null ){ - try { - Directory fsDir = FSDirectory.getDirectory(liveDir); - searcher = new IndexSearcher(fsDir); - } catch (IOException e) { - String base = getBaseIndexDir(); - log.error("could not make IndexSearcher " + e); - log.error("It is likely that you have not made a directory for the lucene index. " - + "Create the directory " + base + " and set permissions/ownership so" - + " that the tomcat process can read and write to it."); - } - }else{ - log.error("Could not create IndexSearcher because index directory was null. It may be that the LucenSetup.indexDir is " + - " not set in your deploy.properties file."); - } - } - return searcher; - } - - protected String getBaseIndexDir(){ - if( this.baseIndexDirName == null ) - log.error("LucenIndexFactory was not setup correctly, it must have a value for baseIndexDir"); - return this.baseIndexDirName; - } - - protected String getLiveIndexDir(ServletContext servletContext){ - String base = getBaseIndexDir(); - if( base == null ) - return null; - else - return base + File.separator + "live"; - } - - - -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java deleted file mode 100644 index 06ff64007..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java +++ /dev/null @@ -1,489 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.search.lucene; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.List; - -import org.apache.commons.io.FileUtils; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.LockObtainFailedException; - -import edu.cornell.mannlib.vitro.webapp.beans.Individual; -import edu.cornell.mannlib.vitro.webapp.search.IndexingException; -import edu.cornell.mannlib.vitro.webapp.search.beans.Searcher; -import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; -import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface; - -/** - * - * @author bdc34 - * - */ -public class LuceneIndexer implements IndexerIface { - - private final static Log log = LogFactory.getLog(LuceneIndexer.class); - - LinkedList obj2DocList = new LinkedList(); - String baseIndexDir = null; - String liveIndexDir = null; - Analyzer analyzer = null; - List searchers = Collections.EMPTY_LIST; - IndexWriter writer = null; - boolean indexing = false; - boolean fullRebuild = false; - HashSet urisIndexed; - private LuceneIndexFactory luceneIndexFactory; - private String currentOffLineDir; - - - //JODA timedate library can use java date format strings. - //http://java.sun.com/j2se/1.3/docs/api/java/text/SimpleDateFormat.html - public static String MODTIME_DATE_FORMAT = "YYYYMMDDHHmmss"; - - //date format for use with entity sunrise AND sunset - //don't let that SUNSET in the name fool you. - //controls the time resolution of the search. - // "YYYYMMDDHHmm" would have minute resolution - // "YYYYMMDD" would have day resolution; - public static String DATE_FORMAT = "YYYYMMDD"; - - /** - * Used for the sunrise to indicate that - * the entity has an very early sunrise - */ - public static String BEGINNING_OF_TIME = "00000000"; - /** - * used for the sunset to indicate that - * the entity has a very late sunset - */ - public static String END_OF_TIME = "ZZZ_END_OF_TIME"; - - private static final IndexWriter.MaxFieldLength MAX_FIELD_LENGTH = - IndexWriter.MaxFieldLength.UNLIMITED; - - public LuceneIndexer(String baseIndexDir, String liveIndexDir, List searchers, Analyzer analyzer ) throws IOException{ - this.baseIndexDir = baseIndexDir; - this.liveIndexDir = liveIndexDir; - this.analyzer = analyzer; - if( searchers != null ) - this.searchers = searchers; - - updateTo1p2(); - makeEmptyIndexIfNone(); - } - - public synchronized void addObj2Doc(Obj2DocIface o2d) { - if (o2d != null) - obj2DocList.add(o2d); - } - - public synchronized List getObj2DocList() { - return obj2DocList; - } - - public synchronized void addSearcher(Searcher s){ - if( searchers == null ){ - searchers = new ArrayList(); - } - searchers.add( s ); - } - - @Override - public synchronized void prepareForRebuild() throws IndexingException { - if( this.indexing ) - log.error("Only an update will be performed, must call prepareForRebuild() before startIndexing()"); - else - this.fullRebuild = true; - } - - /** - * Checks to see if indexing is currently happening. - */ - public synchronized boolean isIndexing(){ - return indexing; - } - - public synchronized void startIndexing() throws IndexingException{ - while( indexing ){ //wait for indexing to end. - log.debug("LuceneIndexer.startIndexing() waiting..."); - try{ wait(); } catch(InterruptedException ex){} - } - checkStartPreconditions(); - try { - log.debug("Starting to index"); - if( this.fullRebuild ){ - String offLineDir = getOffLineBuildDir(); - this.currentOffLineDir = offLineDir; - writer = new IndexWriter(offLineDir, analyzer, true, MAX_FIELD_LENGTH); - writer.setSimilarity(new CustomSimilarity()); - }else{ - writer = getLiveIndexWriter(false); - } - indexing = true; - urisIndexed = new HashSet(); - } catch(Throwable th){ - throw new IndexingException("startIndexing() unable " + - "to make IndexWriter:" + th.getMessage()); - }finally{ - notifyAll(); - } - } - - - public synchronized void endIndexing() { - if( ! indexing ){ - notifyAll(); - return; - } - try { - urisIndexed = null; - log.debug("ending index"); - if( writer != null ) - writer.optimize(); - - if( this.fullRebuild ) - bringRebuildOnLine(); - - //close the searcher so it will find the newly indexed documents - for( Searcher s : searchers){ - s.close(); - } - //this is the call that replaces Searcher.close() - luceneIndexFactory.forceNewIndexSearcher(); - - } catch (IOException e) { - log.error("LuceneIndexer.endIndexing() - " - + "unable to optimize lucene index: \n" + e); - }finally{ - fullRebuild = false; - closeWriter(); - indexing = false; - notifyAll(); - } - } - - public void setLuceneIndexFactory(LuceneIndexFactory lif) { - luceneIndexFactory = lif; - } - - public synchronized Analyzer getAnalyzer(){ - return analyzer; - } - - /** - * Indexes an object. startIndexing() must be called before this method - * to setup the modifier. - * - */ - public void index(Individual ind, boolean newDoc) throws IndexingException { - if( ! indexing ) - throw new IndexingException("LuceneIndexer: must call " + - "startIndexing() before index()."); - if( writer == null ) - throw new IndexingException("LuceneIndexer: cannot build index," + - "IndexWriter is null."); - if( ind == null ) - log.debug("Individual to index was null, ignoring."); - try { - if( urisIndexed.contains(ind.getURI()) ){ - log.debug("already indexed " + ind.getURI() ); - return; - }else{ - urisIndexed.add(ind.getURI()); - log.debug("indexing " + ind.getURI()); - Iterator it = getObj2DocList().iterator(); - while (it.hasNext()) { - Obj2DocIface obj2doc = (Obj2DocIface) it.next(); - if (obj2doc.canTranslate(ind)) { - Document d = (Document) obj2doc.translate(ind); - if( d != null){ - if( !newDoc ){ - writer.updateDocument((Term)obj2doc.getIndexId(ind), d); - log.debug("updated " + ind.getName() + " " + ind.getURI()); - }else{ - writer.addDocument(d); - log.debug("added " + ind.getName() + " " + ind.getURI()); - } - }else{ - log.debug("removing from index " + ind.getURI()); - writer.deleteDocuments((Term)obj2doc.getIndexId(ind)); - } - } - } - } - } catch (IOException ex) { - throw new IndexingException(ex.getMessage()); - } - } - - /** - * Removes a single object from index. obj is translated - * using the obj2DocList. - */ - public void removeFromIndex(Individual ind) throws IndexingException { - if( writer == null ) - throw new IndexingException("LuceneIndexer: cannot delete from " + - "index, IndexWriter is null."); - try { - Iterator it = getObj2DocList().iterator(); - while (it.hasNext()) { - Obj2DocIface obj2doc = (Obj2DocIface) it.next(); - if (obj2doc.canTranslate(ind)) { - writer.deleteDocuments((Term)obj2doc.getIndexId(ind)); - log.debug("deleted " + ind.getName() + " " + ind.getURI()); - } - } - } catch (IOException ex) { - throw new IndexingException(ex.getMessage()); - } - } - - /** - * This will make a new directory and create a lucene index in it. - */ - private synchronized void makeNewIndex() throws IOException{ - - } - - private synchronized void closeWriter(){ - if( writer != null )try{ - writer.commit(); - writer.close(); - }catch(IOException ioe){ - log.error("LuceneIndexer.endIndexing() unable " + - "to close indexModifier " + ioe.getMessage()); - }catch(java.lang.IllegalStateException ise){ - //this is thrown when trying to close a closed index. - }catch(Throwable t){//must not jump away from here - log.error("in LuceneIndexer.closeModifier(): \n"+t); - } - writer = null; - } - - private synchronized void bringRebuildOnLine() { - closeWriter(); - File offLineDir = new File(currentOffLineDir); - File liveDir = new File(liveIndexDir); - - log.debug("deleting old live directory " + liveDir.getAbsolutePath()); - boolean deleted = deleteDir(liveDir); - if (! deleted ){ - log.debug("failed to delete live index directory " - + liveDir.getAbsolutePath()); - log.debug("Attempting to close searcher and delete live directory"); - this.luceneIndexFactory.forceClose(); - boolean secondDeleted = deleteDir(liveDir); - if( ! secondDeleted ){ - log.error("Search index is out of date and cannot be replaced " + - "because could not remove lucene index from directory" - + liveDir.getAbsolutePath()); - } - return; - } - - log.debug("moving " + offLineDir.getAbsolutePath() + " to " - + liveDir.getAbsolutePath()); - - boolean success = offLineDir.renameTo( liveDir ); - if( ! success ){ - log.error("could not move off line index at " - + offLineDir.getAbsolutePath() + " to live index directory " - + liveDir.getAbsolutePath()); - return; - } - - File oldWorkignDir = new File(currentOffLineDir); - if( oldWorkignDir.exists() ) - log.debug("old working directory should have been removed " + - "but still exits at " + oldWorkignDir.getAbsolutePath()); - - currentOffLineDir = null; - } - - private synchronized String getOffLineBuildDir(){ - File baseDir = new File(baseIndexDir); - baseDir.mkdirs(); - File tmpDir = new File( baseIndexDir + File.separator + "tmp" ); - tmpDir.mkdir(); - File offLineBuildDir = new File( baseIndexDir + File.separator + "tmp" + File.separator + "offLineRebuild" + System.currentTimeMillis()); - offLineBuildDir.mkdir(); - String dirName = offLineBuildDir.getAbsolutePath(); - if( ! dirName.endsWith(File.separator) ) - dirName = dirName + File.separator; - return dirName; - } - - public long getModified() { - long rv = 0; - try{ - FSDirectory d = FSDirectory.getDirectory(liveIndexDir); - rv = IndexReader.lastModified(d); - }catch(IOException ex){ - log.error("LuceneIndexer.getModified() - could not get modified time "+ ex); - } - return rv; - } - - /** Deletes all files and subdirectories under dir. - * Returns true if all deletions were successful. - * If a deletion fails, the method stops attempting to delete - * and returns false. */ - private static boolean deleteDir(File dir) { - if (dir.isDirectory()) { - String[] children = dir.list(); - for (int i=0; i - - edu.cornell.mannlib.vitro.search.setup.LuceneSetup - - - - * @author bdc34 - * - */ -public class LuceneSetup implements javax.servlet.ServletContextListener { - private static final Log log = LogFactory.getLog(LuceneSetup.class.getName()); - - private static final String PROPERTY_VITRO_HOME = "vitro.home.directory"; - private static final String LUCENE_SUBDIRECTORY_NAME = "luceneIndex"; - - /** - * Gets run to set up DataSource when the webapp servlet context gets - * created. - */ - @Override - public void contextInitialized(ServletContextEvent sce) { - - if (AbortStartup.isStartupAborted(sce.getServletContext())) { - return; - } - - try { - ServletContext context = sce.getServletContext(); - - String baseIndexDir = getBaseIndexDirName(context); - log.info("Setting up Lucene index. Base directory of lucene index: " + baseIndexDir); - - setBoolMax(); - - // these should really be set as annotation properties. - HashSet dataPropertyBlacklist = new HashSet(); - context.setAttribute(SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist); - HashSet objectPropertyBlacklist = new HashSet(); - objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom"); - context.setAttribute(SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist); - - //This is where to get a LucenIndex from. The indexer will - //need to reference this to notify it of updates to the index - context.setAttribute(BASE_INDEX_DIR, baseIndexDir); - LuceneIndexFactory lif = LuceneIndexFactory.setup(context, baseIndexDir); - String liveIndexDir = lif.getLiveIndexDir(context); - - // Here we want to put the LuceneIndex object into the application scope. - // This will attempt to create a new directory and empty index if there is none. - LuceneIndexer indexer = new LuceneIndexer( - getBaseIndexDirName(context), liveIndexDir, null, - getAnalyzer()); - context.setAttribute(ANALYZER, getAnalyzer()); - - //bk392 adding another argument to Entity2LuceneDoc - // that takes care of sparql queries for context nodes. - - OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); - Entity2LuceneDoc translator = new Entity2LuceneDoc( - new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), - new IndividualProhibitedFromSearchImpl(context) - - ); - indexer.addObj2Doc(translator); - - context.setAttribute(LuceneIndexer.class.getName(), indexer); - indexer.setLuceneIndexFactory(lif); - - if( indexer.isIndexCorroupt() ){ - log.info("lucene index is corrupt, requesting rebuild"); - } - if( indexer.isIndexEmpty() ){ - log.info("lucene index is empty, requesting rebuild"); - sce.getServletContext().setAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE); - } - - // This is where the builder gets the list of places to try to - // get objects to index. It is filtered so that non-public text - // does not get into the search index. - WebappDaoFactory wadf = (WebappDaoFactory) context.getAttribute("webappDaoFactory"); - VitroFilters vf = VitroFilterUtils.getPublicFilter(context); - wadf = new WebappDaoFactoryFiltering(wadf, vf); - - List sources = new ArrayList(); - sources.add(wadf.getIndividualDao()); - - //IndexBuilder builder = new IndexBuilder(context, indexer, sources); - IndexBuilder builder = new IndexBuilder(context, indexer, wadf); - - // here we add the IndexBuilder with the LuceneIndexer - // to the servlet context so we can access it later in the webapp. - context.setAttribute(IndexBuilder.class.getName(), builder); - - // set up listeners so search index builder is notified of changes to model - ServletContext ctx = sce.getServletContext(); - SearchReindexingListener srl = new SearchReindexingListener(builder); - ModelContext.registerListenerForChanges(ctx, srl); - - if( sce.getServletContext().getAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP) instanceof Boolean && - (Boolean)sce.getServletContext().getAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP) ){ - log.info("Rebuild of lucene index required before startup."); - builder.doIndexRebuild(); - int n = 0; - while( builder.isReindexRequested() || builder.isIndexing() ){ - n++; - if( n % 20 == 0 ) //output message every 10 sec. - log.info("Still rebuilding lucene index"); - Thread.sleep(500); - } - } - - log.info("Setup of Lucene index completed."); - } catch (Throwable t) { - AbortStartup.abortStartup(sce.getServletContext()); - log.error("***** Error setting up Lucene index *****", t); - throw new RuntimeException("Startup of vitro application was prevented by errors in the lucene configuration"); - } - } - - /** - * Gets run when the webApp Context gets destroyed. - */ - @Override - public void contextDestroyed(ServletContextEvent sce) { - log.debug("**** Running " + this.getClass().getName() + ".contextDestroyed()"); - IndexBuilder builder = (IndexBuilder) sce.getServletContext().getAttribute(IndexBuilder.class.getName()); - if( builder != null){ - builder.stopIndexingThread(); - } - } - - /** - * In wild card searches the query is first broken into many boolean - * searches OR'ed together. So if there is a query that would match a lot of - * records we need a high max boolean limit for the lucene search. - * - * This sets some static method in the lucene library to achieve this. - */ - public static void setBoolMax() { - BooleanQuery.setMaxClauseCount(16384); - } - - /** - * Gets the name of the directory to store the lucene index in. The - * {@link ConfigurationProperties} should have a property named - * 'vitro.home.directory' which has the parent directory of the directory to - * store the lucene index for this clone in. If the property is not found, - * an exception will be thrown. - * - * @return a string that is the directory to store the lucene index. - * @throws IllegalStateException - * if the property is not found, or if the home directory does - * not exist. - * @throws IOException - * if the directory doesn't exist and we fail to create it. - */ - private String getBaseIndexDirName(ServletContext ctx) throws IOException { - String homeDirName = ConfigurationProperties.getBean(ctx).getProperty( - PROPERTY_VITRO_HOME); - if (homeDirName == null) { - throw new IllegalStateException(PROPERTY_VITRO_HOME - + " not found in properties file."); - } - - File homeDir = new File(homeDirName); - if (!homeDir.exists()) { - throw new IllegalStateException("Vitro home directory '" - + homeDir.getAbsolutePath() + "' does not exist."); - } - - File luceneDir = new File(homeDir, LUCENE_SUBDIRECTORY_NAME); - if (!luceneDir.exists()) { - boolean created = luceneDir.mkdir(); - if (!created) { - throw new IOException( - "Unable to create Lucene index directory at '" - + luceneDir + "'"); - } - } - - return luceneDir.getPath(); - } - - /** - * Gets the analyzer that will be used when building the indexing - * and when analyzing the incoming search terms. - * - * @return - */ - private Analyzer getAnalyzer() { - - PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.LUCENE_29)); - - analyzer.addAnalyzer(ALLTEXT, new HtmlLowerStopStemAnalyzer()); - analyzer.addAnalyzer(ALLTEXTUNSTEMMED, new HtmlLowerStopAnalyzer()); - analyzer.addAnalyzer(NAME_UNSTEMMED, new HtmlLowerStopAnalyzer()); - analyzer.addAnalyzer(NAME_STEMMED, new HtmlLowerStopStemAnalyzer()); - analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29)); - analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29)); - analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer()); - analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer()); - - - return analyzer; - } - - public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup"; - public static final String ANALYZER= "lucene.analyzer"; - public static final String BASE_INDEX_DIR = "lucene.indexDir"; - public static final String SEARCH_DATAPROPERTY_BLACKLIST = - "search.dataproperty.blacklist"; - public static final String SEARCH_OBJECTPROPERTY_BLACKLIST = - "search.objectproperty.blacklist"; - -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java deleted file mode 100644 index 577b72354..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java +++ /dev/null @@ -1,205 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.search.lucene; - -import java.io.File; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; - -import javax.servlet.ServletContext; -import javax.servlet.ServletContextEvent; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.cjk.CJKAnalyzer; -import org.apache.lucene.search.BooleanQuery; - -import com.hp.hpl.jena.ontology.OntModel; - -import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties; -import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; -import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; -import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering; -import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; -import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; -import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; -import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; -import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl; -import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; -import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; - -/** - * Setup objects for lucene searching and indexing. - * - * The indexing and search objects, IndexBuilder and Searcher are found by the - * controllers IndexController and SearchController through the servletContext. - * This object will have the method contextInitialized() called when the tomcat - * server starts this webapp. - * - * The contextInitialized() will try to find the lucene index directory, - * make a LueceneIndexer and a LuceneSearcher. The LuceneIndexer will - * also get a list of Obj2Doc objects so it can translate object to lucene docs. - * - * To execute this at context creation put this in web.xml: - - - edu.cornell.mannlib.vitro.search.setup.LuceneSetup - - - - * @author bdc34 - * - */ -public class LuceneSetupCJK implements javax.servlet.ServletContextListener { - private static String indexDir = null; - private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName()); - private static final String PROPERTY_VITRO_HOME = "vitro.home.directory"; - private static final String LUCENE_SUBDIRECTORY_NAME = "luceneIndex"; - - /** - * Gets run to set up DataSource when the webapp servlet context gets created. - */ - @Override - @SuppressWarnings("unchecked") - public void contextInitialized(ServletContextEvent sce) { - ServletContext context = sce.getServletContext(); - log.info("**** Running "+this.getClass().getName()+".contextInitialized()"); - try{ - indexDir = getIndexDirName(sce); - log.info("Lucene indexDir: " + indexDir); - - setBoolMax(); - - HashSet dataPropertyBlacklist = new HashSet(); - context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist); - - HashSet objectPropertyBlacklist = new HashSet(); - objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom"); - context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist); - - //This is where to get a LucenIndex from. The indexer will - //need to reference this to notify it of updates to the index - LuceneIndexFactory lif = LuceneIndexFactory.setup(context, indexDir); - String liveIndexDir = lif.getLiveIndexDir(context); - - //here we want to put the LuceneIndex object into the application scope - LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer()); - context.setAttribute(LuceneSetup.ANALYZER, getAnalyzer()); - - OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); - Entity2LuceneDoc translator = new Entity2LuceneDoc( - new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), - new IndividualProhibitedFromSearchImpl(context) - ); - indexer.addObj2Doc(translator); - - indexer.setLuceneIndexFactory(lif); - - //This is where the builder gets the list of places to try to - //get objects to index. It is filtered so that non-public text - //does not get into the search index. - WebappDaoFactory wadf = - (WebappDaoFactory) context.getAttribute("webappDaoFactory"); - VitroFilters vf = VitroFilterUtils.getPublicFilter(context); - wadf = new WebappDaoFactoryFiltering(wadf,vf); - - List sources = new ArrayList(); - sources.add(wadf.getIndividualDao()); - - //IndexBuilder builder = new IndexBuilder(context,indexer,sources); - IndexBuilder builder = new IndexBuilder(context, indexer, wadf); - - // here we add the IndexBuilder with the LuceneIndexer - // to the servlet context so we can access it later in the webapp. - context.setAttribute(IndexBuilder.class.getName(),builder); - - //set up listeners so search index builder is notified of changes to model - OntModel baseOntModel = (OntModel)sce.getServletContext().getAttribute("baseOntModel"); - OntModel jenaOntModel = (OntModel)sce.getServletContext().getAttribute("jenaOntModel"); - SearchReindexingListener srl = new SearchReindexingListener( builder ); - ModelContext.registerListenerForChanges(sce.getServletContext(), srl); - - }catch(Exception ex){ - log.error("Could not setup lucene full text search." , ex); - } - - log.debug("**** End of "+this.getClass().getName()+".contextInitialized()"); - } - - /** - * Gets run when the webApp Context gets destroyed. - */ - @Override - public void contextDestroyed(ServletContextEvent sce) { - - log.info("**** Running "+this.getClass().getName()+".contextDestroyed()"); - IndexBuilder builder = (IndexBuilder)sce.getServletContext().getAttribute(IndexBuilder.class.getName()); - builder.stopIndexingThread(); - } - - /** - * In wild card searches the query is first broken into many boolean searches - * OR'ed together. So if there is a query that would match a lot of records - * we need a high max boolean limit for the lucene search. - * - * This sets some static method in the lucene library to achieve this. - */ - public static void setBoolMax() { - BooleanQuery.setMaxClauseCount(16384); - } - - /** - * Gets the name of the directory to store the lucene index in. The - * {@link ConfigurationProperties} should have a property named - * 'vitro.home.directory' which has the parent directory of the directory to - * store the lucene index for this clone in. If the property is not found, - * an exception will be thrown. - * - * @return a string that is the directory to store the lucene index. - * @throws IllegalStateException - * if the property is not found, - * or if the home directory does not exist. - * @throws IOException - * if the directory doesn't exist and we fail to create it. - */ - private String getIndexDirName(ServletContextEvent cte) throws IOException { - String homeDirName = ConfigurationProperties.getBean(cte).getProperty( - PROPERTY_VITRO_HOME); - if (homeDirName == null) { - throw new IllegalStateException(PROPERTY_VITRO_HOME - + " not found in properties file."); - } - - File homeDir = new File(homeDirName); - if (!homeDir.exists()) { - throw new IllegalStateException("Vitro home directory '" - + homeDir.getAbsolutePath() + "' does not exist."); - } - - File luceneDir = new File(homeDir, LUCENE_SUBDIRECTORY_NAME); - if (!luceneDir.exists()) { - boolean created = luceneDir.mkdir(); - if (!created) { - throw new IOException( - "Unable to create Lucene index directory at '" - + luceneDir + "'"); - } - } - - return luceneDir.getPath(); - } - - /** - * Gets the analyzer that will be used when building the indexing - * and when analyzing the incoming search terms. - * - * @return - */ - private Analyzer getAnalyzer() { - return new CJKAnalyzer(); - } - -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java index c890c9adb..6a28feb72 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java @@ -32,6 +32,8 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactoryJena; import edu.cornell.mannlib.vitro.webapp.search.IndexConstants; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearchImpl; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; +import edu.cornell.mannlib.vitro.webapp.search.indexing.AdditionalURIsForContextNodes; +import edu.cornell.mannlib.vitro.webapp.search.indexing.AdditionalURIsToIndex; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup; @@ -102,7 +104,11 @@ public class SolrSetup implements javax.servlet.ServletContextListener{ VitroFilters vf = VitroFilterUtils.getPublicFilter(context); wadf = new WebappDaoFactoryFiltering(wadf, vf); - IndexBuilder builder = new IndexBuilder(context, solrIndexer, wadf); + //make objects that will find additional URIs for context nodes etc + List uriFinders = new ArrayList(); + uriFinders.add( new AdditionalURIsForContextNodes(jenaOntModel) ); + + IndexBuilder builder = new IndexBuilder(context, solrIndexer, wadf, uriFinders); // to the servlet context so we can access it later in the webapp. context.setAttribute(IndexBuilder.class.getName(), builder); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/BrowseDataGetter.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/BrowseDataGetter.java index 458bd2d68..f16d134d0 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/BrowseDataGetter.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/BrowseDataGetter.java @@ -19,7 +19,7 @@ import org.json.JSONObject; import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.beans.VClassGroup; -import edu.cornell.mannlib.vitro.webapp.controller.JSONServlet; +import edu.cornell.mannlib.vitro.webapp.controller.SolrJsonServlet; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder; import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; @@ -81,7 +81,7 @@ public class BrowseDataGetter implements PageDataGetter { VClass vclass = vreq.getWebappDaoFactory().getVClassDao().getVClassByURI(classUri); map.put("class", new VClassTemplateModel(vclass)); - JSONObject vclassRes = JSONServlet.getLuceneIndividualsByVClass(vclass.getURI(), request, context); + JSONObject vclassRes = SolrJsonServlet.getSolrIndividualsByVClass(vclass.getURI(), request, context); map.put("totalCount", JsonToFmModel.convertJSONObjectToMap( (String) vclassRes.get("totalCount") )); map.put("alpha", JsonToFmModel.convertJSONObjectToMap( (String) vclassRes.get("alpha") )); map.put("individuals", JsonToFmModel.convertJSONArrayToList( (JSONArray) vclassRes.get("individuals") )); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/DataGetterUtils.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/DataGetterUtils.java index c97d5bf75..f02663c13 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/DataGetterUtils.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/DataGetterUtils.java @@ -12,7 +12,6 @@ import java.util.List; import java.util.Map; import javax.servlet.ServletContext; -import javax.servlet.http.HttpServletResponse; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -26,18 +25,9 @@ import edu.cornell.mannlib.vitro.webapp.beans.VClassGroup; import edu.cornell.mannlib.vitro.webapp.controller.Controllers; import edu.cornell.mannlib.vitro.webapp.controller.SolrJsonServlet; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.IndividualListController; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.PageController; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.SolrIndividualListController.PageRecord; -import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; -import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; +import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder; import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; -import edu.cornell.mannlib.vitro.webapp.dao.jena.VClassGroupCache; -import edu.cornell.mannlib.vitro.webapp.web.templatemodels.VClassGroupTemplateModel; -import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individuallist.BaseListedIndividual; -import freemarker.ext.beans.BeansWrapper; -import freemarker.template.TemplateModel; public class DataGetterUtils { protected static final String DATA_GETTER_MAP = "pageTypeToDataGetterMap"; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/IndividualsForClassesDataGetter.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/IndividualsForClassesDataGetter.java index f5a2ac3eb..8fe34bce4 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/IndividualsForClassesDataGetter.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/pageDataGetter/IndividualsForClassesDataGetter.java @@ -4,39 +4,25 @@ package edu.cornell.mannlib.vitro.webapp.utils.pageDataGetter; import java.net.URLEncoder; import java.util.ArrayList; -import java.util.Enumeration; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Set; -import org.apache.commons.lang.StringUtils; import javax.servlet.ServletContext; +import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.json.JSONArray; import org.json.JSONObject; -import edu.cornell.mannlib.vitro.webapp.beans.DataProperty; import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.beans.VClassGroup; -import edu.cornell.mannlib.vitro.webapp.controller.Controllers; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.IndividualListController; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.SolrIndividualListController; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.IndividualListController.PageRecord; import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; -import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; -import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; import edu.cornell.mannlib.vitro.webapp.dao.jena.VClassGroupCache; import edu.cornell.mannlib.vitro.webapp.web.templatemodels.VClassGroupTemplateModel; -import edu.cornell.mannlib.vitro.webapp.web.templatemodels.individuallist.BaseListedIndividual; -import edu.cornell.mannlib.vitro.webapp.controller.JSONServlet; -import freemarker.ext.beans.BeansWrapper; -import freemarker.template.TemplateModel; /** * This will pass these variables to the template: @@ -52,9 +38,7 @@ public class IndividualsForClassesDataGetter implements PageDataGetter{ Map> classIntersectionsMap = vreq.getWebappDaoFactory().getPageDao().getClassesAndRestrictionsForPage(pageUri); - //Use Individual List Controller to get all the individuals and related data - String alpha = IndividualListController.getAlphaParameter(vreq); - int pageParam = IndividualListController.getPageParameter(vreq); + //Use Individual List Controller to get all the individuals and related data List inds = new ArrayList(); try{ List classes = classIntersectionsMap.get("classes"); @@ -184,4 +168,22 @@ public class IndividualsForClassesDataGetter implements PageDataGetter{ vc.setEntityCount(0); } } + + protected static String getAlphaParameter(VitroRequest request){ + return request.getParameter("alpha"); + } + + protected static int getPageParameter(VitroRequest request) { + String pageStr = request.getParameter("page"); + if( pageStr != null ){ + try{ + return Integer.parseInt(pageStr); + }catch(NumberFormatException nfe){ + log.debug("could not parse page parameter"); + return 1; + } + }else{ + return 1; + } + } } \ No newline at end of file diff --git a/webapp/test/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsForContextNodesTest.java b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsForContextNodesTest.java new file mode 100644 index 000000000..57be52350 --- /dev/null +++ b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/indexing/AdditionalURIsForContextNodesTest.java @@ -0,0 +1,31 @@ +package edu.cornell.mannlib.vitro.webapp.search.indexing; + +import static org.junit.Assert.assertTrue; + +import java.util.List; + +import org.junit.Test; + +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.rdf.model.ModelFactory; + + +public class AdditionalURIsForContextNodesTest { + + @Test + public void testFindAdditionalURIsToIndex() { + +// //make a test model with an person, an authorship context node and a book +// OntModel model = ModelFactory.createOntologyModel(); +// +// //make an AdditionalURIsForContextNodesTest object with that model +// AdditionalURIsForContextNodes uriFinder = new AdditionalURIsForContextNodes( model ); +// +// //execute the method and check the results +// List uris = uriFinder.findAdditionalURIsToIndex( "http://example.com/personA"); +// +// assertTrue("could not find authorship context node", uris.contains("http://example.com/authorshipNode")); +// assertTrue("could not find book indivdiual", uris.contains("http://example.com/bookA")); + } + +}