From 78491234db2199f83a05e8d0d30cd402caeba33d Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Tue, 20 Jan 2015 10:25:55 -0500 Subject: [PATCH 1/7] VIVO-871 Improve performance when reading object properties. Also, improve display. --- .../jena/ObjectPropertyStatementDaoJena.java | 4 +- .../jena/ObjectPropertyStatementDaoSDB.java | 313 ++++++++++-------- .../tasks/UpdateStatementsTask.java | 2 +- .../searchindex/tasks/UpdateUrisTask.java | 2 +- .../body/admin/admin-showThreads.ftl | 23 +- 5 files changed, 196 insertions(+), 148 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ObjectPropertyStatementDaoJena.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ObjectPropertyStatementDaoJena.java index 34eeb6dd8..44bedbdd2 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ObjectPropertyStatementDaoJena.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ObjectPropertyStatementDaoJena.java @@ -53,8 +53,8 @@ public class ObjectPropertyStatementDaoJena extends JenaBaseDao implements Objec private static final Log log = LogFactory.getLog(ObjectPropertyStatementDaoJena.class); - private DatasetWrapperFactory dwf; - private RDFService rdfService; + protected DatasetWrapperFactory dwf; + protected RDFService rdfService; public ObjectPropertyStatementDaoJena(RDFService rdfService, DatasetWrapperFactory dwf, diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ObjectPropertyStatementDaoSDB.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ObjectPropertyStatementDaoSDB.java index 07565f011..30cf15db9 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ObjectPropertyStatementDaoSDB.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/ObjectPropertyStatementDaoSDB.java @@ -2,164 +2,205 @@ package edu.cornell.mannlib.vitro.webapp.dao.jena; +import static edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ModelSerializationFormat.N3; + import java.util.ArrayList; 
-import java.util.HashMap; +import java.util.HashSet; import java.util.List; -import java.util.Map; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.ontology.OntModelSpec; -import com.hp.hpl.jena.query.Dataset; -import com.hp.hpl.jena.query.QueryExecution; -import com.hp.hpl.jena.query.QueryExecutionFactory; -import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.rdf.model.Property; -import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Statement; -import com.hp.hpl.jena.shared.Lock; -import com.hp.hpl.jena.util.iterator.ClosableIterator; +import com.hp.hpl.jena.vocabulary.RDF; import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.ObjectProperty; import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement; import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatementImpl; import edu.cornell.mannlib.vitro.webapp.dao.ObjectPropertyStatementDao; -import edu.cornell.mannlib.vitro.webapp.dao.jena.IndividualSDB.IndividualNotFoundException; import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactorySDB.SDBDatasetMode; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; +import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; public class ObjectPropertyStatementDaoSDB extends ObjectPropertyStatementDaoJena implements ObjectPropertyStatementDao { + private static final Log log = LogFactory + .getLog(ObjectPropertyStatementDaoSDB.class); - private static final Log log = LogFactory.getLog(ObjectPropertyStatementDaoSDB.class); - - private DatasetWrapperFactory dwf; - private SDBDatasetMode datasetMode; - private 
WebappDaoFactorySDB wadf; - - public ObjectPropertyStatementDaoSDB( - RDFService rdfService, - DatasetWrapperFactory dwf, - SDBDatasetMode datasetMode, - WebappDaoFactorySDB wadf) { - super (rdfService, dwf, wadf); - this.dwf = dwf; - this.datasetMode = datasetMode; + // Get the types of the base entity. + private static final String SUBJECT_TYPE_QUERY = "" + + "PREFIX rdf: \n" + + "CONSTRUCT { \n" // + + " ?uri rdf:type ?type . \n" // + + "} WHERE { \n" // + + " ?uri rdf:type ?type . \n" // + + "} \n"; + + // Get the types of all objects of properties. + private static final String OBJECT_TYPE_QUERY = "" + + "PREFIX rdf: \n" + + "CONSTRUCT { \n" // + + " ?uri ?p ?o . \n" // + + " ?o rdf:type ?type . \n" // + + "} WHERE { \n" // + + " ?uri ?p ?o . \n" // + + " ?o rdf:type ?type . \n" // + + "} \n"; + + // Get the labels of all objects of properties. + private static final String OBJECT_LABEL_QUERY = "" + + "PREFIX rdfs: \n" + + "CONSTRUCT { \n" // + + " ?uri ?p ?o . \n" // + + " ?o rdfs:label ?label . \n" // + + "} WHERE { \n" // + + " ?uri ?p ?o . \n" // + + " ?o rdfs:label ?label . 
\n" // + + "} \n"; + + private final WebappDaoFactorySDB wadf; + private final SDBDatasetMode datasetMode; + + public ObjectPropertyStatementDaoSDB(RDFService rdfService, + DatasetWrapperFactory dwf, SDBDatasetMode datasetMode, + WebappDaoFactorySDB wadf) { + super(rdfService, dwf, wadf); this.wadf = wadf; + this.datasetMode = datasetMode; + } + + @Override + public Individual fillExistingObjectPropertyStatements(Individual entity) { + if (entity == null || entity.getURI() == null) + return entity; + else { + List objectPropertyStatements = new ArrayList<>(); + String subjectUri = entity.getURI(); + + Model m = getInfoForObjectsOfThisEntity(subjectUri); + + Set subjectTypes = getTypes(m, subjectUri); + for (ObjectPropertyPair pair : getRawObjectPropertyPairs(m, + subjectUri)) { + String predicateUri = pair.getPredicateUri(); + String objectUri = pair.getObjectUri(); + Set objectTypes = getTypes(m, objectUri); + + ObjectProperty prop = findRawProperty(predicateUri); + if (prop == null) { + continue; + } + + Individual object = new IndividualSDB(objectUri, dwf, + datasetMode, wadf, m); + objectPropertyStatements.add(createStatement(entity, prop, + object)); + } + entity.setObjectPropertyStatements(objectPropertyStatements); + return entity; + } + } + + /** + * Get the types of this entity. Get the related object and the predicates + * by which they are related. Get the types and labels of those related + * objects. 
+ */ + private Model getInfoForObjectsOfThisEntity(String subjectUri) { + Model m = ModelFactory.createDefaultModel(); + try { + m.add(RDFServiceUtils.parseModel( + rdfService.sparqlConstructQuery( + substituteUri(subjectUri, SUBJECT_TYPE_QUERY), N3), + N3)); + m.add(RDFServiceUtils.parseModel( + rdfService.sparqlConstructQuery( + substituteUri(subjectUri, OBJECT_TYPE_QUERY), N3), + N3)); + m.add(RDFServiceUtils.parseModel( + rdfService.sparqlConstructQuery( + substituteUri(subjectUri, OBJECT_LABEL_QUERY), N3), + N3)); + } catch (RDFServiceException e) { + log.warn("Failed to fill object property statements for '" + + subjectUri + "'", e); + } + return m; + } + + private String substituteUri(String uri, String query) { + return query.replace("?uri", "<" + uri + "> "); + } + + private Set getTypes(Model m, String uri) { + Set typeUris = new HashSet<>(); + for (RDFNode typeNode : m.listObjectsOfProperty(m.createResource(uri), + RDF.type).toSet()) { + if (typeNode.isURIResource()) { + typeUris.add(typeNode.asResource().getURI()); + } + } + return typeUris; + } + + private List getRawObjectPropertyPairs(Model m, + String subjectUri) { + List list = new ArrayList<>(); + for (Statement stmt : m.listStatements(m.createResource(subjectUri), + null, (RDFNode) null).toList()) { + if (wadf.getNonuserNamespaces().contains( + stmt.getPredicate().getNameSpace())) { + continue; + } + if (!stmt.getObject().isURIResource()) { + continue; + } + list.add(new ObjectPropertyPair(stmt.getPredicate().getURI(), stmt + .getObject().asResource().getURI())); + } + return list; + } + + private ObjectProperty findRawProperty(String predicateUri) { + return wadf.getObjectPropertyDao().getObjectPropertyByURI(predicateUri); + } + + private ObjectPropertyStatement createStatement(Individual entity, + ObjectProperty prop, Individual object) { + ObjectPropertyStatementImpl ops = new ObjectPropertyStatementImpl(); + ops.setSubject(entity); + ops.setProperty(prop); + ops.setObject(object); + return 
ops; + } + + // ---------------------------------------------------------------------- + // Helper classes + // ---------------------------------------------------------------------- + + private static class ObjectPropertyPair { + private final String predicateUri; + private final String objectUri; + + public ObjectPropertyPair(String predicateUri, String objectUri) { + this.predicateUri = predicateUri; + this.objectUri = objectUri; + } + + public String getPredicateUri() { + return predicateUri; + } + + public String getObjectUri() { + return objectUri; + } + } - - @Override - public Individual fillExistingObjectPropertyStatements(Individual entity) { - if (entity.getURI() == null) - return entity; - else { - Map uriToObjectProperty = new HashMap(); - String query = "CONSTRUCT { \n" + - " <" + entity.getURI() + "> ?p ?o . \n" + -// " ?o a ?oType . \n" + -// " ?o <" + RDFS.label.getURI() + "> ?oLabel . \n" + -// " ?o <" + VitroVocabulary.MONIKER + "> ?oMoniker \n" + - "} WHERE { \n" + - " { <" + entity.getURI() + "> ?p ?o } \n" + -// " UNION { <" + entity.getURI() + "> ?p ?o . ?o a ?oType } \n" + -// " UNION { <" + entity.getURI() + "> ?p ?o . \n" + -// " ?o <" + RDFS.label.getURI() + "> ?oLabel } \n" + -// " UNION { <" + entity.getURI() + "> ?p ?o . 
\n " + -// " ?o <" + VitroVocabulary.MONIKER + "> ?oMoniker } \n" + - "}"; - long startTime = System.currentTimeMillis(); - Model m = null; - DatasetWrapper w = dwf.getDatasetWrapper(); - Dataset dataset = w.getDataset(); - dataset.getLock().enterCriticalSection(Lock.READ); - QueryExecution qexec = null; - try { - qexec = QueryExecutionFactory.create(QueryFactory.create(query), dataset); - m = qexec.execConstruct(); - } finally { - if(qexec != null) qexec.close(); - dataset.getLock().leaveCriticalSection(); - w.close(); - } - if (log.isDebugEnabled()) { - log.debug("Time (ms) to query for related individuals: " + (System.currentTimeMillis() - startTime)); - if (System.currentTimeMillis() - startTime > 1000) { - //log.debug(query); - log.debug("Results size (statements): " + m.size()); - } - } - - OntModel ontModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM, m); - ontModel.enterCriticalSection(Lock.READ); - try { - Resource ind = ontModel.getResource(entity.getURI()); - List objPropertyStmtList = new ArrayList(); - ClosableIterator propIt = ind.listProperties(); - try { - while (propIt.hasNext()) { - Statement st = propIt.next(); - if (st.getObject().isResource() && !(NONUSER_NAMESPACES.contains(st.getPredicate().getNameSpace()))) { - try { - ObjectPropertyStatement objPropertyStmt = new ObjectPropertyStatementImpl(); - objPropertyStmt.setSubjectURI(entity.getURI()); - objPropertyStmt.setSubject(entity); - objPropertyStmt.setObjectURI(((Resource)st.getObject()).getURI()); - - objPropertyStmt.setPropertyURI(st.getPredicate().getURI()); - Property prop = st.getPredicate(); - if( uriToObjectProperty.containsKey(prop.getURI())){ - objPropertyStmt.setProperty(uriToObjectProperty.get(prop.getURI())); - }else{ - ObjectProperty p = getWebappDaoFactory().getObjectPropertyDao().getObjectPropertyByURI(prop.getURI()); - if( p != null ){ - uriToObjectProperty.put(prop.getURI(), p); - objPropertyStmt.setProperty(uriToObjectProperty.get(prop.getURI())); - }else{ - 
//if ObjectProperty not found in ontology, skip it - continue; - } - } - if (objPropertyStmt.getObjectURI() != null) { - //this might throw IndividualNotFoundException - Individual objInd = new IndividualSDB( - objPropertyStmt.getObjectURI(), - this.dwf, - datasetMode, - wadf); - objPropertyStmt.setObject(objInd); - } - - //only add statement to list if it has its values filled out - if ( (objPropertyStmt.getSubjectURI() != null) - && (objPropertyStmt.getPropertyURI() != null) - && (objPropertyStmt.getObject() != null) ) { - objPropertyStmtList.add(objPropertyStmt); - } - - } catch (IndividualNotFoundException t) { - log.debug(t,t); - continue; - } catch (Throwable t){ - log.error(t,t); - continue; - } - } - } - } finally { - propIt.close(); - } - entity.setObjectPropertyStatements(objPropertyStmtList); - } finally { - ontModel.leaveCriticalSection(); - } - return entity; - } - } - } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateStatementsTask.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateStatementsTask.java index 505c58fcb..dcf489fa6 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateStatementsTask.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateStatementsTask.java @@ -77,7 +77,7 @@ public class UpdateStatementsTask implements Task { this.uris = Collections.synchronizedSet(new HashSet()); - this.status = new Status(changes.size(), 200, listeners); + this.status = new Status(changes.size(), 500, listeners); } @Override diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java index 9eec3d2ff..d84f40799 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java @@ -66,7 +66,7 @@ public class 
UpdateUrisTask implements Task { this.listeners = listeners; this.pool = pool; - this.status = new Status(uris.size(), 200, listeners); + this.status = new Status(uris.size(), 500, listeners); this.searchEngine = ApplicationUtils.instance().getSearchEngine(); diff --git a/webapp/web/templates/freemarker/body/admin/admin-showThreads.ftl b/webapp/web/templates/freemarker/body/admin/admin-showThreads.ftl index 70043a9d5..cf6e61d0f 100644 --- a/webapp/web/templates/freemarker/body/admin/admin-showThreads.ftl +++ b/webapp/web/templates/freemarker/body/admin/admin-showThreads.ftl @@ -20,12 +20,19 @@ table.threadInfo th {

${i18n().background_threads}

- <#list threads as threadInfo> - - - - - -
${i18n().name}${threadInfo.name}
${i18n().work_level}${threadInfo.workLevel}
${i18n().since}${threadInfo.since}
${i18n().flags}${threadInfo.flags}
- + + + + + + + + <#list threads as threadInfo> + + + + + + +
${i18n().name}${i18n().work_level}${i18n().since}${i18n().flags}
${threadInfo.name}${threadInfo.workLevel}${threadInfo.since}${threadInfo.flags}
\ No newline at end of file From 83a5523ace522d1a99dfacff3a637301cfbb7bf1 Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Tue, 20 Jan 2015 14:06:11 -0500 Subject: [PATCH 2/7] VIVO-869 Improve the display Show exclusions count as separate from deletions count. When there is an error on the Ajax call, display the error status and stop asking for refreshes. When a rebuild is requested, redirect to just status, so refreshing the page would not request another rebuild. --- .../searchIndexer/SearchIndexerStatus.java | 10 +++++-- .../search/controller/IndexController.java | 20 +++++++++---- .../search/controller/IndexHistory.java | 3 +- .../searchindex/tasks/UpdateUrisTask.java | 28 +++++++++++++++---- webapp/web/js/search/searchIndex.js | 13 +++++++-- .../freemarker/body/admin/searchIndex.ftl | 2 ++ .../body/admin/searchIndexStatus.ftl | 2 +- 7 files changed, 62 insertions(+), 16 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexerStatus.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexerStatus.java index c4d9848c6..89a7a5309 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexerStatus.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexerStatus.java @@ -102,19 +102,25 @@ public class SearchIndexerStatus { } public static class UriCounts extends Counts { + private final int excluded; private final int deleted; private final int updated; private final int remaining; private final int total; - public UriCounts(int deleted, int updated, int remaining, int total) { + public UriCounts(int excluded, int deleted, int updated, int remaining, int total) { super(Type.URI_COUNTS); + this.excluded = excluded; this.deleted = deleted; this.updated = updated; this.remaining = remaining; this.total = total; } + public int getExcluded() { + return excluded; + } + public int getDeleted() { return deleted; } @@ -133,7 +139,7 @@ 
public class SearchIndexerStatus { @Override public String toString() { - return "[deleted=" + deleted + ", updated=" + updated + return "[excluded=" + excluded + ", deleted=" + deleted + ", updated=" + updated + ", remaining=" + remaining + ", total=" + total + "]"; } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexController.java index 43f3e3676..f22cfcf6a 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexController.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexController.java @@ -16,10 +16,13 @@ import org.apache.commons.logging.LogFactory; import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils; import edu.cornell.mannlib.vitro.webapp.auth.permissions.SimplePermission; +import edu.cornell.mannlib.vitro.webapp.auth.policy.PolicyHelper; +import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.AuthorizationRequest; import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.RequestedAction; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.FreemarkerHttpServlet; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder; +import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.RedirectResponseValues; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.TemplateResponseValues; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer; @@ -99,10 +102,6 @@ public class IndexController extends FreemarkerHttpServlet { @Override public void doGet(HttpServletRequest req, HttpServletResponse resp) throws IOException, ServletException { - if (!isAuthorizedToDisplayPage(req, resp, REQUIRED_ACTIONS)) { - return; - } - switch 
(RequestType.fromRequest(req)) { case STATUS: showStatus(req, resp); @@ -118,12 +117,17 @@ public class IndexController extends FreemarkerHttpServlet { return "Rebuild Search Index"; } + @Override + protected AuthorizationRequest requiredActions(VitroRequest vreq) { + return REQUIRED_ACTIONS; + } + @Override protected ResponseValues processRequest(VitroRequest vreq) { switch (RequestType.fromRequest(vreq)) { case REBUILD: requestRebuild(); - return showDisplay(); + return new RedirectResponseValues(PAGE_URL); default: return showDisplay(); } @@ -138,6 +142,12 @@ public class IndexController extends FreemarkerHttpServlet { private void showStatus(HttpServletRequest req, HttpServletResponse resp) throws IOException { + if (!PolicyHelper.isAuthorizedForActions(req, REQUIRED_ACTIONS)) { + resp.setStatus(HttpServletResponse.SC_FORBIDDEN); + resp.getWriter().write("You are not authorized to access this page."); + return; + } + try { Map body = new HashMap<>(); body.put("statusUrl", UrlBuilder.getUrl(PAGE_URL, "status", "true")); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexHistory.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexHistory.java index 0bbac9364..75e18b962 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexHistory.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexHistory.java @@ -28,7 +28,7 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatu public class IndexHistory implements SearchIndexer.Listener { private static final Log log = LogFactory.getLog(IndexHistory.class); - private final static int MAX_EVENTS = 10; + private final static int MAX_EVENTS = 20; private final Deque events = new LinkedList<>(); @@ -84,6 +84,7 @@ public class IndexHistory implements SearchIndexer.Listener { } private void addCounts(UriCounts counts, Map map) { + map.put("excluded", counts.getExcluded()); map.put("updated", 
counts.getUpdated()); map.put("deleted", counts.getDeleted()); map.put("remaining", counts.getRemaining()); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java index d84f40799..3deb8c6ee 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java @@ -69,8 +69,7 @@ public class UpdateUrisTask implements Task { this.status = new Status(uris.size(), 500, listeners); this.searchEngine = ApplicationUtils.instance().getSearchEngine(); - - + } @Override @@ -86,8 +85,10 @@ public class UpdateUrisTask implements Task { break; } else { Individual ind = getIndividual(uri); - if (ind == null || isExcluded(ind)) { + if (ind == null) { deleteDocument(uri); + } else if (isExcluded(ind)) { + excludeDocument(uri); } else { updateDocument(ind); } @@ -133,6 +134,17 @@ public class UpdateUrisTask implements Task { } } + /** An exclusion is just a delete for different reasons. 
*/ + private void excludeDocument(String uri) { + try { + searchEngine.deleteById(SearchIndexerUtils.getIdForUri(uri)); + status.incrementExclusions(); + log.debug("excluded '" + uri + "' from search index."); + } catch (Exception e) { + log.warn("Failed to exclude '" + uri + "' from search index", e); + } + } + private void updateDocument(Individual ind) { Runnable workUnit = new UpdateDocumentWorkUnit(ind, modifiers); pool.submit(workUnit, this); @@ -165,6 +177,7 @@ public class UpdateUrisTask implements Task { private final ListenerList listeners; private int updated = 0; private int deleted = 0; + private int excluded = 0; private Date since = new Date(); public Status(int total, int progressInterval, ListenerList listeners) { @@ -184,6 +197,11 @@ public class UpdateUrisTask implements Task { since = new Date(); } + public synchronized void incrementExclusions() { + excluded++; + since = new Date(); + } + private void maybeFireProgressEvent() { if (updated > 0 && updated % progressInterval == 0) { listeners.fireEvent(new Event(PROGRESS, @@ -192,9 +210,9 @@ public class UpdateUrisTask implements Task { } public synchronized SearchIndexerStatus getSearchIndexerStatus() { - int remaining = total - updated - deleted; + int remaining = total - updated - deleted - excluded; return new SearchIndexerStatus(PROCESSING_URIS, since, - new UriCounts(deleted, updated, remaining, total)); + new UriCounts(excluded, deleted, updated, remaining, total)); } } diff --git a/webapp/web/js/search/searchIndex.js b/webapp/web/js/search/searchIndex.js index 560136fa6..fca116ba6 100644 --- a/webapp/web/js/search/searchIndex.js +++ b/webapp/web/js/search/searchIndex.js @@ -9,8 +9,12 @@ function updateSearchIndexerStatus() { url: searchIndexerStatusUrl, dataType: "html", complete: function(xhr, status) { - updatePanelContents(xhr.responseText); - setTimeout(updateSearchIndexerStatus,5000); + if (xhr.status == 200) { + updatePanelContents(xhr.responseText); + 
setTimeout(updateSearchIndexerStatus,5000); + } else { + displayErrorMessage(xhr.status + " " + xhr.statusText); + } } }); } @@ -19,4 +23,9 @@ function updatePanelContents(contents) { document.getElementById("searchIndexerStatus").innerHTML = contents; } +function displayErrorMessage(message) { + document.getElementById("searchIndexerError").innerHTML = "

" + message + "

"; +} + + $(document).ready(updateSearchIndexerStatus()); diff --git a/webapp/web/templates/freemarker/body/admin/searchIndex.ftl b/webapp/web/templates/freemarker/body/admin/searchIndex.ftl index 76fae4191..cd9330034 100644 --- a/webapp/web/templates/freemarker/body/admin/searchIndex.ftl +++ b/webapp/web/templates/freemarker/body/admin/searchIndex.ftl @@ -7,6 +7,8 @@

${i18n().search_index_status}

+
+
Search Indexer Status
diff --git a/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl b/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl index d4e97c3f0..804cad8dc 100644 --- a/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl +++ b/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl @@ -64,7 +64,7 @@ <#macro showIndexerCounts countsType, counts> <#if countsType == "URI_COUNTS"> - Updated: ${counts.updated}, deleted: ${counts.deleted}, remaining: ${counts.remaining}, total: ${counts.total} + Updated: ${counts.updated}, excluded: ${counts.excluded}, deleted: ${counts.deleted}, remaining: ${counts.remaining}, total: ${counts.total} <#elseif countsType == "STATEMENT_COUNTS"> Processed: ${counts.processed}, remaining: ${counts.remaining}, total: ${counts.total} <#elseif countsType == "REBUILD_COUNTS"> From fcfd2e7be6c4e7286b17256564b74c8f21958553 Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Tue, 20 Jan 2015 16:09:53 -0500 Subject: [PATCH 3/7] VIVO-869 The display should not barf if no IndexHistory is present. 
--- .../webapp/search/controller/IndexController.java | 9 ++++++--- .../freemarker/body/admin/searchIndexStatus.ftl | 10 +++++++--- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexController.java index f22cfcf6a..2c100a140 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexController.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/controller/IndexController.java @@ -144,17 +144,20 @@ public class IndexController extends FreemarkerHttpServlet { throws IOException { if (!PolicyHelper.isAuthorizedForActions(req, REQUIRED_ACTIONS)) { resp.setStatus(HttpServletResponse.SC_FORBIDDEN); - resp.getWriter().write("You are not authorized to access this page."); + resp.getWriter().write( + "You are not authorized to access this page."); return; } - + try { Map body = new HashMap<>(); body.put("statusUrl", UrlBuilder.getUrl(PAGE_URL, "status", "true")); body.put("rebuildUrl", UrlBuilder.getUrl(PAGE_URL, "rebuild", "true")); body.put("status", buildStatusMap(indexer.getStatus())); - body.put("history", history.toMaps()); + if (history != null) { + body.put("history", history.toMaps()); + } String rendered = FreemarkerProcessingServiceSetup.getService( getServletContext()).renderTemplate(STATUS_TEMPLATE_NAME, diff --git a/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl b/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl index 804cad8dc..4ffff2a13 100644 --- a/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl +++ b/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl @@ -40,9 +40,13 @@

History

- <#list history as ie> - <@showIndexerEvent ie /> - + <#if history?has_content > + <#list history as ie> + <@showIndexerEvent ie /> + + <#else> + +
Event Status Since Counts
Search indexer history is not available.
From d37c41cf650a93a823b6b86936ded7331ea9630b Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Tue, 20 Jan 2015 17:21:09 -0500 Subject: [PATCH 4/7] VIVO-869 Improve throttling for indexing tasks. Start the implementation in "paused" state, so tasks submitted before startup() are queued. Increase the time interval on IndexingChangeListener, so we get larger batches. Change the RejectedExecutionHandler on the pool to CallerRunsPolicy, so if there is no available thread for a work unit then the thread of the task itself will run it. --- .../modules/searchIndexer/SearchIndexer.java | 19 ++++++++++++++++++- .../searchindex/IndexingChangeListener.java | 2 +- .../webapp/searchindex/SearchIndexerImpl.java | 18 ++++++++++++++---- .../searchindex/SearchIndexerSetup.java | 3 ++- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexer.java index 724d2de41..79c7e04ce 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexer.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexer.java @@ -12,6 +12,9 @@ import edu.cornell.mannlib.vitro.webapp.modules.Application; /** * Interface for the code that controls the contents of the search index. * + * If calls are made to schedule tasks prior to startup(), they will be queued, + * since the indexer is created in paused mode. + * * The only calls that are valid after shutdown are shutdown(), getStatus() and * removeListener(). */ @@ -26,6 +29,8 @@ public interface SearchIndexer extends Application.Module { * We accumulate a batch of affected URIs, removing duplicates if they * occur, and then submit them for updates. * + * If called before startup or while paused, this task will be queued. + * * @param urls * if null or empty, this call has no effect. 
* @throws IllegalStateException @@ -43,6 +48,8 @@ public interface SearchIndexer extends Application.Module { * A URI belongs in the index if it refers to an existing individual in the * model, and is not excluded. * + * If called before startup or while paused, this task will be queued. + * * @param uris * if null or empty, this call has no effect. * @throws IllegalStateException @@ -57,6 +64,8 @@ public interface SearchIndexer extends Application.Module { * If a rebuild is already pending or in progress, this method has no * effect. * + * If called before startup or while paused, this task will be queued. + * * @throws IllegalStateException * if called after shutdown() */ @@ -66,6 +75,11 @@ public interface SearchIndexer extends Application.Module { * Stop processing new tasks. Requests will be queued until a call to * unpause(). * + * The SearchIndexer is paused when created. When fully initialized, it + * should be unpaused. + * + * If already paused, this call has no effect. + * * @throws IllegalStateException * if called after shutdown() */ @@ -75,7 +89,10 @@ public interface SearchIndexer extends Application.Module { * Resume processing new tasks. Any requests that were received since the * call to pause() will now be scheduled for processing. * - * Has no effect if called after shutdown(). + * The SearchIndexer is paused when created. When fully initialized, it + * should be unpaused. + * + * Has no effect if called after shutdown() or if not paused. 
*/ void unpause(); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/IndexingChangeListener.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/IndexingChangeListener.java index b9012189c..958a6419a 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/IndexingChangeListener.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/IndexingChangeListener.java @@ -160,7 +160,7 @@ public class IndexingChangeListener implements ChangeListener { if (queue.isShutdown()) { log.warn("Attempt to start ticker after shutdown request."); } else { - queue.schedule(new TickerResponse(), 500, TimeUnit.MILLISECONDS); + queue.schedule(new TickerResponse(), 1, TimeUnit.SECONDS); running = true; } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java index 727652447..8091b71d9 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java @@ -263,8 +263,8 @@ public class SearchIndexerImpl implements SearchIndexer { if (status.getState() != State.SHUTDOWN) { listeners.fireEvent(new Event(SHUTDOWN_REQUESTED, status)); - pool.shutdown(); taskQueue.shutdown(); + pool.shutdown(); for (DocumentModifier dm : modifiers) { try { @@ -321,7 +321,7 @@ public class SearchIndexerImpl implements SearchIndexer { private static class Scheduler { private final TaskQueue taskQueue; private final List deferredQueue; - private volatile boolean paused; + private volatile boolean paused = true; public Scheduler(TaskQueue taskQueue) { this.taskQueue = taskQueue; @@ -461,6 +461,9 @@ public class SearchIndexerImpl implements SearchIndexer { * * The task is notified as each unit completes. * + * If no thread is available for a work unit, the thread of the task itself + * will run it. This provides automatic throttling. 
+ * * Only one task is active at a time, so the task can simply wait until this * pool is idle to know that all of its units have completed. * @@ -474,14 +477,21 @@ public class SearchIndexerImpl implements SearchIndexer { this.pool = new ThreadPoolExecutor(threadPoolSize, threadPoolSize, 10, TimeUnit.SECONDS, new ArrayBlockingQueue(50), new VitroBackgroundThread.Factory( - "SearchIndexer_ThreadPool")); + "SearchIndexer_ThreadPool"), + new ThreadPoolExecutor.CallerRunsPolicy()); } public void submit(Runnable workUnit, Task task) { try { pool.execute(new WorkUnitWrapper(workUnit, task)); } catch (RejectedExecutionException e) { - log.warn("Work unit was rejected: " + workUnit + " for " + task); + if (pool.isShutdown()) { + log.warn("Work unit was rejected: " + workUnit + " for " + + task); + } else { + log.error("Work unit was rejected: " + workUnit + " for " + + task, e); + } } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java index 13fd9d33a..bb94ab607 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java @@ -32,7 +32,7 @@ import edu.cornell.mannlib.vitro.webapp.utils.developer.listeners.DeveloperDisab * Start the SearchIndexer. Create a listener on the RDFService and link it to * the indexer. * - * Create a history object as a listener and make it avaiable to the + * Create a history object as a listener and make it available to the * IndexController. 
* * Create a listener that will call commit() on the SearchEngine every time it @@ -75,6 +75,7 @@ public class SearchIndexerSetup implements ServletContextListener { searchIndexer .startup(app, new ComponentStartupStatusImpl(this, ss)); + searchIndexer.unpause(); ss.info(this, "Setup of search indexer completed."); } catch (RDFServiceException e) { From d5a497774ebf82b35bd0d89f07b4b626d0672398 Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Tue, 20 Jan 2015 17:22:43 -0500 Subject: [PATCH 5/7] VIVO-869 Don't operate on null URIs. If a UriFinder finds a null URI, issue a warning message. If a null URI is submitted to UpdateUrisTask, ignore it. --- .../indexing/IndexingUriFinderListBasic.java | 15 ++++++++++++++- .../webapp/searchindex/tasks/UpdateUrisTask.java | 2 ++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListBasic.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListBasic.java index c45567122..f29e60651 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListBasic.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListBasic.java @@ -9,12 +9,18 @@ import java.util.HashSet; import java.util.List; import java.util.Set; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + import com.hp.hpl.jena.rdf.model.Statement; /** * The basic implementation. 
*/ public class IndexingUriFinderListBasic implements IndexingUriFinderList { + private static final Log log = LogFactory + .getLog(IndexingUriFinderListBasic.class); + private final List finders; public IndexingUriFinderListBasic( @@ -40,7 +46,14 @@ public class IndexingUriFinderListBasic implements IndexingUriFinderList { public Set findAdditionalUris(Statement stmt) { Set uris = new HashSet<>(); for (IndexingUriFinder uriFinder : finders) { - uris.addAll(uriFinder.findAdditionalURIsToIndex(stmt)); + List additions = uriFinder.findAdditionalURIsToIndex(stmt); + for (String addition : additions) { + if (addition == null) { + log.warn("Finder " + uriFinder + " returned a null URI."); + } else { + uris.add(addition); + } + } } return uris; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java index 3deb8c6ee..9afa10249 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java @@ -83,6 +83,8 @@ public class UpdateUrisTask implements Task { if (isInterrupted()) { log.info("Interrupted: " + status.getSearchIndexerStatus()); break; + } else if (uri == null) { + // Nothing to do } else { Individual ind = getIndividual(uri); if (ind == null) { From 7d16e103577f87467c8f8b2691a01a4cbf713b83 Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Tue, 20 Jan 2015 18:08:52 -0500 Subject: [PATCH 6/7] VIVO-945 Produce more concise log messages when a failure is caused by shutdown. 
--- .../modules/searchEngine/SearchEngine.java | 7 ++- .../SearchEngineNotRespondingException.java | 27 ++++++++++++ .../searchengine/solr/SolrSearchEngine.java | 44 +++++++++++++------ .../searchindex/SearchIndexerSetup.java | 4 ++ .../searchindex/tasks/RebuildIndexTask.java | 10 ++++- .../tasks/UpdateDocumentWorkUnit.java | 4 ++ .../searchindex/tasks/UpdateUrisTask.java | 7 +++ .../utils/searchengine/SearchQueryUtils.java | 3 -- 8 files changed, 88 insertions(+), 18 deletions(-) create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchEngineNotRespondingException.java diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchEngine.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchEngine.java index b44d77586..c347fb7bf 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchEngine.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchEngine.java @@ -9,6 +9,11 @@ import edu.cornell.mannlib.vitro.webapp.modules.Application; /** * The principle interface for the SearchEngine. All search-related objects are * created by these methods. + * + * All methods that throw SearchEngineException should attempt to distinguish + * whether the exception is caused because the SearchEngine is not responding. + * In that case, they should throw a SearchEngineNotRespondingException, so the + * client code can choose to respond accordingly. */ public interface SearchEngine extends Application.Module { @@ -86,7 +91,7 @@ public interface SearchEngine extends Application.Module { * Query the search index and return the results. Response is never null. */ SearchResponse query(SearchQuery query) throws SearchEngineException; - + /** * Find the number of documents in the search index. 
*/ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchEngineNotRespondingException.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchEngineNotRespondingException.java new file mode 100644 index 000000000..255921538 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchEngine/SearchEngineNotRespondingException.java @@ -0,0 +1,27 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.modules.searchEngine; + +/** + * Indicates that a request to the SearchEngine has timed out, or given some + * other indication that no response will be coming. + */ +public class SearchEngineNotRespondingException extends SearchEngineException { + + public SearchEngineNotRespondingException() { + super(); + } + + public SearchEngineNotRespondingException(String message) { + super(message); + } + + public SearchEngineNotRespondingException(Throwable cause) { + super(cause); + } + + public SearchEngineNotRespondingException(String message, Throwable cause) { + super(message, cause); + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/solr/SolrSearchEngine.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/solr/SolrSearchEngine.java index 848de8acf..7f9114a38 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/solr/SolrSearchEngine.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchengine/solr/SolrSearchEngine.java @@ -3,6 +3,7 @@ package edu.cornell.mannlib.vitro.webapp.searchengine.solr; import java.io.IOException; +import java.net.SocketTimeoutException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -19,6 +20,7 @@ import edu.cornell.mannlib.vitro.webapp.modules.Application; import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; 
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineNotRespondingException; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse; @@ -72,8 +74,8 @@ public class SolrSearchEngine implements SearchEngine { try { server.ping(); } catch (SolrServerException | IOException e) { - throw new SearchEngineException( - "Solr server did not respont to ping.", e); + throw appropriateException("Solr server did not respond to ping.", + e); } } @@ -93,8 +95,8 @@ public class SolrSearchEngine implements SearchEngine { try { server.add(SolrConversionUtils.convertToSolrInputDocuments(docs)); } catch (SolrServerException | IOException e) { - throw new SearchEngineException( - "Solr server failed to add documents " + docs, e); + throw appropriateException("Solr server failed to add documents " + + docs, e); } } @@ -103,8 +105,7 @@ public class SolrSearchEngine implements SearchEngine { try { server.commit(); } catch (SolrServerException | IOException e) { - throw new SearchEngineException("Failed to commit to Solr server.", - e); + throw appropriateException("Failed to commit to Solr server.", e); } } @@ -113,8 +114,7 @@ public class SolrSearchEngine implements SearchEngine { try { server.commit(wait, wait); } catch (SolrServerException | IOException e) { - throw new SearchEngineException("Failed to commit to Solr server.", - e); + throw appropriateException("Failed to commit to Solr server.", e); } } @@ -128,7 +128,7 @@ public class SolrSearchEngine implements SearchEngine { try { server.deleteById(new ArrayList<>(ids)); } catch (SolrServerException | IOException e) { - throw new SearchEngineException( + throw appropriateException( "Solr server failed to delete documents: " + ids, e); } } @@ -138,7 +138,7 
@@ public class SolrSearchEngine implements SearchEngine { try { server.deleteByQuery(query); } catch (SolrServerException | IOException e) { - throw new SearchEngineException( + throw appropriateException( "Solr server failed to delete documents: " + query, e); } } @@ -162,14 +162,32 @@ public class SolrSearchEngine implements SearchEngine { QueryResponse response = server.query(solrQuery); return SolrConversionUtils.convertToSearchResponse(response); } catch (SolrServerException e) { - throw new SearchEngineException( + throw appropriateException( "Solr server failed to execute the query" + query, e); } } @Override public int documentCount() throws SearchEngineException { - SearchResponse response = query(createQuery("*:*")); - return (int) response.getResults().getNumFound(); + SearchResponse response = query(createQuery("*:*")); + return (int) response.getResults().getNumFound(); } + + /** + * If there is a SocketTimeoutException in the causal chain for this + * exception, then wrap it in a SearchEngineNotRespondingException instead + * of a generic SearchEngineException. 
+ */ + private SearchEngineException appropriateException(String message, + Exception e) { + Throwable cause = e; + while (cause != null) { + if (cause instanceof SocketTimeoutException) { + return new SearchEngineNotRespondingException(message, e); + } + cause = cause.getCause(); + } + return new SearchEngineException(message, e); + } + } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java index bb94ab607..ad7d9f177 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java @@ -15,6 +15,7 @@ import org.apache.commons.logging.LogFactory; import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils; import edu.cornell.mannlib.vitro.webapp.modules.Application; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineNotRespondingException; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type; @@ -118,6 +119,9 @@ public class SearchIndexerSetup implements ServletContextListener { private void commitChanges() { try { searchEngine.commit(); + } catch (SearchEngineNotRespondingException e) { + log.error("Failed to commit the changes: " + + "the search engine is not responding."); } catch (Exception e) { log.error("Failed to commit the changes.", e); } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/RebuildIndexTask.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/RebuildIndexTask.java index bf1d0e3ac..60be6dc7e 100644 --- 
a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/RebuildIndexTask.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/RebuildIndexTask.java @@ -17,6 +17,7 @@ import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils; import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineNotRespondingException; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.RebuildCounts; @@ -105,6 +106,9 @@ public class RebuildIndexTask implements Task { try { searchEngine.deleteByQuery(query); searchEngine.commit(); + } catch (SearchEngineNotRespondingException e) { + log.warn("Failed to delete outdated documents from the search index: " + + "the search engine is not responding."); } catch (SearchEngineException e) { log.warn("Failed to delete outdated documents " + "from the search index", e); @@ -114,8 +118,12 @@ public class RebuildIndexTask implements Task { private int getDocumentCount() { try { return searchEngine.documentCount(); + } catch (SearchEngineNotRespondingException e) { + log.warn("Failed to get document count from the search index: " + + "the search engine is not responding."); + return 0; } catch (SearchEngineException e) { - log.warn("Failed to get docoument count from the search index.", e); + log.warn("Failed to get document count from the search index.", e); return 0; } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateDocumentWorkUnit.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateDocumentWorkUnit.java index ffaf6dcb6..c0840f05b 100644 
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateDocumentWorkUnit.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateDocumentWorkUnit.java @@ -31,6 +31,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement; import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineNotRespondingException; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; @@ -65,6 +66,9 @@ public class UpdateDocumentWorkUnit implements Runnable { modifiers.modifyDocument(ind, doc); addIndexedTime(doc); searchEngine.add(doc); + } catch (SearchEngineNotRespondingException e) { + log.warn("Failed to add '" + ind + "' to the search index: " + + "the search engine is not responding."); } catch (Exception e) { log.warn("Failed to add '" + ind + "' to the search index.", e); } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java index 9afa10249..9634616f5 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java @@ -21,6 +21,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineNotRespondingException; import 
edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.UriCounts; @@ -131,6 +132,9 @@ public class UpdateUrisTask implements Task { searchEngine.deleteById(SearchIndexerUtils.getIdForUri(uri)); status.incrementDeletes(); log.debug("deleted '" + uri + "' from search index."); + } catch (SearchEngineNotRespondingException e) { + log.warn("Failed to delete '" + uri + "' from search index: " + + "the search engine is not responding."); } catch (Exception e) { log.warn("Failed to delete '" + uri + "' from search index", e); } @@ -142,6 +146,9 @@ public class UpdateUrisTask implements Task { searchEngine.deleteById(SearchIndexerUtils.getIdForUri(uri)); status.incrementExclusions(); log.debug("excluded '" + uri + "' from search index."); + } catch (SearchEngineNotRespondingException e) { + log.warn("Failed to exclude '" + uri + "' from search index: " + + "the search engine is not responding."); } catch (Exception e) { log.warn("Failed to exclude '" + uri + "' from search index", e); } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/searchengine/SearchQueryUtils.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/searchengine/SearchQueryUtils.java index 3ab74ccf6..3c61a06fa 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/searchengine/SearchQueryUtils.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/searchengine/SearchQueryUtils.java @@ -13,10 +13,7 @@ import org.apache.commons.logging.LogFactory; import edu.cornell.mannlib.vitro.webapp.application.ApplicationUtils; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; -import edu.cornell.mannlib.vitro.webapp.controller.freemarker.IndividualListQueryResults; -import edu.cornell.mannlib.vitro.webapp.dao.IndividualDao; import
edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; -import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery.Order; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse; From d382e0efd66c8a692e01441c16dde7b5717e3e37 Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Wed, 21 Jan 2015 12:30:44 -0500 Subject: [PATCH 7/7] VIVO-869 Don't submit updates while indexer is paused. When the pause flag is set, the IndexingChangeListener will continue to accumulate changes even if there are gaps in the stream. The listener will not submit a task until the indexer is unpaused. --- webapp/config/example.developer.properties | 2 +- .../modules/searchIndexer/SearchIndexer.java | 13 +++++--- .../searchindex/IndexingChangeListener.java | 33 ++++++++++++++++--- .../webapp/searchindex/SearchIndexerImpl.java | 5 +++ .../searchindex/SearchIndexerSetup.java | 4 ++- 5 files changed, 46 insertions(+), 11 deletions(-) diff --git a/webapp/config/example.developer.properties b/webapp/config/example.developer.properties index e0046ca8f..dd108909f 100644 --- a/webapp/config/example.developer.properties +++ b/webapp/config/example.developer.properties @@ -68,7 +68,7 @@ # developer.searchIndex.showDocuments = false # developer.searchIndex.uriOrNameRestriction = .* # developer.searchIndex.documentRestriction = .* -# developer.searchIndex.logIndexingBreakdownTimings = .* +# developer.searchIndex.logIndexingBreakdownTimings = false # developer.searchIndex.suppressModelChangeListener = false # developer.searchDeletions.enable = false diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexer.java index 79c7e04ce..8780e892a 100644 --- 
a/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexer.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/modules/searchIndexer/SearchIndexer.java @@ -29,7 +29,7 @@ public interface SearchIndexer extends Application.Module { * We accumulate a batch of affected URIs, removing duplicates if they * occur, and then submit them for updates. * - * If called before startup or while paused, this task will be queued. + * If called before startup or while paused, this task will be queued. * * @param urls * if null or empty, this call has no effect. @@ -48,7 +48,7 @@ public interface SearchIndexer extends Application.Module { * A URI belongs in the index if it refers to an existing individual in the * model, and is not excluded. * - * If called before startup or while paused, this task will be queued. + * If called before startup or while paused, this task will be queued. * * @param uris * if null or empty, this call has no effect. @@ -64,7 +64,7 @@ public interface SearchIndexer extends Application.Module { * If a rebuild is already pending or in progress, this method has no * effect. * - * If called before startup or while paused, this task will be queued. + * If called before startup or while paused, this task will be queued. * * @throws IllegalStateException * if called after shutdown() @@ -73,7 +73,7 @@ public interface SearchIndexer extends Application.Module { /** * Stop processing new tasks. Requests will be queued until a call to - * unpause(). + * unpause(). Fires a PAUSE event to listeners. * * The SearchIndexer is paused when created. When fully initialized, it * should be unpaused. @@ -87,7 +87,8 @@ public interface SearchIndexer extends Application.Module { /** * Resume processing new tasks. Any requests that were received since the - * call to pause() will now be scheduled for processing. + * call to pause() will now be scheduled for processing. Fires an UNPAUSE + * event to listeners.
* * The SearchIndexer is paused when created. When fully initialized, it * should be unpaused. @@ -149,6 +150,8 @@ public interface SearchIndexer extends Application.Module { public enum Type { STARTUP, PROGRESS, + PAUSE, UNPAUSE, + START_PROCESSING_URIS, STOP_PROCESSING_URIS, START_PROCESSING_STATEMENTS, STOP_PROCESSING_STATEMENTS, diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/IndexingChangeListener.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/IndexingChangeListener.java index 958a6419a..8f66d6ff7 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/IndexingChangeListener.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/IndexingChangeListener.java @@ -2,6 +2,9 @@ package edu.cornell.mannlib.vitro.webapp.searchindex; +import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.PAUSE; +import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.UNPAUSE; + import java.io.ByteArrayInputStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; @@ -20,6 +23,7 @@ import com.hp.hpl.jena.rdf.model.StmtIterator; import edu.cornell.mannlib.vitro.webapp.dao.jena.event.EditEvent; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer; +import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event; import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeListener; import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread; @@ -27,13 +31,18 @@ import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread; * When a change is heard, wait for an interval to see if more changes come in. * When changes stop coming in for a specified interval, send what has * accumulated. + * + * When the SearchIndexer pauses, stop sending changes until the SearchIndexer + * unpauses. 
*/ -public class IndexingChangeListener implements ChangeListener { +public class IndexingChangeListener implements ChangeListener, + SearchIndexer.Listener { private static final Log log = LogFactory .getLog(IndexingChangeListener.class); private final SearchIndexer searchIndexer; private final Ticker ticker; + private volatile boolean paused = true; /** All access to the list must be synchronized. */ private final List changes; @@ -42,16 +51,32 @@ public class IndexingChangeListener implements ChangeListener { this.searchIndexer = searchIndexer; this.ticker = new Ticker(); this.changes = new ArrayList<>(); + + searchIndexer.addListener(this); } private synchronized void noteChange(Statement stmt) { changes.add(stmt); - ticker.start(); + if (!paused) { + ticker.start(); + } + } + + @Override + public void receiveSearchIndexerEvent(Event event) { + if (event.getType() == PAUSE) { + paused = true; + } else if (event.getType() == UNPAUSE) { + paused = false; + respondToTicker(); + } } private synchronized void respondToTicker() { - searchIndexer.scheduleUpdatesForStatements(changes); - changes.clear(); + if (!paused && !changes.isEmpty()) { + searchIndexer.scheduleUpdatesForStatements(changes); + changes.clear(); + } } public void shutdown() { diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java index 8091b71d9..3062ca6f5 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java @@ -3,9 +3,11 @@ package edu.cornell.mannlib.vitro.webapp.searchindex; import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames.DISPLAY; +import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.PAUSE; import static 
edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_COMPLETE; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_REQUESTED; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STARTUP; +import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.UNPAUSE; import static edu.cornell.mannlib.vitro.webapp.utils.developer.Key.SEARCH_INDEX_LOG_INDEXING_BREAKDOWN_TIMINGS; import static edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevel.IDLE; import static edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevel.WORKING; @@ -39,6 +41,7 @@ import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess; import edu.cornell.mannlib.vitro.webapp.modules.Application; import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer; +import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; @@ -235,11 +238,13 @@ public class SearchIndexerImpl implements SearchIndexer { @Override public void pause() { scheduler.pause(); + listeners.fireEvent(new Event(PAUSE, getStatus())); } @Override public void unpause() { scheduler.unpause(); + listeners.fireEvent(new Event(UNPAUSE, getStatus())); } @Override diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java index ad7d9f177..394ba688a 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java +++ 
b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerSetup.java @@ -61,9 +61,11 @@ public class SearchIndexerSetup implements ServletContextListener { searchIndexer = app.getSearchIndexer(); listener = new IndexingChangeListener(searchIndexer); - + + // Wrap it so it can be disabled by a developer flag. listenerWrapper = new DeveloperDisabledChangeListener(listener, Key.SEARCH_INDEX_SUPPRESS_MODEL_CHANGE_LISTENER); + RDFServiceUtils.getRDFServiceFactory(ctx).registerListener( listenerWrapper);