diff --git a/webapp/config/example.developer.properties b/webapp/config/example.developer.properties index f90be6be8..e0046ca8f 100644 --- a/webapp/config/example.developer.properties +++ b/webapp/config/example.developer.properties @@ -68,6 +68,8 @@ # developer.searchIndex.showDocuments = false # developer.searchIndex.uriOrNameRestriction = .* # developer.searchIndex.documentRestriction = .* +# developer.searchIndex.logIndexingBreakdownTimings = false +# developer.searchIndex.suppressModelChangeListener = false # developer.searchDeletions.enable = false diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java index 24fa1b2ea..c92e26f4f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/SearchIndexerImpl.java @@ -6,6 +6,9 @@ import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames.DISPLAY; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_COMPLETE; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_REQUESTED; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STARTUP; +import static edu.cornell.mannlib.vitro.webapp.utils.developer.Key.SEARCH_INDEX_LOG_INDEXING_BREAKDOWN_TIMINGS; +import static edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevel.IDLE; +import static edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevel.WORKING; import static java.util.concurrent.TimeUnit.MINUTES; import java.util.ArrayList; @@ -39,14 +42,25 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus; import 
edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; +import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList; +import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierListBasic; +import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierListDeveloper; import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder; +import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList; +import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderListBasic; +import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderListDeveloper; import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder; +import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderList; +import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListBasic; +import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListDeveloper; import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.RebuildIndexTask; import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateStatementsTask; import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateUrisTask; import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader; import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoaderException; +import edu.cornell.mannlib.vitro.webapp.utils.developer.DeveloperSettings; import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread; +import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevel; /** * An implementation of the SearchIndexer interface. 
@@ -113,13 +127,14 @@ public class SearchIndexerImpl implements SearchIndexer { if (changes == null || changes.isEmpty()) { return; } - + if (taskQueue.isShutdown()) { throw new IllegalStateException("SearchIndexer is shut down."); } - - Task task = new UpdateStatementsTask(changes, uriFinders, excluders, - modifiers, wadf.getIndividualDao(), listeners, pool); + + Task task = new UpdateStatementsTask(changes, createFindersList(), + createExcludersList(), createModifiersList(), + wadf.getIndividualDao(), listeners, pool); scheduler.scheduleTask(task); log.debug("Scheduled updates for " + changes.size() + " statements."); } @@ -129,13 +144,13 @@ public class SearchIndexerImpl implements SearchIndexer { if (uris == null || uris.isEmpty()) { return; } - + if (taskQueue.isShutdown()) { throw new IllegalStateException("SearchIndexer is shut down."); } - - Task task = new UpdateUrisTask(uris, excluders, modifiers, - wadf.getIndividualDao(), listeners, pool); + + Task task = new UpdateUrisTask(uris, createExcludersList(), + createModifiersList(), wadf.getIndividualDao(), listeners, pool); scheduler.scheduleTask(task); log.debug("Scheduled updates for " + uris.size() + " uris."); } @@ -145,13 +160,42 @@ public class SearchIndexerImpl implements SearchIndexer { if (taskQueue.isShutdown()) { throw new IllegalStateException("SearchIndexer is shut down."); } - - Task task = new RebuildIndexTask(excluders, modifiers, - wadf.getIndividualDao(), listeners, pool); + + Task task = new RebuildIndexTask(createExcludersList(), + createModifiersList(), wadf.getIndividualDao(), listeners, pool); scheduler.scheduleTask(task); log.debug("Scheduled a full rebuild."); } + private SearchIndexExcluderList createExcludersList() { + if (isDeveloperOptionSet()) { + return new SearchIndexExcluderListDeveloper(excluders); + } else { + return new SearchIndexExcluderListBasic(excluders); + } + } + + private DocumentModifierList createModifiersList() { + if (isDeveloperOptionSet()) { + return new 
DocumentModifierListDeveloper(modifiers); + } else { + return new DocumentModifierListBasic(modifiers); + } + } + + private IndexingUriFinderList createFindersList() { + if (isDeveloperOptionSet()) { + return new IndexingUriFinderListDeveloper(uriFinders); + } else { + return new IndexingUriFinderListBasic(uriFinders); + } + } + + private boolean isDeveloperOptionSet() { + return DeveloperSettings.getInstance().getBoolean( + SEARCH_INDEX_LOG_INDEXING_BREAKDOWN_TIMINGS); + } + @Override public void pause() { scheduler.pause(); @@ -327,11 +371,22 @@ public class SearchIndexerImpl implements SearchIndexer { @Override public void run() { current.set(new QueueStatus(task)); + setWorkLevel(WORKING); log.debug("starting task: " + task); + task.run(); + current.set(new QueueStatus(SearchIndexerStatus.idle())); + setWorkLevel(IDLE); log.debug("ended task: " + task); } + + private void setWorkLevel(WorkLevel level) { + if (Thread.currentThread() instanceof VitroBackgroundThread) { + ((VitroBackgroundThread) Thread.currentThread()) + .setWorkLevel(level); + } + } } /** Either a specific status or a task to interrogate. 
*/ @@ -429,12 +484,23 @@ public class SearchIndexerImpl implements SearchIndexer { @Override public void run() { try { + setWorkLevel(WORKING); + workUnit.run(); + + setWorkLevel(IDLE); } finally { task.notifyWorkUnitCompletion(workUnit); } } + private void setWorkLevel(WorkLevel level) { + if (Thread.currentThread() instanceof VitroBackgroundThread) { + ((VitroBackgroundThread) Thread.currentThread()) + .setWorkLevel(level); + } + } + } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/DocumentModifierList.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/DocumentModifierList.java new file mode 100644 index 000000000..247edda00 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/DocumentModifierList.java @@ -0,0 +1,40 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; + +/** + * An ordered list of DocumentModifier objects, in a handy package. + * + * Implementations should make a protective copy of the list of + * DocumentModifiers. Implementations must be thread-safe. + * + * The life-cycle is: + * + *
+ * startIndexing(), 
+ * 0 or more modifyDocument() by multiple threads,
+ * stopIndexing().
+ * 
+ */ +public interface DocumentModifierList { + + /** + * Do any required setup on the individual modifiers. + */ + void startIndexing(); + + /** + * Do any required teardown on the individual modifiers. + */ + void stopIndexing(); + + /** + * Exercise the list of modifiers, making changes to this document based on + * this individual. + */ + void modifyDocument(Individual ind, SearchInputDocument doc); + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/DocumentModifierListBasic.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/DocumentModifierListBasic.java new file mode 100644 index 000000000..251197d27 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/DocumentModifierListBasic.java @@ -0,0 +1,42 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; + +/** + * The basic implementation. + */ +public class DocumentModifierListBasic implements DocumentModifierList { + private final List modifiers; + + public DocumentModifierListBasic( + Collection modifiers) { + this.modifiers = Collections + .unmodifiableList(new ArrayList<>(modifiers)); + } + + @Override + public void startIndexing() { + // Nothing to do. + } + + @Override + public void stopIndexing() { + // Nothing to do. 
+ } + + @Override + public void modifyDocument(Individual ind, SearchInputDocument doc) { + for (DocumentModifier m : modifiers) { + m.modifyDocument(ind, doc); + } + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/DocumentModifierListDeveloper.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/DocumentModifierListDeveloper.java new file mode 100644 index 000000000..1c4082231 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/DocumentModifierListDeveloper.java @@ -0,0 +1,101 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; + +/** + * An implementation that accumulates timing figures for each modifier and + * writes them to the log. + * + * Note that this must be thread-safe. + */ +public class DocumentModifierListDeveloper implements DocumentModifierList { + private static final Log log = LogFactory + .getLog(DocumentModifierListDeveloper.class); + + private final List timings; + private final AtomicInteger count = new AtomicInteger(); + + public DocumentModifierListDeveloper( + Collection modifiers) { + List list = new ArrayList<>(); + for (DocumentModifier modifier : modifiers) { + list.add(new ModifierTiming(modifier)); + } + this.timings = Collections.unmodifiableList(list); + } + + @Override + public void startIndexing() { + // Nothing to do. 
+ } + + /** + * Each time a modifier is run, accumulate the timings for it. + */ + @Override + public void modifyDocument(Individual ind, SearchInputDocument doc) { + count.incrementAndGet(); + + for (ModifierTiming timing : timings) { + long startTime = System.currentTimeMillis(); + timing.getModifier().modifyDocument(ind, doc); + timing.addElapsedTime(System.currentTimeMillis() - startTime); + } + } + + /** + * Write the timings to the log. + */ + @Override + public void stopIndexing() { + String message = String.format( + "Timings for %d modifiers after %d calls:", timings.size(), + count.get()); + for (ModifierTiming timing : timings) { + int totalMillis = timing.getTotal(); + float totalSeconds = totalMillis / 1000.0F; + int average = totalMillis / count.get(); + message += String + .format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s", + count.get(), totalSeconds, average, + timing.getModifier()); + } + log.info(message); + } + + private static class ModifierTiming { + private final DocumentModifier modifier; + private final AtomicLong totalElapsedMillis = new AtomicLong(); + + public ModifierTiming(DocumentModifier modifier) { + this.modifier = modifier; + } + + public DocumentModifier getModifier() { + return modifier; + } + + public int getTotal() { + return (int) totalElapsedMillis.get(); + } + + public void addElapsedTime(long elapsed) { + totalElapsedMillis.addAndGet(elapsed); + } + + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/SearchIndexExcluderList.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/SearchIndexExcluderList.java new file mode 100644 index 000000000..de563bbb2 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/SearchIndexExcluderList.java @@ -0,0 +1,41 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.exclusions; + +import 
edu.cornell.mannlib.vitro.webapp.beans.Individual; + +/** + * An ordered list of SearchIndexExcluder objects, in a handy package. + * + * Implementations should make a protective copy of the list of + * SearchIndexExcluders. Implementations must be thread-safe. + * + * The life-cycle is: + * + *
+ * startIndexing(), 
+ * 0 or more isExcluded() by multiple threads, 
+ * stopIndexing().
+ * 
+ */ +public interface SearchIndexExcluderList { + + /** + * Do any required setup on the individual Excluders. + */ + void startIndexing(); + + /** + * Do any required teardown on the individual Excluders. + */ + void stopIndexing(); + + /** + * Poll the list of excluders regarding this individual. + * + * If any returns non-null, the individual is excluded. If all return null, + * the individual is not excluded. + */ + boolean isExcluded(Individual ind); + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/SearchIndexExcluderListBasic.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/SearchIndexExcluderListBasic.java new file mode 100644 index 000000000..19bd117f2 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/SearchIndexExcluderListBasic.java @@ -0,0 +1,51 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.exclusions; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; + +/** + * The basic implementation. + */ +public class SearchIndexExcluderListBasic implements SearchIndexExcluderList { + private static final Log log = LogFactory + .getLog(SearchIndexExcluderListBasic.class); + + private final List excluders; + + public SearchIndexExcluderListBasic( + Collection excluders) { + this.excluders = Collections + .unmodifiableList(new ArrayList<>(excluders)); + } + + @Override + public void startIndexing() { + // Nothing to do. + } + + @Override + public void stopIndexing() { + // Nothing to do. 
+ } + + @Override + public boolean isExcluded(Individual ind) { + for (SearchIndexExcluder excluder : excluders) { + String message = excluder.checkForExclusion(ind); + if (message != SearchIndexExcluder.DONT_EXCLUDE) { + log.debug("Excluded " + ind + " because " + message); + return true; + } + } + return false; + } +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/SearchIndexExcluderListDeveloper.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/SearchIndexExcluderListDeveloper.java new file mode 100644 index 000000000..259b01573 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/SearchIndexExcluderListDeveloper.java @@ -0,0 +1,122 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.exclusions; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; + +/** + * An implementation that accumulates timing figures for each excluder and + * writes them to the log. + * + * Note that this must be thread-safe. 
+ */ +public class SearchIndexExcluderListDeveloper implements + SearchIndexExcluderList { + private static final Log log = LogFactory + .getLog(SearchIndexExcluderListDeveloper.class); + + private final List timings; + private final AtomicInteger count = new AtomicInteger(); + + public SearchIndexExcluderListDeveloper( + Collection excluders) { + + List list = new ArrayList<>(); + for (SearchIndexExcluder excluder : excluders) { + list.add(new ExcluderTiming(excluder)); + } + this.timings = Collections.unmodifiableList(list); + } + + @Override + public void startIndexing() { + // Nothing to do. + } + + /** + * Each time an excluder is run, accumulate the timings for it. Note that + * those at the top of the list will run more times than those at the + * bottom. + */ + @Override + public boolean isExcluded(Individual ind) { + count.incrementAndGet(); + + for (ExcluderTiming timing : timings) { + long startTime = System.currentTimeMillis(); + + String message = timing.getExcluder().checkForExclusion(ind); + + timing.incrementCount(); + timing.addElapsedTime(System.currentTimeMillis() - startTime); + + if (message != SearchIndexExcluder.DONT_EXCLUDE) { + log.debug("Excluded " + ind + " because " + message); + return true; + } + } + return false; + } + + /** + * Write the timings to the log. 
+ */ + @Override + public void stopIndexing() { + String message = String.format( + "Timings for %d excluders after %d calls:", timings.size(), + count.get()); + for (ExcluderTiming timing : timings) { + int thisCount = timing.getCount(); + int totalMillis = timing.getTotal(); + float totalSeconds = totalMillis / 1000.0F; + int average = totalMillis / thisCount; + message += String + .format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s", + thisCount, totalSeconds, average, + timing.getExcluder()); + } + log.info(message); + } + + private static class ExcluderTiming { + private final SearchIndexExcluder excluder; + private final AtomicInteger invocationCount = new AtomicInteger(); + private final AtomicLong totalElapsedMillis = new AtomicLong(); + + public ExcluderTiming(SearchIndexExcluder excluder) { + this.excluder = excluder; + } + + public SearchIndexExcluder getExcluder() { + return excluder; + } + + public void incrementCount() { + invocationCount.incrementAndGet(); + } + + public int getTotal() { + return (int) totalElapsedMillis.get(); + } + + public int getCount() { + return invocationCount.get(); + } + + public void addElapsedTime(long elapsed) { + totalElapsedMillis.addAndGet(elapsed); + } + + } +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderList.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderList.java new file mode 100644 index 000000000..a1a4169c7 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderList.java @@ -0,0 +1,41 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.indexing; + +import java.util.Set; + +import com.hp.hpl.jena.rdf.model.Statement; + +/** + * An ordered list of IndexingUriFinder objects, in a handy package. 
+ * + * Implementations should make a protective copy of the list of + * IndexingUriFinders. Implementations must be thread-safe. + * + * The life-cycle is: + * + *
+ * startIndexing(), 
+ * 0 or more findAdditionalUris() by multiple threads, 
+ * stopIndexing().
+ * 
+ */ +public interface IndexingUriFinderList { + + /** + * Do any required setup on the individual finders. + */ + void startIndexing(); + + /** + * Do any required teardown on the individual finders. + */ + void stopIndexing(); + + /** + * Exercise the list of finders, and return a set of the URIs that they + * found for this statement. + */ + Set findAdditionalUris(Statement stmt); + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListBasic.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListBasic.java new file mode 100644 index 000000000..c45567122 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListBasic.java @@ -0,0 +1,48 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.indexing; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import com.hp.hpl.jena.rdf.model.Statement; + +/** + * The basic implementation. 
+ */ +public class IndexingUriFinderListBasic implements IndexingUriFinderList { + private final List finders; + + public IndexingUriFinderListBasic( + Collection finders) { + this.finders = Collections.synchronizedList(new ArrayList<>(finders)); + } + + @Override + public void startIndexing() { + for (IndexingUriFinder finder : finders) { + finder.startIndexing(); + } + } + + @Override + public void stopIndexing() { + for (IndexingUriFinder finder : finders) { + finder.endIndexing(); + } + } + + @Override + public Set findAdditionalUris(Statement stmt) { + Set uris = new HashSet<>(); + for (IndexingUriFinder uriFinder : finders) { + uris.addAll(uriFinder.findAdditionalURIsToIndex(stmt)); + } + return uris; + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListDeveloper.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListDeveloper.java new file mode 100644 index 000000000..807987abb --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/indexing/IndexingUriFinderListDeveloper.java @@ -0,0 +1,115 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.indexing; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.hp.hpl.jena.rdf.model.Statement; + +/** + * An implementation that accumulates timing figures for each finder and writes + * them to the log. + * + * Note that this must be thread-safe. 
+ */ +public class IndexingUriFinderListDeveloper implements IndexingUriFinderList { + private static final Log log = LogFactory + .getLog(IndexingUriFinderListDeveloper.class); + + private final List timings; + private final AtomicInteger count = new AtomicInteger(); + + public IndexingUriFinderListDeveloper( + Collection finders) { + List list = new ArrayList<>(); + for (IndexingUriFinder finder : finders) { + list.add(new FinderTiming(finder)); + } + this.timings = Collections.unmodifiableList(list); + } + + @Override + public void startIndexing() { + for (FinderTiming timing : timings) { + timing.getFinder().startIndexing(); + } + } + + /** + * Each time a finder is run, accumulate the timings for it. + */ + @Override + public Set findAdditionalUris(Statement stmt) { + count.incrementAndGet(); + + Set uris = new HashSet<>(); + for (FinderTiming timing : timings) { + long startTime = System.currentTimeMillis(); + uris.addAll(timing.getFinder().findAdditionalURIsToIndex(stmt)); + timing.addElapsedTime(System.currentTimeMillis() - startTime); + } + return uris; + } + + /** + * Write the timings to the log. 
+ */ + @Override + public void stopIndexing() { + for (FinderTiming timing : timings) { + timing.getFinder().endIndexing(); + } + + String message = String.format( + "Timings for %d modifiers after %d calls:", timings.size(), + count.get()); + for (FinderTiming timing : timings) { + int totalMillis = timing.getTotal(); + float totalSeconds = totalMillis / 1000.0F; + int average = totalMillis / count.get(); + message += String + .format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s", + count.get(), totalSeconds, average, + timing.getFinder()); + } + log.info(message); + + } + + // ---------------------------------------------------------------------- + // Helper classes + // ---------------------------------------------------------------------- + + private static class FinderTiming { + private final IndexingUriFinder finder; + private final AtomicLong totalElapsedMillis = new AtomicLong(); + + public FinderTiming(IndexingUriFinder finder) { + this.finder = finder; + } + + public IndexingUriFinder getFinder() { + return finder; + } + + public int getTotal() { + return (int) totalElapsedMillis.get(); + } + + public void addElapsedTime(long elapsed) { + totalElapsedMillis.addAndGet(elapsed); + } + + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/FindUrisForStatementWorkUnit.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/FindUrisForStatementWorkUnit.java index 06e6a0c7e..739a3604a 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/FindUrisForStatementWorkUnit.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/FindUrisForStatementWorkUnit.java @@ -2,13 +2,12 @@ package edu.cornell.mannlib.vitro.webapp.searchindex.tasks; -import java.util.Collection; import java.util.HashSet; import java.util.Set; import com.hp.hpl.jena.rdf.model.Statement; -import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder; +import 
edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderList; /** * Ask all of the URI Finders to find URIs that might be affected by this @@ -16,21 +15,19 @@ import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder; */ public class FindUrisForStatementWorkUnit implements Runnable { private final Statement stmt; - private final Collection uriFinders; + private final IndexingUriFinderList finders; private final Set uris; public FindUrisForStatementWorkUnit(Statement stmt, - Collection uriFinders) { + IndexingUriFinderList finders) { this.stmt = stmt; - this.uriFinders = uriFinders; + this.finders = finders; this.uris = new HashSet<>(); } @Override public void run() { - for (IndexingUriFinder uriFinder : uriFinders) { - uris.addAll(uriFinder.findAdditionalURIsToIndex(stmt)); - } + uris.addAll(finders.findAdditionalUris(stmt)); } public Statement getStatement() { diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/RebuildIndexTask.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/RebuildIndexTask.java index bc6cf9426..fd6b7be5d 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/RebuildIndexTask.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/RebuildIndexTask.java @@ -7,10 +7,8 @@ import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndex import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State.REBUILDING; import java.text.SimpleDateFormat; -import java.util.ArrayList; import java.util.Collection; import java.util.Date; -import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -26,8 +24,8 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatu import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task; 
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool; -import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; -import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder; +import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList; +import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList; /** * Get the URIs of all individuals in the model. Update each of their search @@ -40,8 +38,8 @@ public class RebuildIndexTask implements Task { private static final Log log = LogFactory.getLog(RebuildIndexTask.class); private final IndividualDao indDao; - private final List excluders; - private final List modifiers; + private final SearchIndexExcluderList excluders; + private final DocumentModifierList modifiers; private final ListenerList listeners; private final WorkerThreadPool pool; private final SearchEngine searchEngine; @@ -51,11 +49,11 @@ public class RebuildIndexTask implements Task { private volatile SearchIndexerStatus status; - public RebuildIndexTask(Collection excluders, - Collection modifiers, IndividualDao indDao, + public RebuildIndexTask(SearchIndexExcluderList excluders, + DocumentModifierList modifiers, IndividualDao indDao, ListenerList listeners, WorkerThreadPool pool) { - this.excluders = new ArrayList<>(excluders); - this.modifiers = new ArrayList<>(modifiers); + this.excluders = excluders; + this.modifiers = modifiers; this.indDao = indDao; this.listeners = listeners; this.pool = pool; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateDocumentWorkUnit.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateDocumentWorkUnit.java index 715e1717d..4367bee9a 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateDocumentWorkUnit.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateDocumentWorkUnit.java @@ 
-13,8 +13,6 @@ import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_ import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.RDFTYPE; import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.URI; -import java.util.ArrayList; -import java.util.Collection; import java.util.List; import org.apache.commons.lang.StringUtils; @@ -34,7 +32,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils; -import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; +import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList; public class UpdateDocumentWorkUnit implements Runnable { private static final Log log = LogFactory @@ -45,16 +43,15 @@ public class UpdateDocumentWorkUnit implements Runnable { private static final String URI_RDFS_LABEL = RDFS.label.getURI(); private final Individual ind; - private final List modifiers; + private final DocumentModifierList modifiers; private final SearchEngine searchEngine; - public UpdateDocumentWorkUnit(Individual ind, - Collection modifiers) { + public UpdateDocumentWorkUnit(Individual ind, DocumentModifierList modifiers) { this.ind = ind; - this.modifiers = new ArrayList<>(modifiers); + this.modifiers = modifiers; this.searchEngine = ApplicationUtils.instance().getSearchEngine(); } - + public Individual getInd() { return ind; } @@ -72,9 +69,7 @@ public class UpdateDocumentWorkUnit implements Runnable { addDataPropertyText(doc); addEntityBoost(doc); - for (DocumentModifier modifier : modifiers) { - modifier.modifyDocument(ind, doc); - } + modifiers.modifyDocument(ind, doc); addIndexedTime(doc); @@ -170,10 +165,10 @@ public class UpdateDocumentWorkUnit implements Runnable { } private 
void addEntityBoost(SearchInputDocument doc) { - Float boost = ind.getSearchBoost(); - if(boost != null && ! boost.equals(0.0F)) { - doc.setDocumentBoost(boost); - } + Float boost = ind.getSearchBoost(); + if (boost != null && !boost.equals(0.0F)) { + doc.setDocumentBoost(boost); + } } private void addIndexedTime(SearchInputDocument doc) { diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateStatementsTask.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateStatementsTask.java index c72198eba..505c58fcb 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateStatementsTask.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateStatementsTask.java @@ -26,9 +26,9 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatu import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool; -import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; -import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder; -import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder; +import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList; +import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList; +import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderList; /** * Receive a collection of statements that have been added to the model, or @@ -53,9 +53,9 @@ public class UpdateStatementsTask implements Task { .getLog(UpdateStatementsTask.class); private final List changes; - private final Set uriFinders; - private final Set excluders; - private final Set modifiers; + private final IndexingUriFinderList uriFinders; + private final 
SearchIndexExcluderList excluders; + private final DocumentModifierList modifiers; private final IndividualDao indDao; private final ListenerList listeners; private final WorkerThreadPool pool; @@ -64,10 +64,9 @@ public class UpdateStatementsTask implements Task { private final Status status; public UpdateStatementsTask(List changes, - Set uriFinders, - Set excluders, - Set modifiers, IndividualDao indDao, - ListenerList listeners, WorkerThreadPool pool) { + IndexingUriFinderList uriFinders, + SearchIndexExcluderList excluders, DocumentModifierList modifiers, + IndividualDao indDao, ListenerList listeners, WorkerThreadPool pool) { this.changes = new ArrayList<>(changes); this.uriFinders = uriFinders; this.excluders = excluders; @@ -94,7 +93,8 @@ public class UpdateStatementsTask implements Task { } private void findAffectedUris() { - tellFindersWeAreStarting(); + log.debug("Tell finders we are starting."); + uriFinders.startIndexing(); for (Statement stmt : changes) { if (isInterrupted()) { @@ -106,21 +106,8 @@ public class UpdateStatementsTask implements Task { } waitForWorkUnitsToComplete(); - tellFindersWeAreStopping(); - } - - private void tellFindersWeAreStarting() { - log.debug("Tell finders we are starting."); - for (IndexingUriFinder uriFinder : uriFinders) { - uriFinder.startIndexing(); - } - } - - private void tellFindersWeAreStopping() { log.debug("Tell finders we are stopping."); - for (IndexingUriFinder uriFinder : uriFinders) { - uriFinder.endIndexing(); - } + uriFinders.stopIndexing(); } private boolean isInterrupted() { diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java index c6e66f9e1..5b4997792 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/tasks/UpdateUrisTask.java @@ -7,7 +7,6 @@ import static 
edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndex import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STOP_PROCESSING_URIS; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State.PROCESSING_URIS; -import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.HashSet; @@ -30,8 +29,8 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool; -import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; -import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder; +import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList; +import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList; /** * Given a list of URIs, remove the ones that don't belong in the index and @@ -49,8 +48,8 @@ public class UpdateUrisTask implements Task { private final Set uris; private final IndividualDao indDao; - private final List excluders; - private final List modifiers; + private final SearchIndexExcluderList excluders; + private final DocumentModifierList modifiers; private final ListenerList listeners; private final WorkerThreadPool pool; @@ -58,12 +57,11 @@ public class UpdateUrisTask implements Task { private final SearchEngine searchEngine; public UpdateUrisTask(Collection uris, - Collection excluders, - Collection modifiers, IndividualDao indDao, - ListenerList listeners, WorkerThreadPool pool) { + SearchIndexExcluderList excluders, DocumentModifierList modifiers, + IndividualDao indDao, ListenerList listeners, WorkerThreadPool pool) { this.uris = new HashSet<>(uris); - 
this.excluders = new ArrayList<>(excluders); - this.modifiers = new ArrayList<>(modifiers); + this.excluders = excluders; + this.modifiers = modifiers; this.indDao = indDao; this.listeners = listeners; this.pool = pool; @@ -77,6 +75,9 @@ public class UpdateUrisTask implements Task { public void run() { listeners.fireEvent(new Event(START_PROCESSING_URIS, status .getSearchIndexerStatus())); + excluders.startIndexing(); + modifiers.startIndexing(); + for (String uri : uris) { if (isInterrupted()) { log.info("Interrupted: " + status.getSearchIndexerStatus()); @@ -92,6 +93,9 @@ public class UpdateUrisTask implements Task { } pool.waitUntilIdle(); commitChanges(); + + excluders.stopIndexing(); + modifiers.stopIndexing(); listeners.fireEvent(new Event(STOP_PROCESSING_URIS, status .getSearchIndexerStatus())); } @@ -131,14 +135,7 @@ public class UpdateUrisTask implements Task { } private boolean isExcluded(Individual ind) { - for (SearchIndexExcluder excluder : excluders) { - String message = excluder.checkForExclusion(ind); - if (message != SearchIndexExcluder.DONT_EXCLUDE) { - log.debug("Excluded " + ind + " because " + message); - return true; - } - } - return false; + return excluders.isExcluded(ind); } /** A delete is fast enough to be done synchronously. */ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/developer/Key.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/developer/Key.java index 9dafcb266..73434c2aa 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/developer/Key.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/developer/Key.java @@ -110,6 +110,13 @@ public enum Key { SEARCH_INDEX_DOCUMENT_RESTRICTION( "developer.searchIndex.documentRestriction", false), + /** + * Accumulate breakdown timings for search indexing, and log them at the end + * of the indexing operation. 
+ */ + SEARCH_INDEX_LOG_INDEXING_BREAKDOWN_TIMINGS( + "developer.searchIndex.logIndexingBreakdownTimings", true), + /** * If set, don't pass model change events to the search indexer. */ diff --git a/webapp/web/js/developer/developerPanel.js b/webapp/web/js/developer/developerPanel.js index 1c09a9cda..28d0e806e 100644 --- a/webapp/web/js/developer/developerPanel.js +++ b/webapp/web/js/developer/developerPanel.js @@ -54,6 +54,7 @@ function DeveloperPanel(developerAjaxUrl) { document.getElementById("developer_i18n_logStringRequests").disabled = !developerEnabled; document.getElementById("developer_loggingRDFService_enable").disabled = !developerEnabled; document.getElementById("developer_searchIndex_enable").disabled = !developerEnabled; + document.getElementById("developer_searchIndex_logIndexingBreakdownTimings").disabled = !developerEnabled; document.getElementById("developer_searchIndex_suppressModelChangeListener").disabled = !developerEnabled; document.getElementById("developer_searchDeletions_enable").disabled = !developerEnabled; document.getElementById("developer_searchEngine_enable").disabled = !developerEnabled; diff --git a/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl b/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl index 99219f30f..d4e97c3f0 100644 --- a/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl +++ b/webapp/web/templates/freemarker/body/admin/searchIndexStatus.ftl @@ -68,11 +68,9 @@ <#elseif countsType == "STATEMENT_COUNTS"> Processed: ${counts.processed}, remaining: ${counts.remaining}, total: ${counts.total} <#elseif countsType == "REBUILD_COUNTS"> - Number of document before rebuild: ${counts.documentsBefore}, after rebuild: - <#if counts.documentsAfter == 0> - UNKNOWN - <#else> - ${counts.documentsAfter} + Number of document before rebuild: ${counts.documentsBefore} + <#if counts.documentsAfter != 0> + - after rebuild: ${counts.documentsAfter} diff --git 
a/webapp/web/templates/freemarker/page/partials/developerPanel.ftl b/webapp/web/templates/freemarker/page/partials/developerPanel.ftl index 1390ee5ee..25a9983ff 100644 --- a/webapp/web/templates/freemarker/page/partials/developerPanel.ftl +++ b/webapp/web/templates/freemarker/page/partials/developerPanel.ftl @@ -129,9 +129,13 @@ <@showTextbox "developer_searchIndex_documentRestriction", "Restrict by document contents" /> + <@showCheckbox "developer_searchIndex_logIndexingBreakdownTimings", + "Log breakdown timings for indexing operation." /> <@showCheckbox "developer_searchDeletions_enable", "Log deletions." /> - <@showCheckbox "developer_searchIndex_suppressModelChangeListener", - "Suppress the automatic indexing of changed triples." /> +
+ <@showCheckbox "developer_searchIndex_suppressModelChangeListener", + "Suppress the automatic indexing of changed triples." /> +