VIVO-871 Produce timing breakdowns from a developer option

This commit is contained in:
Jim Blake 2015-01-13 16:03:16 -05:00
parent 3fd72a7110
commit 982e81aece
20 changed files with 747 additions and 94 deletions

View file

@ -68,6 +68,8 @@
# developer.searchIndex.showDocuments = false # developer.searchIndex.showDocuments = false
# developer.searchIndex.uriOrNameRestriction = .* # developer.searchIndex.uriOrNameRestriction = .*
# developer.searchIndex.documentRestriction = .* # developer.searchIndex.documentRestriction = .*
# developer.searchIndex.logIndexingBreakdownTimings = false
# developer.searchIndex.suppressModelChangeListener = false
# developer.searchDeletions.enable = false # developer.searchDeletions.enable = false

View file

@ -6,6 +6,9 @@ import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames.DISPLAY;
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_COMPLETE; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_COMPLETE;
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_REQUESTED; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.SHUTDOWN_REQUESTED;
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STARTUP; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STARTUP;
import static edu.cornell.mannlib.vitro.webapp.utils.developer.Key.SEARCH_INDEX_LOG_INDEXING_BREAKDOWN_TIMINGS;
import static edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevel.IDLE;
import static edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevel.WORKING;
import static java.util.concurrent.TimeUnit.MINUTES; import static java.util.concurrent.TimeUnit.MINUTES;
import java.util.ArrayList; import java.util.ArrayList;
@ -39,14 +42,25 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer;
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus;
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierListBasic;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierListDeveloper;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder; import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderListBasic;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderListDeveloper;
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder; import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder;
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderList;
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListBasic;
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListDeveloper;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.RebuildIndexTask; import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.RebuildIndexTask;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateStatementsTask; import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateStatementsTask;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateUrisTask; import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateUrisTask;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader; import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoaderException; import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoaderException;
import edu.cornell.mannlib.vitro.webapp.utils.developer.DeveloperSettings;
import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread; import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread;
import edu.cornell.mannlib.vitro.webapp.utils.threads.VitroBackgroundThread.WorkLevel;
/** /**
* An implementation of the SearchIndexer interface. * An implementation of the SearchIndexer interface.
@ -113,13 +127,14 @@ public class SearchIndexerImpl implements SearchIndexer {
if (changes == null || changes.isEmpty()) { if (changes == null || changes.isEmpty()) {
return; return;
} }
if (taskQueue.isShutdown()) { if (taskQueue.isShutdown()) {
throw new IllegalStateException("SearchIndexer is shut down."); throw new IllegalStateException("SearchIndexer is shut down.");
} }
Task task = new UpdateStatementsTask(changes, uriFinders, excluders, Task task = new UpdateStatementsTask(changes, createFindersList(),
modifiers, wadf.getIndividualDao(), listeners, pool); createExcludersList(), createModifiersList(),
wadf.getIndividualDao(), listeners, pool);
scheduler.scheduleTask(task); scheduler.scheduleTask(task);
log.debug("Scheduled updates for " + changes.size() + " statements."); log.debug("Scheduled updates for " + changes.size() + " statements.");
} }
@ -129,13 +144,13 @@ public class SearchIndexerImpl implements SearchIndexer {
if (uris == null || uris.isEmpty()) { if (uris == null || uris.isEmpty()) {
return; return;
} }
if (taskQueue.isShutdown()) { if (taskQueue.isShutdown()) {
throw new IllegalStateException("SearchIndexer is shut down."); throw new IllegalStateException("SearchIndexer is shut down.");
} }
Task task = new UpdateUrisTask(uris, excluders, modifiers, Task task = new UpdateUrisTask(uris, createExcludersList(),
wadf.getIndividualDao(), listeners, pool); createModifiersList(), wadf.getIndividualDao(), listeners, pool);
scheduler.scheduleTask(task); scheduler.scheduleTask(task);
log.debug("Scheduled updates for " + uris.size() + " uris."); log.debug("Scheduled updates for " + uris.size() + " uris.");
} }
@ -145,13 +160,42 @@ public class SearchIndexerImpl implements SearchIndexer {
if (taskQueue.isShutdown()) { if (taskQueue.isShutdown()) {
throw new IllegalStateException("SearchIndexer is shut down."); throw new IllegalStateException("SearchIndexer is shut down.");
} }
Task task = new RebuildIndexTask(excluders, modifiers, Task task = new RebuildIndexTask(createExcludersList(),
wadf.getIndividualDao(), listeners, pool); createModifiersList(), wadf.getIndividualDao(), listeners, pool);
scheduler.scheduleTask(task); scheduler.scheduleTask(task);
log.debug("Scheduled a full rebuild."); log.debug("Scheduled a full rebuild.");
} }
/**
 * Package the configured excluders for a task: the timing-accumulating
 * list when the developer option is set, the basic list otherwise.
 */
private SearchIndexExcluderList createExcludersList() {
    return isDeveloperOptionSet()
            ? new SearchIndexExcluderListDeveloper(excluders)
            : new SearchIndexExcluderListBasic(excluders);
}
/**
 * Package the configured modifiers for a task: the timing-accumulating
 * list when the developer option is set, the basic list otherwise.
 */
private DocumentModifierList createModifiersList() {
    return isDeveloperOptionSet()
            ? new DocumentModifierListDeveloper(modifiers)
            : new DocumentModifierListBasic(modifiers);
}
/**
 * Package the configured URI finders for a task: the timing-accumulating
 * list when the developer option is set, the basic list otherwise.
 */
private IndexingUriFinderList createFindersList() {
    return isDeveloperOptionSet()
            ? new IndexingUriFinderListDeveloper(uriFinders)
            : new IndexingUriFinderListBasic(uriFinders);
}
/**
 * Ask the developer settings whether the breakdown timings for indexing
 * should be logged.
 */
private boolean isDeveloperOptionSet() {
    DeveloperSettings settings = DeveloperSettings.getInstance();
    return settings.getBoolean(SEARCH_INDEX_LOG_INDEXING_BREAKDOWN_TIMINGS);
}
@Override @Override
public void pause() { public void pause() {
scheduler.pause(); scheduler.pause();
@ -327,11 +371,22 @@ public class SearchIndexerImpl implements SearchIndexer {
@Override @Override
public void run() { public void run() {
current.set(new QueueStatus(task)); current.set(new QueueStatus(task));
setWorkLevel(WORKING);
log.debug("starting task: " + task); log.debug("starting task: " + task);
task.run(); task.run();
current.set(new QueueStatus(SearchIndexerStatus.idle())); current.set(new QueueStatus(SearchIndexerStatus.idle()));
setWorkLevel(IDLE);
log.debug("ended task: " + task); log.debug("ended task: " + task);
} }
/**
 * Record the work level on the current thread, provided that it is a
 * VitroBackgroundThread. Any other kind of thread is left untouched.
 */
private void setWorkLevel(WorkLevel level) {
    Thread me = Thread.currentThread();
    if (!(me instanceof VitroBackgroundThread)) {
        return;
    }
    ((VitroBackgroundThread) me).setWorkLevel(level);
}
} }
/** Either a specific status or a task to interrogate. */ /** Either a specific status or a task to interrogate. */
@ -429,12 +484,23 @@ public class SearchIndexerImpl implements SearchIndexer {
@Override @Override
public void run() { public void run() {
try { try {
setWorkLevel(WORKING);
workUnit.run(); workUnit.run();
setWorkLevel(IDLE);
} finally { } finally {
task.notifyWorkUnitCompletion(workUnit); task.notifyWorkUnitCompletion(workUnit);
} }
} }
/**
 * Record the work level on the current thread, provided that it is a
 * VitroBackgroundThread. Any other kind of thread is left untouched.
 */
private void setWorkLevel(WorkLevel level) {
    Thread me = Thread.currentThread();
    if (!(me instanceof VitroBackgroundThread)) {
        return;
    }
    ((VitroBackgroundThread) me).setWorkLevel(level);
}
} }
} }

View file

@ -0,0 +1,40 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
/**
* An ordered list of DocumentModifier objects, in a handy package.
*
* Implementations should make a protective copy of the list of
* DocumentModifiers. Implementations must be thread-safe, because
* modifyDocument() may be called by multiple threads.
*
* The life-cycle is:
*
* <pre>
* startIndexing(),
* 0 or more modifyDocument() by multiple threads,
* stopIndexing().
* </pre>
*
* Because modifyDocument() might never be called, stopIndexing() must be
* able to cope with a run in which no documents were modified.
*/
public interface DocumentModifierList {
/**
* Do any required setup on the individual modifiers. This is the first
* step in the life-cycle.
*/
void startIndexing();
/**
* Do any required teardown on the individual modifiers. This is the last
* step in the life-cycle.
*/
void stopIndexing();
/**
* Exercise the list of modifiers, making changes to this document based on
* this individual.
*
* @param ind
* the individual on which the changes are based.
* @param doc
* the document that receives the changes.
*/
void modifyDocument(Individual ind, SearchInputDocument doc);
}

View file

@ -0,0 +1,42 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
/**
 * The plain implementation: runs every modifier, in order, with no
 * instrumentation.
 */
public class DocumentModifierListBasic implements DocumentModifierList {
    /** Read-only protective copy of the modifiers, in their original order. */
    private final List<DocumentModifier> modifiers;

    /**
     * @param modifiers
     *            the modifiers to apply; copied, so later changes to the
     *            caller's collection are not seen here.
     */
    public DocumentModifierListBasic(
            Collection<? extends DocumentModifier> modifiers) {
        List<DocumentModifier> copy = new ArrayList<DocumentModifier>(
                modifiers);
        this.modifiers = Collections.unmodifiableList(copy);
    }

    /** No setup is needed. */
    @Override
    public void startIndexing() {
        // Nothing to do.
    }

    /** No teardown is needed. */
    @Override
    public void stopIndexing() {
        // Nothing to do.
    }

    /** Hand the individual and the document to each modifier in turn. */
    @Override
    public void modifyDocument(Individual ind, SearchInputDocument doc) {
        for (int i = 0; i < modifiers.size(); i++) {
            modifiers.get(i).modifyDocument(ind, doc);
        }
    }
}

View file

@ -0,0 +1,101 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
/**
 * An implementation that accumulates timing figures for each modifier and
 * writes them to the log.
 *
 * Note that this must be thread-safe: the counters are atomic, and the list
 * of timings is never changed after construction.
 */
public class DocumentModifierListDeveloper implements DocumentModifierList {
    private static final Log log = LogFactory
            .getLog(DocumentModifierListDeveloper.class);

    /** One timing accumulator per modifier, in the original order. */
    private final List<ModifierTiming> timings;

    /** How many times modifyDocument() has been called. */
    private final AtomicInteger count = new AtomicInteger();

    /**
     * @param modifiers
     *            the modifiers to apply and to time; each is wrapped in its
     *            own accumulator, so later changes to the caller's
     *            collection are not seen here.
     */
    public DocumentModifierListDeveloper(
            Collection<? extends DocumentModifier> modifiers) {
        List<ModifierTiming> list = new ArrayList<>();
        for (DocumentModifier modifier : modifiers) {
            list.add(new ModifierTiming(modifier));
        }
        this.timings = Collections.unmodifiableList(list);
    }

    @Override
    public void startIndexing() {
        // Nothing to do.
    }

    /**
     * Each time a modifier is run, accumulate the timings for it.
     */
    @Override
    public void modifyDocument(Individual ind, SearchInputDocument doc) {
        count.incrementAndGet();
        for (ModifierTiming timing : timings) {
            long startTime = System.currentTimeMillis();
            timing.getModifier().modifyDocument(ind, doc);
            timing.addElapsedTime(System.currentTimeMillis() - startTime);
        }
    }

    /**
     * Write the timings to the log.
     *
     * If modifyDocument() was never called, the averages are reported as 0
     * instead of dividing by zero.
     */
    @Override
    public void stopIndexing() {
        // Read the counter once, so every line of the report agrees.
        int calls = count.get();

        StringBuilder message = new StringBuilder(String.format(
                "Timings for %d modifiers after %d calls:", timings.size(),
                calls));
        for (ModifierTiming timing : timings) {
            long totalMillis = timing.getTotal();
            double totalSeconds = totalMillis / 1000.0;
            // The life-cycle permits zero calls; avoid ArithmeticException.
            long average = (calls == 0) ? 0L : totalMillis / calls;
            message.append(String
                    .format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
                            calls, totalSeconds, average,
                            timing.getModifier()));
        }
        log.info(message.toString());
    }

    /** Pairs a modifier with the total time spent running it. */
    private static class ModifierTiming {
        private final DocumentModifier modifier;
        private final AtomicLong totalElapsedMillis = new AtomicLong();

        public ModifierTiming(DocumentModifier modifier) {
            this.modifier = modifier;
        }

        public DocumentModifier getModifier() {
            return modifier;
        }

        /** Total elapsed milliseconds, kept as a long to avoid truncation. */
        public long getTotal() {
            return totalElapsedMillis.get();
        }

        public void addElapsedTime(long elapsed) {
            totalElapsedMillis.addAndGet(elapsed);
        }
    }
}

View file

@ -0,0 +1,41 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.exclusions;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
/**
* An ordered list of SearchIndexExcluder objects, in a handy package.
*
* Implementations should make a protective copy of the list of
* SearchIndexExcluders. Implementations must be thread-safe, because
* isExcluded() may be called by multiple threads.
*
* The life-cycle is:
*
* <pre>
* startIndexing(),
* 0 or more isExcluded() by multiple threads,
* stopIndexing().
* </pre>
*
* Because isExcluded() might never be called, stopIndexing() must be able
* to cope with a run in which no individuals were checked.
*/
public interface SearchIndexExcluderList {
/**
* Do any required setup on the individual Excluders. This is the first
* step in the life-cycle.
*/
void startIndexing();
/**
* Do any required teardown on the individual Excluders. This is the last
* step in the life-cycle.
*/
void stopIndexing();
/**
* Poll the list of excluders regarding this individual.
*
* If any returns non-null, the individual is excluded. If all return null,
* the individual is not excluded.
*
* @param ind
* the individual to be checked.
* @return true if the individual is excluded, false if it is not.
*/
boolean isExcluded(Individual ind);
}

View file

@ -0,0 +1,51 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.exclusions;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
/**
 * The plain implementation: polls the excluders, in order, with no
 * instrumentation.
 */
public class SearchIndexExcluderListBasic implements SearchIndexExcluderList {
    private static final Log log = LogFactory
            .getLog(SearchIndexExcluderListBasic.class);

    /** Read-only protective copy of the excluders, in their original order. */
    private final List<SearchIndexExcluder> excluders;

    /**
     * @param excluders
     *            the excluders to poll; copied, so later changes to the
     *            caller's collection are not seen here.
     */
    public SearchIndexExcluderListBasic(
            Collection<? extends SearchIndexExcluder> excluders) {
        List<SearchIndexExcluder> copy = new ArrayList<SearchIndexExcluder>(
                excluders);
        this.excluders = Collections.unmodifiableList(copy);
    }

    /** No setup is needed. */
    @Override
    public void startIndexing() {
        // Nothing to do.
    }

    /** No teardown is needed. */
    @Override
    public void stopIndexing() {
        // Nothing to do.
    }

    /**
     * Poll the excluders in order. The first one to answer with anything
     * other than DONT_EXCLUDE settles the matter, and the rest are not asked.
     */
    @Override
    public boolean isExcluded(Individual ind) {
        for (SearchIndexExcluder excluder : excluders) {
            String reason = excluder.checkForExclusion(ind);
            if (reason == SearchIndexExcluder.DONT_EXCLUDE) {
                continue;
            }
            log.debug("Excluded " + ind + " because " + reason);
            return true;
        }
        return false;
    }
}

View file

@ -0,0 +1,122 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.exclusions;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
/**
 * An implementation that accumulates timing figures for each excluder and
 * writes them to the log.
 *
 * Note that this must be thread-safe: the counters are atomic, and the list
 * of timings is never changed after construction.
 */
public class SearchIndexExcluderListDeveloper implements
        SearchIndexExcluderList {
    private static final Log log = LogFactory
            .getLog(SearchIndexExcluderListDeveloper.class);

    /** One timing accumulator per excluder, in the original order. */
    private final List<ExcluderTiming> timings;

    /** How many times isExcluded() has been called. */
    private final AtomicInteger count = new AtomicInteger();

    /**
     * @param excluders
     *            the excluders to poll and to time; each is wrapped in its
     *            own accumulator, so later changes to the caller's
     *            collection are not seen here.
     */
    public SearchIndexExcluderListDeveloper(
            Collection<? extends SearchIndexExcluder> excluders) {
        List<ExcluderTiming> list = new ArrayList<>();
        for (SearchIndexExcluder excluder : excluders) {
            list.add(new ExcluderTiming(excluder));
        }
        this.timings = Collections.unmodifiableList(list);
    }

    @Override
    public void startIndexing() {
        // Nothing to do.
    }

    /**
     * Each time a excluder is run, accumulate the timings for it. Note that
     * those at the top of the list will run more times than those at the
     * bottom, because polling stops at the first excluder that excludes.
     */
    @Override
    public boolean isExcluded(Individual ind) {
        count.incrementAndGet();
        for (ExcluderTiming timing : timings) {
            long startTime = System.currentTimeMillis();
            String message = timing.getExcluder().checkForExclusion(ind);
            timing.incrementCount();
            timing.addElapsedTime(System.currentTimeMillis() - startTime);
            // NOTE(review): reference comparison against the DONT_EXCLUDE
            // constant, kept as written -- confirm it is a sentinel value.
            if (message != SearchIndexExcluder.DONT_EXCLUDE) {
                log.debug("Excluded " + ind + " because " + message);
                return true;
            }
        }
        return false;
    }

    /**
     * Write the timings to the log.
     *
     * An excluder that was never invoked (no calls at all, or an earlier
     * excluder always excluded first) is reported with an average of 0
     * instead of dividing by zero.
     */
    @Override
    public void stopIndexing() {
        StringBuilder message = new StringBuilder(String.format(
                "Timings for %d excluders after %d calls:", timings.size(),
                count.get()));
        for (ExcluderTiming timing : timings) {
            int thisCount = timing.getCount();
            long totalMillis = timing.getTotal();
            double totalSeconds = totalMillis / 1000.0;
            // Excluders low in the list may never run; avoid
            // ArithmeticException.
            long average = (thisCount == 0) ? 0L : totalMillis / thisCount;
            message.append(String
                    .format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
                            thisCount, totalSeconds, average,
                            timing.getExcluder()));
        }
        log.info(message.toString());
    }

    /** Pairs an excluder with its invocation count and total elapsed time. */
    private static class ExcluderTiming {
        private final SearchIndexExcluder excluder;
        private final AtomicInteger invocationCount = new AtomicInteger();
        private final AtomicLong totalElapsedMillis = new AtomicLong();

        public ExcluderTiming(SearchIndexExcluder excluder) {
            this.excluder = excluder;
        }

        public SearchIndexExcluder getExcluder() {
            return excluder;
        }

        public void incrementCount() {
            invocationCount.incrementAndGet();
        }

        /** Total elapsed milliseconds, kept as a long to avoid truncation. */
        public long getTotal() {
            return totalElapsedMillis.get();
        }

        public int getCount() {
            return invocationCount.get();
        }

        public void addElapsedTime(long elapsed) {
            totalElapsedMillis.addAndGet(elapsed);
        }
    }
}

View file

@ -0,0 +1,41 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.indexing;
import java.util.Set;
import com.hp.hpl.jena.rdf.model.Statement;
/**
* An ordered list of IndexingUriFinder objects, in a handy package.
*
* Implementations should make a protective copy of the list of
* IndexingUriFinders. Implementations must be thread-safe, because
* findAdditionalUris() may be called by multiple threads.
*
* The life-cycle is:
*
* <pre>
* startIndexing(),
* 0 or more findAdditionalUris() by multiple threads,
* stopIndexing().
* </pre>
*
* Because findAdditionalUris() might never be called, stopIndexing() must
* be able to cope with a run in which no statements were examined.
*/
public interface IndexingUriFinderList {
/**
* Do any required setup on the individual finders. This is the first step
* in the life-cycle.
*/
void startIndexing();
/**
* Do any required teardown on the individual finders. This is the last
* step in the life-cycle.
*/
void stopIndexing();
/**
* Exercise the list of finders, and return a set of the URIs that they
* found for this statement.
*
* @param stmt
* the statement to be examined by each finder.
* @return the URIs that the finders found for this statement.
*/
Set<String> findAdditionalUris(Statement stmt);
}

View file

@ -0,0 +1,48 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.indexing;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.hp.hpl.jena.rdf.model.Statement;
/**
 * The basic implementation: passes the life-cycle calls through to each
 * finder and merges the URIs that they find, with no instrumentation.
 */
public class IndexingUriFinderListBasic implements IndexingUriFinderList {
    /**
     * Read-only protective copy of the finders, in their original order.
     *
     * The list is never modified after construction, so an unmodifiable
     * wrapper (as used by the other list implementations) is what makes it
     * safe to iterate from several threads; a synchronized wrapper would not
     * guard iteration anyway.
     */
    private final List<IndexingUriFinder> finders;

    /**
     * @param finders
     *            the finders to exercise; copied, so later changes to the
     *            caller's collection are not seen here.
     */
    public IndexingUriFinderListBasic(
            Collection<? extends IndexingUriFinder> finders) {
        this.finders = Collections
                .unmodifiableList(new ArrayList<IndexingUriFinder>(finders));
    }

    /** Tell each finder that indexing is starting. */
    @Override
    public void startIndexing() {
        for (IndexingUriFinder finder : finders) {
            finder.startIndexing();
        }
    }

    /** Tell each finder that indexing has ended. */
    @Override
    public void stopIndexing() {
        for (IndexingUriFinder finder : finders) {
            finder.endIndexing();
        }
    }

    /**
     * Ask every finder about this statement and merge the answers.
     *
     * @return a new set holding the URIs found by all of the finders.
     */
    @Override
    public Set<String> findAdditionalUris(Statement stmt) {
        Set<String> uris = new HashSet<>();
        for (IndexingUriFinder uriFinder : finders) {
            uris.addAll(uriFinder.findAdditionalURIsToIndex(stmt));
        }
        return uris;
    }
}

View file

@ -0,0 +1,115 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.indexing;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.rdf.model.Statement;
/**
 * An implementation that accumulates timing figures for each finder and writes
 * them to the log.
 *
 * Note that this must be thread-safe: the counters are atomic, and the list
 * of timings is never changed after construction.
 */
public class IndexingUriFinderListDeveloper implements IndexingUriFinderList {
    private static final Log log = LogFactory
            .getLog(IndexingUriFinderListDeveloper.class);

    /** One timing accumulator per finder, in the original order. */
    private final List<FinderTiming> timings;

    /** How many times findAdditionalUris() has been called. */
    private final AtomicInteger count = new AtomicInteger();

    /**
     * @param finders
     *            the finders to exercise and to time; each is wrapped in its
     *            own accumulator, so later changes to the caller's
     *            collection are not seen here.
     */
    public IndexingUriFinderListDeveloper(
            Collection<? extends IndexingUriFinder> finders) {
        List<FinderTiming> list = new ArrayList<>();
        for (IndexingUriFinder finder : finders) {
            list.add(new FinderTiming(finder));
        }
        this.timings = Collections.unmodifiableList(list);
    }

    /** Tell each finder that indexing is starting. */
    @Override
    public void startIndexing() {
        for (FinderTiming timing : timings) {
            timing.getFinder().startIndexing();
        }
    }

    /**
     * Each time a finder is run, accumulate the timings for it.
     */
    @Override
    public Set<String> findAdditionalUris(Statement stmt) {
        count.incrementAndGet();
        Set<String> uris = new HashSet<>();
        for (FinderTiming timing : timings) {
            long startTime = System.currentTimeMillis();
            uris.addAll(timing.getFinder().findAdditionalURIsToIndex(stmt));
            timing.addElapsedTime(System.currentTimeMillis() - startTime);
        }
        return uris;
    }

    /**
     * Tell each finder that indexing has ended, then write the timings to
     * the log.
     *
     * If findAdditionalUris() was never called, the averages are reported as
     * 0 instead of dividing by zero.
     */
    @Override
    public void stopIndexing() {
        for (FinderTiming timing : timings) {
            timing.getFinder().endIndexing();
        }

        // Read the counter once, so every line of the report agrees.
        int calls = count.get();

        StringBuilder message = new StringBuilder(String.format(
                "Timings for %d finders after %d calls:", timings.size(),
                calls));
        for (FinderTiming timing : timings) {
            long totalMillis = timing.getTotal();
            double totalSeconds = totalMillis / 1000.0;
            // The life-cycle permits zero calls; avoid ArithmeticException.
            long average = (calls == 0) ? 0L : totalMillis / calls;
            message.append(String
                    .format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
                            calls, totalSeconds, average,
                            timing.getFinder()));
        }
        log.info(message.toString());
    }

    // ----------------------------------------------------------------------
    // Helper classes
    // ----------------------------------------------------------------------

    /** Pairs a finder with the total time spent running it. */
    private static class FinderTiming {
        private final IndexingUriFinder finder;
        private final AtomicLong totalElapsedMillis = new AtomicLong();

        public FinderTiming(IndexingUriFinder finder) {
            this.finder = finder;
        }

        public IndexingUriFinder getFinder() {
            return finder;
        }

        /** Total elapsed milliseconds, kept as a long to avoid truncation. */
        public long getTotal() {
            return totalElapsedMillis.get();
        }

        public void addElapsedTime(long elapsed) {
            totalElapsedMillis.addAndGet(elapsed);
        }
    }
}

View file

@ -2,13 +2,12 @@
package edu.cornell.mannlib.vitro.webapp.searchindex.tasks; package edu.cornell.mannlib.vitro.webapp.searchindex.tasks;
import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Set; import java.util.Set;
import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.Statement;
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder; import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderList;
/** /**
* Ask all of the URI Finders to find URIs that might be affected by this * Ask all of the URI Finders to find URIs that might be affected by this
@ -16,21 +15,19 @@ import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder;
*/ */
public class FindUrisForStatementWorkUnit implements Runnable { public class FindUrisForStatementWorkUnit implements Runnable {
private final Statement stmt; private final Statement stmt;
private final Collection<IndexingUriFinder> uriFinders; private final IndexingUriFinderList finders;
private final Set<String> uris; private final Set<String> uris;
public FindUrisForStatementWorkUnit(Statement stmt, public FindUrisForStatementWorkUnit(Statement stmt,
Collection<IndexingUriFinder> uriFinders) { IndexingUriFinderList finders) {
this.stmt = stmt; this.stmt = stmt;
this.uriFinders = uriFinders; this.finders = finders;
this.uris = new HashSet<>(); this.uris = new HashSet<>();
} }
@Override @Override
public void run() { public void run() {
for (IndexingUriFinder uriFinder : uriFinders) { uris.addAll(finders.findAdditionalUris(stmt));
uris.addAll(uriFinder.findAdditionalURIsToIndex(stmt));
}
} }
public Statement getStatement() { public Statement getStatement() {

View file

@ -7,10 +7,8 @@ import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndex
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State.REBUILDING; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State.REBUILDING;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Date; import java.util.Date;
import java.util.List;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -26,8 +24,8 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatu
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList;
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task;
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder; import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList;
/** /**
* Get the URIs of all individuals in the model. Update each of their search * Get the URIs of all individuals in the model. Update each of their search
@ -40,8 +38,8 @@ public class RebuildIndexTask implements Task {
private static final Log log = LogFactory.getLog(RebuildIndexTask.class); private static final Log log = LogFactory.getLog(RebuildIndexTask.class);
private final IndividualDao indDao; private final IndividualDao indDao;
private final List<SearchIndexExcluder> excluders; private final SearchIndexExcluderList excluders;
private final List<DocumentModifier> modifiers; private final DocumentModifierList modifiers;
private final ListenerList listeners; private final ListenerList listeners;
private final WorkerThreadPool pool; private final WorkerThreadPool pool;
private final SearchEngine searchEngine; private final SearchEngine searchEngine;
@ -51,11 +49,11 @@ public class RebuildIndexTask implements Task {
private volatile SearchIndexerStatus status; private volatile SearchIndexerStatus status;
public RebuildIndexTask(Collection<SearchIndexExcluder> excluders, public RebuildIndexTask(SearchIndexExcluderList excluders,
Collection<DocumentModifier> modifiers, IndividualDao indDao, DocumentModifierList modifiers, IndividualDao indDao,
ListenerList listeners, WorkerThreadPool pool) { ListenerList listeners, WorkerThreadPool pool) {
this.excluders = new ArrayList<>(excluders); this.excluders = excluders;
this.modifiers = new ArrayList<>(modifiers); this.modifiers = modifiers;
this.indDao = indDao; this.indDao = indDao;
this.listeners = listeners; this.listeners = listeners;
this.pool = pool; this.pool = pool;

View file

@ -13,8 +13,6 @@ import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.RDFTYPE; import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.RDFTYPE;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.URI; import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List; import java.util.List;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
@ -34,7 +32,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
public class UpdateDocumentWorkUnit implements Runnable { public class UpdateDocumentWorkUnit implements Runnable {
private static final Log log = LogFactory private static final Log log = LogFactory
@ -45,16 +43,15 @@ public class UpdateDocumentWorkUnit implements Runnable {
private static final String URI_RDFS_LABEL = RDFS.label.getURI(); private static final String URI_RDFS_LABEL = RDFS.label.getURI();
private final Individual ind; private final Individual ind;
private final List<DocumentModifier> modifiers; private final DocumentModifierList modifiers;
private final SearchEngine searchEngine; private final SearchEngine searchEngine;
public UpdateDocumentWorkUnit(Individual ind, public UpdateDocumentWorkUnit(Individual ind, DocumentModifierList modifiers) {
Collection<DocumentModifier> modifiers) {
this.ind = ind; this.ind = ind;
this.modifiers = new ArrayList<>(modifiers); this.modifiers = modifiers;
this.searchEngine = ApplicationUtils.instance().getSearchEngine(); this.searchEngine = ApplicationUtils.instance().getSearchEngine();
} }
public Individual getInd() { public Individual getInd() {
return ind; return ind;
} }
@ -72,9 +69,7 @@ public class UpdateDocumentWorkUnit implements Runnable {
addDataPropertyText(doc); addDataPropertyText(doc);
addEntityBoost(doc); addEntityBoost(doc);
for (DocumentModifier modifier : modifiers) { modifiers.modifyDocument(ind, doc);
modifier.modifyDocument(ind, doc);
}
addIndexedTime(doc); addIndexedTime(doc);
@ -170,10 +165,10 @@ public class UpdateDocumentWorkUnit implements Runnable {
} }
private void addEntityBoost(SearchInputDocument doc) { private void addEntityBoost(SearchInputDocument doc) {
Float boost = ind.getSearchBoost(); Float boost = ind.getSearchBoost();
if(boost != null && ! boost.equals(0.0F)) { if (boost != null && !boost.equals(0.0F)) {
doc.setDocumentBoost(boost); doc.setDocumentBoost(boost);
} }
} }
private void addIndexedTime(SearchInputDocument doc) { private void addIndexedTime(SearchInputDocument doc) {

View file

@ -26,9 +26,9 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatu
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList;
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task;
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder; import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList;
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinder; import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderList;
/** /**
* Receive a collection of statements that have been added to the model, or * Receive a collection of statements that have been added to the model, or
@ -53,9 +53,9 @@ public class UpdateStatementsTask implements Task {
.getLog(UpdateStatementsTask.class); .getLog(UpdateStatementsTask.class);
private final List<Statement> changes; private final List<Statement> changes;
private final Set<IndexingUriFinder> uriFinders; private final IndexingUriFinderList uriFinders;
private final Set<SearchIndexExcluder> excluders; private final SearchIndexExcluderList excluders;
private final Set<DocumentModifier> modifiers; private final DocumentModifierList modifiers;
private final IndividualDao indDao; private final IndividualDao indDao;
private final ListenerList listeners; private final ListenerList listeners;
private final WorkerThreadPool pool; private final WorkerThreadPool pool;
@ -64,10 +64,9 @@ public class UpdateStatementsTask implements Task {
private final Status status; private final Status status;
public UpdateStatementsTask(List<Statement> changes, public UpdateStatementsTask(List<Statement> changes,
Set<IndexingUriFinder> uriFinders, IndexingUriFinderList uriFinders,
Set<SearchIndexExcluder> excluders, SearchIndexExcluderList excluders, DocumentModifierList modifiers,
Set<DocumentModifier> modifiers, IndividualDao indDao, IndividualDao indDao, ListenerList listeners, WorkerThreadPool pool) {
ListenerList listeners, WorkerThreadPool pool) {
this.changes = new ArrayList<>(changes); this.changes = new ArrayList<>(changes);
this.uriFinders = uriFinders; this.uriFinders = uriFinders;
this.excluders = excluders; this.excluders = excluders;
@ -94,7 +93,8 @@ public class UpdateStatementsTask implements Task {
} }
private void findAffectedUris() { private void findAffectedUris() {
tellFindersWeAreStarting(); log.debug("Tell finders we are starting.");
uriFinders.startIndexing();
for (Statement stmt : changes) { for (Statement stmt : changes) {
if (isInterrupted()) { if (isInterrupted()) {
@ -106,21 +106,8 @@ public class UpdateStatementsTask implements Task {
} }
waitForWorkUnitsToComplete(); waitForWorkUnitsToComplete();
tellFindersWeAreStopping();
}
private void tellFindersWeAreStarting() {
log.debug("Tell finders we are starting.");
for (IndexingUriFinder uriFinder : uriFinders) {
uriFinder.startIndexing();
}
}
private void tellFindersWeAreStopping() {
log.debug("Tell finders we are stopping."); log.debug("Tell finders we are stopping.");
for (IndexingUriFinder uriFinder : uriFinders) { uriFinders.stopIndexing();
uriFinder.endIndexing();
}
} }
private boolean isInterrupted() { private boolean isInterrupted() {

View file

@ -7,7 +7,6 @@ import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndex
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STOP_PROCESSING_URIS; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexer.Event.Type.STOP_PROCESSING_URIS;
import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State.PROCESSING_URIS; import static edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerStatus.State.PROCESSING_URIS;
import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Date; import java.util.Date;
import java.util.HashSet; import java.util.HashSet;
@ -30,8 +29,8 @@ import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerList;
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task;
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder; import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList;
/** /**
* Given a list of URIs, remove the ones that don't belong in the index and * Given a list of URIs, remove the ones that don't belong in the index and
@ -49,8 +48,8 @@ public class UpdateUrisTask implements Task {
private final Set<String> uris; private final Set<String> uris;
private final IndividualDao indDao; private final IndividualDao indDao;
private final List<SearchIndexExcluder> excluders; private final SearchIndexExcluderList excluders;
private final List<DocumentModifier> modifiers; private final DocumentModifierList modifiers;
private final ListenerList listeners; private final ListenerList listeners;
private final WorkerThreadPool pool; private final WorkerThreadPool pool;
@ -58,12 +57,11 @@ public class UpdateUrisTask implements Task {
private final SearchEngine searchEngine; private final SearchEngine searchEngine;
public UpdateUrisTask(Collection<String> uris, public UpdateUrisTask(Collection<String> uris,
Collection<SearchIndexExcluder> excluders, SearchIndexExcluderList excluders, DocumentModifierList modifiers,
Collection<DocumentModifier> modifiers, IndividualDao indDao, IndividualDao indDao, ListenerList listeners, WorkerThreadPool pool) {
ListenerList listeners, WorkerThreadPool pool) {
this.uris = new HashSet<>(uris); this.uris = new HashSet<>(uris);
this.excluders = new ArrayList<>(excluders); this.excluders = excluders;
this.modifiers = new ArrayList<>(modifiers); this.modifiers = modifiers;
this.indDao = indDao; this.indDao = indDao;
this.listeners = listeners; this.listeners = listeners;
this.pool = pool; this.pool = pool;
@ -77,6 +75,9 @@ public class UpdateUrisTask implements Task {
public void run() { public void run() {
listeners.fireEvent(new Event(START_PROCESSING_URIS, status listeners.fireEvent(new Event(START_PROCESSING_URIS, status
.getSearchIndexerStatus())); .getSearchIndexerStatus()));
excluders.startIndexing();
modifiers.startIndexing();
for (String uri : uris) { for (String uri : uris) {
if (isInterrupted()) { if (isInterrupted()) {
log.info("Interrupted: " + status.getSearchIndexerStatus()); log.info("Interrupted: " + status.getSearchIndexerStatus());
@ -92,6 +93,9 @@ public class UpdateUrisTask implements Task {
} }
pool.waitUntilIdle(); pool.waitUntilIdle();
commitChanges(); commitChanges();
excluders.stopIndexing();
modifiers.stopIndexing();
listeners.fireEvent(new Event(STOP_PROCESSING_URIS, status listeners.fireEvent(new Event(STOP_PROCESSING_URIS, status
.getSearchIndexerStatus())); .getSearchIndexerStatus()));
} }
@ -131,14 +135,7 @@ public class UpdateUrisTask implements Task {
} }
private boolean isExcluded(Individual ind) { private boolean isExcluded(Individual ind) {
for (SearchIndexExcluder excluder : excluders) { return excluders.isExcluded(ind);
String message = excluder.checkForExclusion(ind);
if (message != SearchIndexExcluder.DONT_EXCLUDE) {
log.debug("Excluded " + ind + " because " + message);
return true;
}
}
return false;
} }
/** A delete is fast enough to be done synchronously. */ /** A delete is fast enough to be done synchronously. */

View file

@ -110,6 +110,13 @@ public enum Key {
SEARCH_INDEX_DOCUMENT_RESTRICTION( SEARCH_INDEX_DOCUMENT_RESTRICTION(
"developer.searchIndex.documentRestriction", false), "developer.searchIndex.documentRestriction", false),
/**
* Accumulate breakdown timings for search indexing, and log them at the end
* of the indexing operation.
*/
SEARCH_INDEX_LOG_INDEXING_BREAKDOWN_TIMINGS(
"developer.searchIndex.logIndexingBreakdownTimings", true),
/** /**
* If set, don't pass model change events to the search indexer. * If set, don't pass model change events to the search indexer.
*/ */

View file

@ -54,6 +54,7 @@ function DeveloperPanel(developerAjaxUrl) {
document.getElementById("developer_i18n_logStringRequests").disabled = !developerEnabled; document.getElementById("developer_i18n_logStringRequests").disabled = !developerEnabled;
document.getElementById("developer_loggingRDFService_enable").disabled = !developerEnabled; document.getElementById("developer_loggingRDFService_enable").disabled = !developerEnabled;
document.getElementById("developer_searchIndex_enable").disabled = !developerEnabled; document.getElementById("developer_searchIndex_enable").disabled = !developerEnabled;
document.getElementById("developer_searchIndex_logIndexingBreakdownTimings").disabled = !developerEnabled;
document.getElementById("developer_searchIndex_suppressModelChangeListener").disabled = !developerEnabled; document.getElementById("developer_searchIndex_suppressModelChangeListener").disabled = !developerEnabled;
document.getElementById("developer_searchDeletions_enable").disabled = !developerEnabled; document.getElementById("developer_searchDeletions_enable").disabled = !developerEnabled;
document.getElementById("developer_searchEngine_enable").disabled = !developerEnabled; document.getElementById("developer_searchEngine_enable").disabled = !developerEnabled;

View file

@ -68,11 +68,9 @@
<#elseif countsType == "STATEMENT_COUNTS"> <#elseif countsType == "STATEMENT_COUNTS">
Processed: ${counts.processed}, remaining: ${counts.remaining}, total: ${counts.total} Processed: ${counts.processed}, remaining: ${counts.remaining}, total: ${counts.total}
<#elseif countsType == "REBUILD_COUNTS"> <#elseif countsType == "REBUILD_COUNTS">
Number of document before rebuild: ${counts.documentsBefore}, after rebuild: Number of document before rebuild: ${counts.documentsBefore}
<#if counts.documentsAfter == 0> <#if counts.documentsAfter != 0>
UNKNOWN - after rebuild: ${counts.documentsAfter}
<#else>
${counts.documentsAfter}
</#if> </#if>
</#if> </#if>
</#macro> </#macro>

View file

@ -129,9 +129,13 @@
<@showTextbox "developer_searchIndex_documentRestriction", <@showTextbox "developer_searchIndex_documentRestriction",
"Restrict by document contents" /> "Restrict by document contents" />
</div> </div>
<@showCheckbox "developer_searchIndex_logIndexingBreakdownTimings",
"Log breakdown timings for indexing operation." />
<@showCheckbox "developer_searchDeletions_enable", "Log deletions." /> <@showCheckbox "developer_searchDeletions_enable", "Log deletions." />
<@showCheckbox "developer_searchIndex_suppressModelChangeListener", <div class="container">
"Suppress the automatic indexing of changed triples." /> <@showCheckbox "developer_searchIndex_suppressModelChangeListener",
"Suppress the automatic indexing of changed triples." />
</div>
</div> </div>
</div> </div>
</div> </div>