VIVO-871 Break out more tasks to improve the timings

UpdateDocumentWorkUnit contains a list of required DocumentModifiers.
They are required, but because they are in the list of DocumentModifiers, they are timed like the optional ones.

Similarly, UpdateUrisTask contains a required SearchIndexExcluder.
This commit is contained in:
Jim Blake 2015-01-16 11:34:31 -05:00
parent 0c477945a8
commit 27d3141bb6
6 changed files with 228 additions and 131 deletions

View file

@ -54,6 +54,7 @@ import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderLi
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListBasic; import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListBasic;
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListDeveloper; import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListDeveloper;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.RebuildIndexTask; import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.RebuildIndexTask;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateDocumentWorkUnit;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateStatementsTask; import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateStatementsTask;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateUrisTask; import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateUrisTask;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader; import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader;
@ -82,8 +83,8 @@ public class SearchIndexerImpl implements SearchIndexer {
private final WorkerThreadPool pool = new WorkerThreadPool(); private final WorkerThreadPool pool = new WorkerThreadPool();
private ServletContext ctx; private ServletContext ctx;
private Set<SearchIndexExcluder> excluders; private List<SearchIndexExcluder> excluders;
private Set<DocumentModifier> modifiers; private List<DocumentModifier> modifiers;
private Set<IndexingUriFinder> uriFinders; private Set<IndexingUriFinder> uriFinders;
private WebappDaoFactory wadf; private WebappDaoFactory wadf;
@ -107,9 +108,16 @@ public class SearchIndexerImpl implements SearchIndexer {
private void loadConfiguration() throws ConfigurationBeanLoaderException { private void loadConfiguration() throws ConfigurationBeanLoaderException {
ConfigurationBeanLoader beanLoader = new ConfigurationBeanLoader( ConfigurationBeanLoader beanLoader = new ConfigurationBeanLoader(
ModelAccess.on(ctx).getOntModel(DISPLAY), ctx); ModelAccess.on(ctx).getOntModel(DISPLAY), ctx);
excluders = beanLoader.loadAll(SearchIndexExcluder.class);
modifiers = beanLoader.loadAll(DocumentModifier.class);
uriFinders = beanLoader.loadAll(IndexingUriFinder.class); uriFinders = beanLoader.loadAll(IndexingUriFinder.class);
excluders = new ArrayList<>();
excluders.add(new UpdateUrisTask.ExcludeIfNoVClasses());
excluders.addAll(beanLoader.loadAll(SearchIndexExcluder.class));
modifiers = new ArrayList<>();
modifiers.addAll(new UpdateDocumentWorkUnit.MinimalDocumentModifiers()
.getList());
modifiers.addAll(beanLoader.loadAll(DocumentModifier.class));
} }
/** /**

View file

@ -67,7 +67,7 @@ public class DocumentModifierListDeveloper implements DocumentModifierList {
for (ModifierTiming timing : timings) { for (ModifierTiming timing : timings) {
int totalMillis = timing.getTotal(); int totalMillis = timing.getTotal();
float totalSeconds = totalMillis / 1000.0F; float totalSeconds = totalMillis / 1000.0F;
int average = totalMillis / count.get(); int average = (count.get() == 0) ? 0 : totalMillis / count.get();
message += String message += String
.format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s", .format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
count.get(), totalSeconds, average, count.get(), totalSeconds, average,

View file

@ -80,7 +80,7 @@ public class SearchIndexExcluderListDeveloper implements
int thisCount = timing.getCount(); int thisCount = timing.getCount();
int totalMillis = timing.getTotal(); int totalMillis = timing.getTotal();
float totalSeconds = totalMillis / 1000.0F; float totalSeconds = totalMillis / 1000.0F;
int average = totalMillis / thisCount; int average = (thisCount == 0) ? 0 : totalMillis / thisCount;
message += String message += String
.format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s", .format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
thisCount, totalSeconds, average, thisCount, totalSeconds, average,

View file

@ -71,12 +71,12 @@ public class IndexingUriFinderListDeveloper implements IndexingUriFinderList {
} }
String message = String.format( String message = String.format(
"Timings for %d modifiers after %d calls:", timings.size(), "Timings for %d URI finders after %d calls:", timings.size(),
count.get()); count.get());
for (FinderTiming timing : timings) { for (FinderTiming timing : timings) {
int totalMillis = timing.getTotal(); int totalMillis = timing.getTotal();
float totalSeconds = totalMillis / 1000.0F; float totalSeconds = totalMillis / 1000.0F;
int average = totalMillis / count.get(); int average = (count.get() == 0) ? 0 : totalMillis / count.get();
message += String message += String
.format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s", .format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
count.get(), totalSeconds, average, count.get(), totalSeconds, average,

View file

@ -13,6 +13,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.RDFTYPE; import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.RDFTYPE;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.URI; import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.URI;
import java.util.Arrays;
import java.util.List; import java.util.List;
import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.StringUtils;
@ -32,6 +33,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils; import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
public class UpdateDocumentWorkUnit implements Runnable { public class UpdateDocumentWorkUnit implements Runnable {
@ -60,31 +62,74 @@ public class UpdateDocumentWorkUnit implements Runnable {
public void run() { public void run() {
try { try {
SearchInputDocument doc = searchEngine.createInputDocument(); SearchInputDocument doc = searchEngine.createInputDocument();
addIdAndUri(doc);
addLabel(doc);
addClasses(doc);
addMostSpecificTypes(doc);
addObjectPropertyText(doc);
addDataPropertyText(doc);
addEntityBoost(doc);
modifiers.modifyDocument(ind, doc); modifiers.modifyDocument(ind, doc);
addIndexedTime(doc); addIndexedTime(doc);
searchEngine.add(doc); searchEngine.add(doc);
} catch (Exception e) { } catch (Exception e) {
log.warn("Failed to add '" + ind + "' to the search index.", e); log.warn("Failed to add '" + ind + "' to the search index.", e);
} }
} }
private void addIdAndUri(SearchInputDocument doc) { private void addIndexedTime(SearchInputDocument doc) {
doc.addField(INDEXEDTIME, (Object) new DateTime().getMillis());
}
// ----------------------------------------------------------------------
// Helper classes
// ----------------------------------------------------------------------
/**
 * Holds the required DocumentModifiers. These will be hardcoded at the
 * beginning of the list of DocumentModifiers.
 */
public static class MinimalDocumentModifiers {
    private final List<DocumentModifier> list;

    /**
     * Creates one instance of each required modifier, in the order in
     * which they are to be applied.
     */
    public MinimalDocumentModifiers() {
        // Arrays.asList yields a fixed-size list backed by the argument array.
        this.list = Arrays.<DocumentModifier> asList(new IdUriLabel(),
                new AddClasses(), new AddMostSpecificTypes(),
                new AddObjectPropertyText(), new AddDataPropertyText(),
                new AddEntityBoost());
    }

    /**
     * @return the required modifiers, in creation order.
     */
    public List<DocumentModifier> getList() {
        return list;
    }
}
/**
 * Base class for the required modifiers: provides a helper for adding text
 * to the all-text fields, and a shutdown() that does nothing.
 */
private abstract static class BaseDocumentModifier implements
        DocumentModifier {

    /**
     * Adds the text to both the ALLTEXT and ALLTEXTUNSTEMMED fields, after
     * passing it through Jsoup to obtain its text content. Nothing is added
     * if the text is blank, either before or after that conversion.
     *
     * @param doc
     *            the document that receives the fields.
     * @param raw
     *            the text to add; may be null or blank.
     */
    protected void addToAlltext(SearchInputDocument doc, String raw) {
        if (StringUtils.isBlank(raw)) {
            return;
        }

        String stripped = Jsoup.parse(raw).text();
        if (!StringUtils.isBlank(stripped)) {
            doc.addField(ALLTEXT, stripped);
            doc.addField(ALLTEXTUNSTEMMED, stripped);
        }
    }

    /** These modifiers have nothing to release. */
    @Override
    public void shutdown() {
        // Nothing to do.
    }
}
private static class IdUriLabel extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
addIdAndUri(ind, doc);
addLabel(ind, doc);
}
private void addIdAndUri(Individual ind, SearchInputDocument doc) {
doc.addField(DOCID, SearchIndexerUtils.getIdForUri(ind.getURI())); doc.addField(DOCID, SearchIndexerUtils.getIdForUri(ind.getURI()));
doc.addField(URI, ind.getURI()); doc.addField(URI, ind.getURI());
} }
private void addLabel(SearchInputDocument doc) { private void addLabel(Individual ind, SearchInputDocument doc) {
String name = ind.getRdfsLabel(); String name = ind.getRdfsLabel();
if (name == null) { if (name == null) {
name = ind.getLocalName(); name = ind.getLocalName();
@ -94,11 +139,19 @@ public class UpdateDocumentWorkUnit implements Runnable {
doc.addField(NAME_LOWERCASE_SINGLE_VALUED, name); doc.addField(NAME_LOWERCASE_SINGLE_VALUED, name);
} }
@Override
public String toString() {
return "REQUIRED: IdUriLabel";
}
}
/** /**
* For each class that the individual belongs to, record the class URI, the * For each class that the individual belongs to, record the class URI, the
* class group URI, the class Name, and the class boost. * class group URI, the class Name, and the class boost.
*/ */
private void addClasses(SearchInputDocument doc) { private static class AddClasses extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
List<VClass> vclasses = ind.getVClasses(false); List<VClass> vclasses = ind.getVClasses(false);
if (vclasses == null) { if (vclasses == null) {
return; return;
@ -125,7 +178,15 @@ public class UpdateDocumentWorkUnit implements Runnable {
} }
} }
private void addMostSpecificTypes(SearchInputDocument doc) { @Override
public String toString() {
return "REQUIRED: AddClasses";
}
}
private static class AddMostSpecificTypes extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
List<String> mstURIs = ind.getMostSpecificTypeURIs(); List<String> mstURIs = ind.getMostSpecificTypeURIs();
if (mstURIs != null) { if (mstURIs != null) {
for (String typeURI : mstURIs) { for (String typeURI : mstURIs) {
@ -136,8 +197,17 @@ public class UpdateDocumentWorkUnit implements Runnable {
} }
} }
private void addObjectPropertyText(SearchInputDocument doc) { @Override
List<ObjectPropertyStatement> stmts = ind.getObjectPropertyStatements(); public String toString() {
return "REQUIRED: AddMostSpecificTypes";
}
}
private static class AddObjectPropertyText extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
List<ObjectPropertyStatement> stmts = ind
.getObjectPropertyStatements();
if (stmts == null) { if (stmts == null) {
return; return;
} }
@ -150,7 +220,15 @@ public class UpdateDocumentWorkUnit implements Runnable {
} }
} }
private void addDataPropertyText(SearchInputDocument doc) { @Override
public String toString() {
return "REQUIRED: AddObjectPropertyText";
}
}
private static class AddDataPropertyText extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
List<DataPropertyStatement> stmts = ind.getDataPropertyStatements(); List<DataPropertyStatement> stmts = ind.getDataPropertyStatements();
if (stmts == null) { if (stmts == null) {
return; return;
@ -164,27 +242,25 @@ public class UpdateDocumentWorkUnit implements Runnable {
} }
} }
private void addEntityBoost(SearchInputDocument doc) { @Override
public String toString() {
return "REQUIRED: AddDataPropertyText";
}
}
private static class AddEntityBoost extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
Float boost = ind.getSearchBoost(); Float boost = ind.getSearchBoost();
if (boost != null && !boost.equals(0.0F)) { if (boost != null && !boost.equals(0.0F)) {
doc.setDocumentBoost(boost); doc.setDocumentBoost(boost);
} }
} }
private void addIndexedTime(SearchInputDocument doc) { @Override
doc.addField(INDEXEDTIME, (Object) new DateTime().getMillis()); public String toString() {
return "REQUIRED: AddEntityBoost";
}
} }
private void addToAlltext(SearchInputDocument doc, String raw) {
if (StringUtils.isBlank(raw)) {
return;
}
String clean = Jsoup.parse(raw).text();
if (StringUtils.isBlank(clean)) {
return;
}
doc.addField(ALLTEXT, clean);
doc.addField(ALLTEXTUNSTEMMED, clean);
}
} }

View file

@ -29,6 +29,7 @@ import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerLi
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task;
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool; import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList; import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList; import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList;
/** /**
@ -68,6 +69,8 @@ public class UpdateUrisTask implements Task {
this.status = new Status(uris.size(), 200, listeners); this.status = new Status(uris.size(), 200, listeners);
this.searchEngine = ApplicationUtils.instance().getSearchEngine(); this.searchEngine = ApplicationUtils.instance().getSearchEngine();
} }
@Override @Override
@ -83,7 +86,7 @@ public class UpdateUrisTask implements Task {
break; break;
} else { } else {
Individual ind = getIndividual(uri); Individual ind = getIndividual(uri);
if (ind == null || hasNoClass(ind) || isExcluded(ind)) { if (ind == null || isExcluded(ind)) {
deleteDocument(uri); deleteDocument(uri);
} else { } else {
updateDocument(ind); updateDocument(ind);
@ -124,15 +127,6 @@ public class UpdateUrisTask implements Task {
return ind; return ind;
} }
/**
 * Tells whether the individual has no VClasses (the list is null or
 * empty), writing a debug message to the log when that is the case.
 */
private boolean hasNoClass(Individual ind) {
    List<VClass> types = ind.getVClasses(false);
    boolean none = (types == null) || types.isEmpty();
    if (none) {
        log.debug("Individual " + ind + " has no classes.");
    }
    return none;
}
private boolean isExcluded(Individual ind) { private boolean isExcluded(Individual ind) {
return excluders.isExcluded(ind); return excluders.isExcluded(ind);
} }
@ -214,4 +208,23 @@ public class UpdateUrisTask implements Task {
} }
/**
 * Excludes any individual that has no VClasses. This will be first in the
 * list of SearchIndexExcluders.
 */
public static class ExcludeIfNoVClasses implements SearchIndexExcluder {

    /**
     * @return a message saying that the individual has no classes, or null
     *         if the individual has at least one class.
     */
    @Override
    public String checkForExclusion(Individual ind) {
        List<VClass> types = ind.getVClasses(false);
        boolean hasTypes = (types != null) && !types.isEmpty();
        return hasTypes ? null : "Individual " + ind + " has no classes.";
    }

    @Override
    public String toString() {
        return "ExcludeIfNoVClasses";
    }
}
} }