VIVO-871 Break out more tasks to improve the timings

UpdateDocumentWorkUnit contains a list of required DocumentModifiers.
They are required, but because they are in the list of DocumentModifiers, they are timed like the optional ones.

Similarly, UpdateUrisTask contains a required SearchIndexExcluder.
This commit is contained in:
Jim Blake 2015-01-16 11:34:31 -05:00
parent 0c477945a8
commit 27d3141bb6
6 changed files with 228 additions and 131 deletions

View file

@ -54,6 +54,7 @@ import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderLi
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListBasic;
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListDeveloper;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.RebuildIndexTask;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateDocumentWorkUnit;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateStatementsTask;
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateUrisTask;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader;
@ -82,8 +83,8 @@ public class SearchIndexerImpl implements SearchIndexer {
private final WorkerThreadPool pool = new WorkerThreadPool();
private ServletContext ctx;
private Set<SearchIndexExcluder> excluders;
private Set<DocumentModifier> modifiers;
private List<SearchIndexExcluder> excluders;
private List<DocumentModifier> modifiers;
private Set<IndexingUriFinder> uriFinders;
private WebappDaoFactory wadf;
@ -107,9 +108,16 @@ public class SearchIndexerImpl implements SearchIndexer {
private void loadConfiguration() throws ConfigurationBeanLoaderException {
ConfigurationBeanLoader beanLoader = new ConfigurationBeanLoader(
ModelAccess.on(ctx).getOntModel(DISPLAY), ctx);
excluders = beanLoader.loadAll(SearchIndexExcluder.class);
modifiers = beanLoader.loadAll(DocumentModifier.class);
uriFinders = beanLoader.loadAll(IndexingUriFinder.class);
excluders = new ArrayList<>();
excluders.add(new UpdateUrisTask.ExcludeIfNoVClasses());
excluders.addAll(beanLoader.loadAll(SearchIndexExcluder.class));
modifiers = new ArrayList<>();
modifiers.addAll(new UpdateDocumentWorkUnit.MinimalDocumentModifiers()
.getList());
modifiers.addAll(beanLoader.loadAll(DocumentModifier.class));
}
/**

View file

@ -67,7 +67,7 @@ public class DocumentModifierListDeveloper implements DocumentModifierList {
for (ModifierTiming timing : timings) {
int totalMillis = timing.getTotal();
float totalSeconds = totalMillis / 1000.0F;
int average = totalMillis / count.get();
int average = (count.get() == 0) ? 0 : totalMillis / count.get();
message += String
.format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
count.get(), totalSeconds, average,

View file

@ -80,7 +80,7 @@ public class SearchIndexExcluderListDeveloper implements
int thisCount = timing.getCount();
int totalMillis = timing.getTotal();
float totalSeconds = totalMillis / 1000.0F;
int average = totalMillis / thisCount;
int average = (thisCount == 0) ? 0 : totalMillis / thisCount;
message += String
.format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
thisCount, totalSeconds, average,

View file

@ -71,12 +71,12 @@ public class IndexingUriFinderListDeveloper implements IndexingUriFinderList {
}
String message = String.format(
"Timings for %d modifiers after %d calls:", timings.size(),
"Timings for %d URI finders after %d calls:", timings.size(),
count.get());
for (FinderTiming timing : timings) {
int totalMillis = timing.getTotal();
float totalSeconds = totalMillis / 1000.0F;
int average = totalMillis / count.get();
int average = (count.get() == 0) ? 0 : totalMillis / count.get();
message += String
.format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
count.get(), totalSeconds, average,

View file

@ -13,6 +13,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.RDFTYPE;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.URI;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang.StringUtils;
@ -32,6 +33,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
public class UpdateDocumentWorkUnit implements Runnable {
@ -60,31 +62,74 @@ public class UpdateDocumentWorkUnit implements Runnable {
public void run() {
try {
SearchInputDocument doc = searchEngine.createInputDocument();
addIdAndUri(doc);
addLabel(doc);
addClasses(doc);
addMostSpecificTypes(doc);
addObjectPropertyText(doc);
addDataPropertyText(doc);
addEntityBoost(doc);
modifiers.modifyDocument(ind, doc);
addIndexedTime(doc);
searchEngine.add(doc);
} catch (Exception e) {
log.warn("Failed to add '" + ind + "' to the search index.", e);
}
}
private void addIdAndUri(SearchInputDocument doc) {
private void addIndexedTime(SearchInputDocument doc) {
doc.addField(INDEXEDTIME, (Object) new DateTime().getMillis());
}
// ----------------------------------------------------------------------
// Helper classes
// ----------------------------------------------------------------------
/**
 * Holds the built-in DocumentModifiers of this work unit. They are
 * hardcoded here, and are intended to sit at the beginning of the list of
 * DocumentModifiers.
 */
public static class MinimalDocumentModifiers {
    /** Fixed-size list of the built-in modifiers, in the order they are created. */
    private final List<DocumentModifier> list;

    /**
     * Creates one instance of each built-in modifier.
     */
    public MinimalDocumentModifiers() {
        // The explicit type argument keeps the element type at
        // DocumentModifier, regardless of how the compiler would infer it
        // from the concrete argument types.
        this.list = Arrays.<DocumentModifier> asList(
                new IdUriLabel(),
                new AddClasses(),
                new AddMostSpecificTypes(),
                new AddObjectPropertyText(),
                new AddDataPropertyText(),
                new AddEntityBoost());
    }

    /**
     * @return the built-in modifiers, in creation order.
     */
    public List<DocumentModifier> getList() {
        return this.list;
    }
}
/**
 * Common base for the DocumentModifiers declared in this file. Supplies a
 * helper for adding text to the all-text fields, and a shutdown() that does
 * nothing.
 */
private abstract static class BaseDocumentModifier implements
        DocumentModifier {
    /**
     * Runs the raw value through Jsoup, takes the resulting text, and adds
     * it to both the ALLTEXT and ALLTEXTUNSTEMMED fields of the document.
     *
     * Nothing is added if the raw value is blank, or if the text that
     * comes back from Jsoup is blank.
     */
    protected void addToAlltext(SearchInputDocument doc, String raw) {
        if (!StringUtils.isBlank(raw)) {
            String parsedText = Jsoup.parse(raw).text();
            if (!StringUtils.isBlank(parsedText)) {
                doc.addField(ALLTEXT, parsedText);
                doc.addField(ALLTEXTUNSTEMMED, parsedText);
            }
        }
    }

    /**
     * These modifiers have nothing to release, so this is a no-op.
     */
    @Override
    public void shutdown() {
        // Nothing to do.
    }
}
private static class IdUriLabel extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
addIdAndUri(ind, doc);
addLabel(ind, doc);
}
private void addIdAndUri(Individual ind, SearchInputDocument doc) {
doc.addField(DOCID, SearchIndexerUtils.getIdForUri(ind.getURI()));
doc.addField(URI, ind.getURI());
}
private void addLabel(SearchInputDocument doc) {
private void addLabel(Individual ind, SearchInputDocument doc) {
String name = ind.getRdfsLabel();
if (name == null) {
name = ind.getLocalName();
@ -94,11 +139,19 @@ public class UpdateDocumentWorkUnit implements Runnable {
doc.addField(NAME_LOWERCASE_SINGLE_VALUED, name);
}
@Override
public String toString() {
return "REQUIRED: IdUriLabel";
}
}
/**
* For each class that the individual belongs to, record the class URI, the
* class group URI, the class Name, and the class boost.
*/
private void addClasses(SearchInputDocument doc) {
private static class AddClasses extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
List<VClass> vclasses = ind.getVClasses(false);
if (vclasses == null) {
return;
@ -125,7 +178,15 @@ public class UpdateDocumentWorkUnit implements Runnable {
}
}
private void addMostSpecificTypes(SearchInputDocument doc) {
@Override
public String toString() {
return "REQUIRED: AddClasses";
}
}
private static class AddMostSpecificTypes extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
List<String> mstURIs = ind.getMostSpecificTypeURIs();
if (mstURIs != null) {
for (String typeURI : mstURIs) {
@ -136,8 +197,17 @@ public class UpdateDocumentWorkUnit implements Runnable {
}
}
private void addObjectPropertyText(SearchInputDocument doc) {
List<ObjectPropertyStatement> stmts = ind.getObjectPropertyStatements();
@Override
public String toString() {
return "REQUIRED: AddMostSpecificTypes";
}
}
private static class AddObjectPropertyText extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
List<ObjectPropertyStatement> stmts = ind
.getObjectPropertyStatements();
if (stmts == null) {
return;
}
@ -150,7 +220,15 @@ public class UpdateDocumentWorkUnit implements Runnable {
}
}
private void addDataPropertyText(SearchInputDocument doc) {
@Override
public String toString() {
return "REQUIRED: AddObjectPropertyText";
}
}
private static class AddDataPropertyText extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
List<DataPropertyStatement> stmts = ind.getDataPropertyStatements();
if (stmts == null) {
return;
@ -164,27 +242,25 @@ public class UpdateDocumentWorkUnit implements Runnable {
}
}
private void addEntityBoost(SearchInputDocument doc) {
@Override
public String toString() {
return "REQUIRED: AddDataPropertyText";
}
}
private static class AddEntityBoost extends BaseDocumentModifier {
@Override
public void modifyDocument(Individual ind, SearchInputDocument doc) {
Float boost = ind.getSearchBoost();
if (boost != null && !boost.equals(0.0F)) {
doc.setDocumentBoost(boost);
}
}
private void addIndexedTime(SearchInputDocument doc) {
doc.addField(INDEXEDTIME, (Object) new DateTime().getMillis());
@Override
public String toString() {
return "REQUIRED: AddEntityBoost";
}
}
private void addToAlltext(SearchInputDocument doc, String raw) {
if (StringUtils.isBlank(raw)) {
return;
}
String clean = Jsoup.parse(raw).text();
if (StringUtils.isBlank(clean)) {
return;
}
doc.addField(ALLTEXT, clean);
doc.addField(ALLTEXTUNSTEMMED, clean);
}
}

View file

@ -29,6 +29,7 @@ import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerLi
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task;
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool;
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder;
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList;
/**
@ -68,6 +69,8 @@ public class UpdateUrisTask implements Task {
this.status = new Status(uris.size(), 200, listeners);
this.searchEngine = ApplicationUtils.instance().getSearchEngine();
}
@Override
@ -83,7 +86,7 @@ public class UpdateUrisTask implements Task {
break;
} else {
Individual ind = getIndividual(uri);
if (ind == null || hasNoClass(ind) || isExcluded(ind)) {
if (ind == null || isExcluded(ind)) {
deleteDocument(uri);
} else {
updateDocument(ind);
@ -124,15 +127,6 @@ public class UpdateUrisTask implements Task {
return ind;
}
private boolean hasNoClass(Individual ind) {
List<VClass> vclasses = ind.getVClasses(false);
if (vclasses == null || vclasses.isEmpty()) {
log.debug("Individual " + ind + " has no classes.");
return true;
}
return false;
}
private boolean isExcluded(Individual ind) {
return excluders.isExcluded(ind);
}
@ -214,4 +208,23 @@ public class UpdateUrisTask implements Task {
}
/**
 * Excludes any individual for which getVClasses(false) yields no classes.
 *
 * This will be first in the list of SearchIndexExcluders.
 */
public static class ExcludeIfNoVClasses implements SearchIndexExcluder {
    /**
     * @return a message naming the individual if its list of VClasses is
     *         null or empty; otherwise null.
     */
    @Override
    public String checkForExclusion(Individual ind) {
        List<VClass> types = ind.getVClasses(false);
        boolean hasTypes = (types != null) && !types.isEmpty();
        // The message is only built when the individual is being excluded.
        return hasTypes ? null : "Individual " + ind + " has no classes.";
    }

    @Override
    public String toString() {
        return "ExcludeIfNoVClasses";
    }
}
}