VIVO-871 Break out more tasks to improve the timings
UpdateDocumentWorkUnit contains a list of required DocumentModifiers. They are required, but because they are in the list of DocumentModifiers, they are timed like the optional ones. Similarly, UpdateUrisTask contains a required SearchIndexExcluder.
This commit is contained in:
parent
0c477945a8
commit
27d3141bb6
6 changed files with 228 additions and 131 deletions
|
@ -54,6 +54,7 @@ import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderLi
|
|||
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListBasic;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.indexing.IndexingUriFinderListDeveloper;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.RebuildIndexTask;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateDocumentWorkUnit;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateStatementsTask;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.tasks.UpdateUrisTask;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ConfigurationBeanLoader;
|
||||
|
@ -82,8 +83,8 @@ public class SearchIndexerImpl implements SearchIndexer {
|
|||
private final WorkerThreadPool pool = new WorkerThreadPool();
|
||||
|
||||
private ServletContext ctx;
|
||||
private Set<SearchIndexExcluder> excluders;
|
||||
private Set<DocumentModifier> modifiers;
|
||||
private List<SearchIndexExcluder> excluders;
|
||||
private List<DocumentModifier> modifiers;
|
||||
private Set<IndexingUriFinder> uriFinders;
|
||||
private WebappDaoFactory wadf;
|
||||
|
||||
|
@ -107,9 +108,16 @@ public class SearchIndexerImpl implements SearchIndexer {
|
|||
private void loadConfiguration() throws ConfigurationBeanLoaderException {
|
||||
ConfigurationBeanLoader beanLoader = new ConfigurationBeanLoader(
|
||||
ModelAccess.on(ctx).getOntModel(DISPLAY), ctx);
|
||||
excluders = beanLoader.loadAll(SearchIndexExcluder.class);
|
||||
modifiers = beanLoader.loadAll(DocumentModifier.class);
|
||||
uriFinders = beanLoader.loadAll(IndexingUriFinder.class);
|
||||
|
||||
excluders = new ArrayList<>();
|
||||
excluders.add(new UpdateUrisTask.ExcludeIfNoVClasses());
|
||||
excluders.addAll(beanLoader.loadAll(SearchIndexExcluder.class));
|
||||
|
||||
modifiers = new ArrayList<>();
|
||||
modifiers.addAll(new UpdateDocumentWorkUnit.MinimalDocumentModifiers()
|
||||
.getList());
|
||||
modifiers.addAll(beanLoader.loadAll(DocumentModifier.class));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -67,7 +67,7 @@ public class DocumentModifierListDeveloper implements DocumentModifierList {
|
|||
for (ModifierTiming timing : timings) {
|
||||
int totalMillis = timing.getTotal();
|
||||
float totalSeconds = totalMillis / 1000.0F;
|
||||
int average = totalMillis / count.get();
|
||||
int average = (count.get() == 0) ? 0 : totalMillis / count.get();
|
||||
message += String
|
||||
.format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
|
||||
count.get(), totalSeconds, average,
|
||||
|
|
|
@ -80,7 +80,7 @@ public class SearchIndexExcluderListDeveloper implements
|
|||
int thisCount = timing.getCount();
|
||||
int totalMillis = timing.getTotal();
|
||||
float totalSeconds = totalMillis / 1000.0F;
|
||||
int average = totalMillis / thisCount;
|
||||
int average = (thisCount == 0) ? 0 : totalMillis / thisCount;
|
||||
message += String
|
||||
.format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
|
||||
thisCount, totalSeconds, average,
|
||||
|
|
|
@ -71,12 +71,12 @@ public class IndexingUriFinderListDeveloper implements IndexingUriFinderList {
|
|||
}
|
||||
|
||||
String message = String.format(
|
||||
"Timings for %d modifiers after %d calls:", timings.size(),
|
||||
"Timings for %d URI finders after %d calls:", timings.size(),
|
||||
count.get());
|
||||
for (FinderTiming timing : timings) {
|
||||
int totalMillis = timing.getTotal();
|
||||
float totalSeconds = totalMillis / 1000.0F;
|
||||
int average = totalMillis / count.get();
|
||||
int average = (count.get() == 0) ? 0 : totalMillis / count.get();
|
||||
message += String
|
||||
.format("\n count: %7d, total: %9.3fsec, average: %4dms-- %1.200s",
|
||||
count.get(), totalSeconds, average,
|
||||
|
|
|
@ -13,6 +13,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_
|
|||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.RDFTYPE;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.URI;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
@ -32,6 +33,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
|||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchIndexer.SearchIndexerUtils;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
|
||||
|
||||
public class UpdateDocumentWorkUnit implements Runnable {
|
||||
|
@ -60,31 +62,74 @@ public class UpdateDocumentWorkUnit implements Runnable {
|
|||
public void run() {
|
||||
try {
|
||||
SearchInputDocument doc = searchEngine.createInputDocument();
|
||||
|
||||
addIdAndUri(doc);
|
||||
addLabel(doc);
|
||||
addClasses(doc);
|
||||
addMostSpecificTypes(doc);
|
||||
addObjectPropertyText(doc);
|
||||
addDataPropertyText(doc);
|
||||
addEntityBoost(doc);
|
||||
|
||||
modifiers.modifyDocument(ind, doc);
|
||||
|
||||
addIndexedTime(doc);
|
||||
|
||||
searchEngine.add(doc);
|
||||
} catch (Exception e) {
|
||||
log.warn("Failed to add '" + ind + "' to the search index.", e);
|
||||
}
|
||||
}
|
||||
|
||||
private void addIdAndUri(SearchInputDocument doc) {
|
||||
private void addIndexedTime(SearchInputDocument doc) {
|
||||
doc.addField(INDEXEDTIME, (Object) new DateTime().getMillis());
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper classes
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* These will be hardcoded at the beginning of the list of
|
||||
* DocumentModifiers.
|
||||
*/
|
||||
public static class MinimalDocumentModifiers {
|
||||
private final List<DocumentModifier> list;
|
||||
|
||||
public MinimalDocumentModifiers() {
|
||||
this.list = Arrays.asList(new DocumentModifier[] {
|
||||
new IdUriLabel(), new AddClasses(),
|
||||
new AddMostSpecificTypes(), new AddObjectPropertyText(),
|
||||
new AddDataPropertyText(), new AddEntityBoost() });
|
||||
}
|
||||
|
||||
public List<DocumentModifier> getList() {
|
||||
return list;
|
||||
}
|
||||
}
|
||||
|
||||
private abstract static class BaseDocumentModifier implements
|
||||
DocumentModifier {
|
||||
protected void addToAlltext(SearchInputDocument doc, String raw) {
|
||||
if (StringUtils.isBlank(raw)) {
|
||||
return;
|
||||
}
|
||||
String clean = Jsoup.parse(raw).text();
|
||||
if (StringUtils.isBlank(clean)) {
|
||||
return;
|
||||
}
|
||||
doc.addField(ALLTEXT, clean);
|
||||
doc.addField(ALLTEXTUNSTEMMED, clean);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
// Nothing to do.
|
||||
}
|
||||
}
|
||||
|
||||
private static class IdUriLabel extends BaseDocumentModifier {
|
||||
@Override
|
||||
public void modifyDocument(Individual ind, SearchInputDocument doc) {
|
||||
addIdAndUri(ind, doc);
|
||||
addLabel(ind, doc);
|
||||
}
|
||||
|
||||
private void addIdAndUri(Individual ind, SearchInputDocument doc) {
|
||||
doc.addField(DOCID, SearchIndexerUtils.getIdForUri(ind.getURI()));
|
||||
doc.addField(URI, ind.getURI());
|
||||
}
|
||||
|
||||
private void addLabel(SearchInputDocument doc) {
|
||||
private void addLabel(Individual ind, SearchInputDocument doc) {
|
||||
String name = ind.getRdfsLabel();
|
||||
if (name == null) {
|
||||
name = ind.getLocalName();
|
||||
|
@ -94,11 +139,19 @@ public class UpdateDocumentWorkUnit implements Runnable {
|
|||
doc.addField(NAME_LOWERCASE_SINGLE_VALUED, name);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "REQUIRED: IdUriLabel";
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For each class that the individual belongs to, record the class URI, the
|
||||
* class group URI, the class Name, and the class boost.
|
||||
*/
|
||||
private void addClasses(SearchInputDocument doc) {
|
||||
private static class AddClasses extends BaseDocumentModifier {
|
||||
@Override
|
||||
public void modifyDocument(Individual ind, SearchInputDocument doc) {
|
||||
List<VClass> vclasses = ind.getVClasses(false);
|
||||
if (vclasses == null) {
|
||||
return;
|
||||
|
@ -125,7 +178,15 @@ public class UpdateDocumentWorkUnit implements Runnable {
|
|||
}
|
||||
}
|
||||
|
||||
private void addMostSpecificTypes(SearchInputDocument doc) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return "REQUIRED: AddClasses";
|
||||
}
|
||||
}
|
||||
|
||||
private static class AddMostSpecificTypes extends BaseDocumentModifier {
|
||||
@Override
|
||||
public void modifyDocument(Individual ind, SearchInputDocument doc) {
|
||||
List<String> mstURIs = ind.getMostSpecificTypeURIs();
|
||||
if (mstURIs != null) {
|
||||
for (String typeURI : mstURIs) {
|
||||
|
@ -136,8 +197,17 @@ public class UpdateDocumentWorkUnit implements Runnable {
|
|||
}
|
||||
}
|
||||
|
||||
private void addObjectPropertyText(SearchInputDocument doc) {
|
||||
List<ObjectPropertyStatement> stmts = ind.getObjectPropertyStatements();
|
||||
@Override
|
||||
public String toString() {
|
||||
return "REQUIRED: AddMostSpecificTypes";
|
||||
}
|
||||
}
|
||||
|
||||
private static class AddObjectPropertyText extends BaseDocumentModifier {
|
||||
@Override
|
||||
public void modifyDocument(Individual ind, SearchInputDocument doc) {
|
||||
List<ObjectPropertyStatement> stmts = ind
|
||||
.getObjectPropertyStatements();
|
||||
if (stmts == null) {
|
||||
return;
|
||||
}
|
||||
|
@ -150,7 +220,15 @@ public class UpdateDocumentWorkUnit implements Runnable {
|
|||
}
|
||||
}
|
||||
|
||||
private void addDataPropertyText(SearchInputDocument doc) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return "REQUIRED: AddObjectPropertyText";
|
||||
}
|
||||
}
|
||||
|
||||
private static class AddDataPropertyText extends BaseDocumentModifier {
|
||||
@Override
|
||||
public void modifyDocument(Individual ind, SearchInputDocument doc) {
|
||||
List<DataPropertyStatement> stmts = ind.getDataPropertyStatements();
|
||||
if (stmts == null) {
|
||||
return;
|
||||
|
@ -164,27 +242,25 @@ public class UpdateDocumentWorkUnit implements Runnable {
|
|||
}
|
||||
}
|
||||
|
||||
private void addEntityBoost(SearchInputDocument doc) {
|
||||
@Override
|
||||
public String toString() {
|
||||
return "REQUIRED: AddDataPropertyText";
|
||||
}
|
||||
}
|
||||
|
||||
private static class AddEntityBoost extends BaseDocumentModifier {
|
||||
@Override
|
||||
public void modifyDocument(Individual ind, SearchInputDocument doc) {
|
||||
Float boost = ind.getSearchBoost();
|
||||
if (boost != null && !boost.equals(0.0F)) {
|
||||
doc.setDocumentBoost(boost);
|
||||
}
|
||||
}
|
||||
|
||||
private void addIndexedTime(SearchInputDocument doc) {
|
||||
doc.addField(INDEXEDTIME, (Object) new DateTime().getMillis());
|
||||
@Override
|
||||
public String toString() {
|
||||
return "REQUIRED: AddEntityBoost";
|
||||
}
|
||||
}
|
||||
|
||||
private void addToAlltext(SearchInputDocument doc, String raw) {
|
||||
if (StringUtils.isBlank(raw)) {
|
||||
return;
|
||||
}
|
||||
String clean = Jsoup.parse(raw).text();
|
||||
if (StringUtils.isBlank(clean)) {
|
||||
return;
|
||||
}
|
||||
doc.addField(ALLTEXT, clean);
|
||||
doc.addField(ALLTEXTUNSTEMMED, clean);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.ListenerLi
|
|||
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.Task;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.SearchIndexerImpl.WorkerThreadPool;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifierList;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluder;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchindex.exclusions.SearchIndexExcluderList;
|
||||
|
||||
/**
|
||||
|
@ -68,6 +69,8 @@ public class UpdateUrisTask implements Task {
|
|||
this.status = new Status(uris.size(), 200, listeners);
|
||||
|
||||
this.searchEngine = ApplicationUtils.instance().getSearchEngine();
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -83,7 +86,7 @@ public class UpdateUrisTask implements Task {
|
|||
break;
|
||||
} else {
|
||||
Individual ind = getIndividual(uri);
|
||||
if (ind == null || hasNoClass(ind) || isExcluded(ind)) {
|
||||
if (ind == null || isExcluded(ind)) {
|
||||
deleteDocument(uri);
|
||||
} else {
|
||||
updateDocument(ind);
|
||||
|
@ -124,15 +127,6 @@ public class UpdateUrisTask implements Task {
|
|||
return ind;
|
||||
}
|
||||
|
||||
private boolean hasNoClass(Individual ind) {
|
||||
List<VClass> vclasses = ind.getVClasses(false);
|
||||
if (vclasses == null || vclasses.isEmpty()) {
|
||||
log.debug("Individual " + ind + " has no classes.");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean isExcluded(Individual ind) {
|
||||
return excluders.isExcluded(ind);
|
||||
}
|
||||
|
@ -214,4 +208,23 @@ public class UpdateUrisTask implements Task {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* This will be first in the list of SearchIndexExcluders.
|
||||
*/
|
||||
public static class ExcludeIfNoVClasses implements SearchIndexExcluder {
|
||||
@Override
|
||||
public String checkForExclusion(Individual ind) {
|
||||
List<VClass> vclasses = ind.getVClasses(false);
|
||||
if (vclasses == null || vclasses.isEmpty()) {
|
||||
return "Individual " + ind + " has no classes.";
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ExcludeIfNoVClasses";
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue