[VIVO-1031] Small caching layer in the ABoxRecomputer to reduce overhead of continually querying for the same type information.

This commit is contained in:
grahamtriggs 2015-10-31 12:46:45 +00:00
parent 11ce991913
commit 5d6f4dada7

View file

@ -7,11 +7,16 @@ import java.io.ByteArrayOutputStream;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import com.hp.hpl.jena.rdf.model.NodeIterator;
import com.hp.hpl.jena.rdf.model.Property;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -109,7 +114,9 @@ public class ABoxRecomputer {
// This allows the indexer to optimize behaviour whilst paused // This allows the indexer to optimize behaviour whilst paused
searchIndexer.rebuildIndex(); searchIndexer.rebuildIndex();
} }
recomputeABox(); // Create a type cache for this execution and pass it to the recompute function
// Ensures that caches are only valid for the length of one recompute
recomputeABox(new TypeCaches());
} finally { } finally {
if (searchIndexer != null) { if (searchIndexer != null) {
searchIndexer.unpause(); searchIndexer.unpause();
@ -123,7 +130,7 @@ public class ABoxRecomputer {
/* /*
* Recompute the entire ABox inference graph. * Recompute the entire ABox inference graph.
*/ */
protected void recomputeABox() { protected void recomputeABox(TypeCaches caches) {
log.info("Recomputing ABox inferences."); log.info("Recomputing ABox inferences.");
log.info("Finding individuals in ABox."); log.info("Finding individuals in ABox.");
Collection<String> individuals = this.getAllIndividualURIs(); Collection<String> individuals = this.getAllIndividualURIs();
@ -138,7 +145,7 @@ public class ABoxRecomputer {
String individualURI = individualIt.next(); String individualURI = individualIt.next();
try { try {
additionalInferences.add(recomputeIndividual( additionalInferences.add(recomputeIndividual(
individualURI, rebuildModel)); individualURI, rebuildModel, caches));
numInds++; numInds++;
individualsInBatch.add(individualURI); individualsInBatch.add(individualURI);
boolean batchFilled = (numInds % BATCH_SIZE) == 0; boolean batchFilled = (numInds % BATCH_SIZE) == 0;
@ -180,12 +187,12 @@ public class ABoxRecomputer {
private static final boolean SKIP_PLUGINS = !RUN_PLUGINS; private static final boolean SKIP_PLUGINS = !RUN_PLUGINS;
private Model recomputeIndividual(String individualURI, private Model recomputeIndividual(String individualURI,
Model rebuildModel) throws RDFServiceException { Model rebuildModel, TypeCaches caches) throws RDFServiceException {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
Model assertions = getAssertions(individualURI); Model assertions = getAssertions(individualURI);
log.trace((System.currentTimeMillis() - start) + " ms to get assertions."); log.trace((System.currentTimeMillis() - start) + " ms to get assertions.");
Model additionalInferences = recomputeIndividual( Model additionalInferences = recomputeIndividual(
individualURI, null, assertions, rebuildModel, RUN_PLUGINS); individualURI, null, assertions, rebuildModel, caches, RUN_PLUGINS);
if (simpleReasoner.getSameAsEnabled()) { if (simpleReasoner.getSameAsEnabled()) {
Set<String> sameAsInds = getSameAsIndividuals(individualURI); Set<String> sameAsInds = getSameAsIndividuals(individualURI);
@ -193,7 +200,7 @@ public class ABoxRecomputer {
// sameAs for plugins is handled by the SimpleReasoner // sameAs for plugins is handled by the SimpleReasoner
Model sameAsIndAssertions = getAssertions(sameAsInd); Model sameAsIndAssertions = getAssertions(sameAsInd);
recomputeIndividual( recomputeIndividual(
sameAsInd, individualURI, sameAsIndAssertions, rebuildModel, SKIP_PLUGINS); sameAsInd, individualURI, sameAsIndAssertions, rebuildModel, caches, SKIP_PLUGINS);
rebuildModel.add( rebuildModel.add(
rewriteInferences(getAssertions(sameAsInd), individualURI)); rewriteInferences(getAssertions(sameAsInd), individualURI));
Resource indRes = ResourceFactory.createResource(individualURI); Resource indRes = ResourceFactory.createResource(individualURI);
@ -214,7 +221,7 @@ public class ABoxRecomputer {
* individuals * individuals
*/ */
private Model recomputeIndividual(String individualURI, String aliasURI, private Model recomputeIndividual(String individualURI, String aliasURI,
Model assertions, Model rebuildModel, boolean runPlugins) Model assertions, Model rebuildModel, TypeCaches caches, boolean runPlugins)
throws RDFServiceException { throws RDFServiceException {
Model additionalInferences = ModelFactory.createDefaultModel(); Model additionalInferences = ModelFactory.createDefaultModel();
@ -223,13 +230,13 @@ public class ABoxRecomputer {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
Model types = ModelFactory.createDefaultModel(); Model types = ModelFactory.createDefaultModel();
types.add(assertions.listStatements(null, RDF.type, (RDFNode) null)); types.add(assertions.listStatements(null, RDF.type, (RDFNode) null));
Model inferredTypes = rewriteInferences(getInferredTypes(individual, types), aliasURI); Model inferredTypes = rewriteInferences(getInferredTypes(individual, types, caches), aliasURI);
rebuildModel.add(inferredTypes); rebuildModel.add(inferredTypes);
log.trace((System.currentTimeMillis() - start) + " to infer " + inferredTypes.size() + " types"); log.trace((System.currentTimeMillis() - start) + " to infer " + inferredTypes.size() + " types");
start = System.currentTimeMillis(); start = System.currentTimeMillis();
types.add(inferredTypes); types.add(inferredTypes);
Model mst = getMostSpecificTypes(individual, types); Model mst = getMostSpecificTypes(individual, types, caches);
rebuildModel.add(rewriteInferences(mst, aliasURI)); rebuildModel.add(rewriteInferences(mst, aliasURI));
log.trace((System.currentTimeMillis() - start) + " to infer " + mst.size() + " mostSpecificTypes"); log.trace((System.currentTimeMillis() - start) + " to infer " + mst.size() + " mostSpecificTypes");
@ -277,7 +284,23 @@ public class ABoxRecomputer {
, RDFService.ModelSerializationFormat.N3); , RDFService.ModelSerializationFormat.N3);
} }
/**
 * Cache-aware variant of the inferred-type lookup.
 *
 * The cache key is the set of rdf:type objects found in {@code assertedTypes}.
 * On a hit, the cached type URIs are rebuilt into a fresh model whose subject
 * is {@code individual}; on a miss, the uncached lookup runs and its result is
 * stored under the key before being returned.
 *
 * @param individual    the individual whose inferred types are wanted
 * @param assertedTypes model holding the individual's type statements
 * @param caches        per-run caches, or null to bypass caching entirely
 * @return a model of inferred type statements for {@code individual}
 */
private Model getInferredTypes(Resource individual, Model assertedTypes, TypeCaches caches) {
    // Without a cache there is nothing to consult: go straight to the query.
    if (caches == null) {
        return getInferredTypes(individual, assertedTypes);
    }

    TypeList assertedKey = new TypeList(assertedTypes, RDF.type);
    Model cached = caches.getInferredTypesToModel(assertedKey, individual);
    if (cached != null) {
        return cached;
    }

    // Cache miss: compute once and remember the answer for this type set.
    Model computed = getInferredTypes(individual, assertedTypes);
    caches.cacheInferredTypes(assertedKey, computed);
    return computed;
}
private Model getInferredTypes(Resource individual, Model assertedTypes) { private Model getInferredTypes(Resource individual, Model assertedTypes) {
new TypeList(assertedTypes, RDF.type);
String queryStr = "CONSTRUCT { \n" + String queryStr = "CONSTRUCT { \n" +
" <" + individual.getURI() + "> a ?type \n" + " <" + individual.getURI() + "> a ?type \n" +
"} WHERE { \n" + "} WHERE { \n" +
@ -301,6 +324,21 @@ public class ABoxRecomputer {
} }
} }
/**
 * Cache-aware variant of the most-specific-type lookup.
 *
 * The cache key is the set of rdf:type objects found in {@code assertedTypes}.
 * On a hit, the cached type URIs are rebuilt into a fresh model whose subject
 * is {@code individual}; on a miss, the uncached lookup runs and its result is
 * stored under the key before being returned.
 *
 * @param individual    the individual whose most specific types are wanted
 * @param assertedTypes model holding the individual's type statements
 * @param caches        per-run caches, or null to bypass caching entirely
 * @return a model of most-specific-type statements for {@code individual}
 */
private Model getMostSpecificTypes(Resource individual, Model assertedTypes, TypeCaches caches) {
    // Without a cache there is nothing to consult: go straight to the query.
    if (caches == null) {
        return getMostSpecificTypes(individual, assertedTypes);
    }

    TypeList typeKey = new TypeList(assertedTypes, RDF.type);
    Model cached = caches.getMostSpecificTypesToModel(typeKey, individual);
    if (cached != null) {
        return cached;
    }

    // Cache miss: compute once and remember the answer for this type set.
    Model computed = getMostSpecificTypes(individual, assertedTypes);
    caches.cacheMostSpecificTypes(typeKey, computed);
    return computed;
}
private Model getMostSpecificTypes(Resource individual, Model assertedTypes) { private Model getMostSpecificTypes(Resource individual, Model assertedTypes) {
String queryStr = "CONSTRUCT { \n" + String queryStr = "CONSTRUCT { \n" +
" <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" + " <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" +
@ -493,4 +531,114 @@ public class ABoxRecomputer {
public void setStopRequested() { public void setStopRequested() {
this.stopRequested = true; this.stopRequested = true;
} }
/**
 * Two look-aside caches keyed by a set of type URIs: one mapping to the
 * inferred types for that set, one mapping to its most specific types.
 *
 * Only type URIs are stored, not statements, so a cached entry can be
 * replayed for any individual by rebuilding a model around that individual.
 */
private static class TypeCaches {
    private Map<TypeList, TypeList> inferredTypes = new HashMap<TypeList, TypeList>();
    private Map<TypeList, TypeList> mostSpecificTypes = new HashMap<TypeList, TypeList>();

    /**
     * Remember the rdf:type objects of {@code model} as the inferred types for {@code key}.
     */
    void cacheInferredTypes(TypeList key, Model model) {
        TypeList inferred = new TypeList(model, RDF.type);
        inferredTypes.put(key, inferred);
    }

    /**
     * @return the cached inferred types for {@code key} as rdf:type statements
     *         about {@code individual}, or null when nothing is cached
     */
    Model getInferredTypesToModel(TypeList key, Resource individual) {
        TypeList hit = inferredTypes.get(key);
        return (hit == null) ? null : hit.constructModel(individual, RDF.type);
    }

    /**
     * Remember the most-specific-type objects of {@code model} for {@code key}.
     */
    void cacheMostSpecificTypes(TypeList key, Model model) {
        Property mostSpecificType = model.createProperty(VitroVocabulary.MOST_SPECIFIC_TYPE);
        TypeList specific = new TypeList(model, mostSpecificType);
        mostSpecificTypes.put(key, specific);
    }

    /**
     * @return the cached most specific types for {@code key} as statements
     *         about {@code individual}, or null when nothing is cached
     */
    Model getMostSpecificTypesToModel(TypeList key, Resource individual) {
        TypeList hit = mostSpecificTypes.get(key);
        return (hit == null) ? null : hit.constructModel(individual, VitroVocabulary.MOST_SPECIFIC_TYPE);
    }
}
/**
 * Bundle of type URIs, usable both as a cache key and as a cached value.
 *
 * The URIs are held in a set, so duplicates collapse and two bundles are equal
 * exactly when they contain the same URIs, regardless of the order in which
 * they were encountered. Equality and hashing are delegated to the set, which
 * means neither operation modifies the bundle.
 *
 * Instances are mutable through {@link #addUri(String)}; do not add URIs to a
 * bundle after it has been used as a map key.
 */
private static class TypeList {
    private final Set<String> typeUris = new HashSet<String>();

    /**
     * Extract type uris - either RDF type or most specific type - from a Model
     *
     * @param model    the model to read
     * @param property the predicate whose objects are collected
     */
    TypeList(Model model, Property property) {
        NodeIterator iterator = model.listObjectsOfProperty(property);
        while (iterator.hasNext()) {
            RDFNode node = iterator.next();
            // NOTE(review): getURI() presumably yields null for anonymous
            // resources - confirm. The set tolerates a null entry, so such
            // nodes collapse into one null element rather than failing later.
            typeUris.add(node.asResource().getURI());
        }
    }

    /**
     * Build a new model stating {@code individual property type} for every held URI.
     */
    Model constructModel(Resource individual, Property property) {
        Model model = ModelFactory.createDefaultModel();
        addTypeStatements(model, individual, property);
        return model;
    }

    /**
     * Same as {@link #constructModel(Resource, Property)}, with the predicate given as a URI.
     */
    Model constructModel(Resource individual, String property) {
        Model model = ModelFactory.createDefaultModel();
        // Resolve the predicate once instead of once per type URI.
        addTypeStatements(model, individual, model.createProperty(property));
        return model;
    }

    /** Add one statement per held URI to {@code model}. */
    private void addTypeStatements(Model model, Resource individual, Property property) {
        for (String uri : typeUris) {
            model.add(individual, property, model.createResource(uri));
        }
    }

    /**
     * Add a URI to the bundle; adding a URI that is already present has no effect.
     */
    public void addUri(String uri) {
        typeUris.add(uri);
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof TypeList)) {
            return false;
        }
        return typeUris.equals(((TypeList) obj).typeUris);
    }

    @Override
    public int hashCode() {
        // Set hashing is order-independent, so no sorting (and no mutation) is needed.
        return typeUris.hashCode();
    }
}
} }