[VIVO-1031] Small caching layer in the ABoxRecomputer to reduce overhead of continually querying for the same type information.
parent 11ce991913
commit 5d6f4dada7

1 changed file with 157 additions and 9 deletions
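The commit memoizes two SPARQL-backed computations for the duration of a single recompute pass: asserted types -> inferred types, and full type set -> most specific types. Individuals that share an identical set of type URIs resolve to the same answer, so each distinct type set is queried only once. A minimal sketch of that lookup-or-compute pattern, using plain java.util collections and hypothetical names (TypeInferenceCache and queryInferredTypes are illustrations, not the committed code):

    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    public class TypeInferenceCache {
        // One cache per recompute run; keys are sets of type URIs and
        // must not be mutated after insertion.
        private final Map<Set<String>, Set<String>> cache = new HashMap<>();

        public Set<String> inferredTypes(Set<String> assertedTypeUris) {
            // Query only on a cache miss; every later individual with the
            // same type set reuses the stored answer.
            return cache.computeIfAbsent(assertedTypeUris, this::queryInferredTypes);
        }

        // Stand-in for the SPARQL CONSTRUCT query in the real class.
        private Set<String> queryInferredTypes(Set<String> assertedTypeUris) {
            return new HashSet<>(assertedTypeUris);
        }
    }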
@@ -7,11 +7,16 @@ import java.io.ByteArrayOutputStream;
 import java.io.InputStream;
+import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
+import com.hp.hpl.jena.rdf.model.NodeIterator;
+import com.hp.hpl.jena.rdf.model.Property;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
@@ -109,7 +114,9 @@ public class ABoxRecomputer {
                 // This allows the indexer to optimize behaviour whilst paused
                 searchIndexer.rebuildIndex();
             }
-            recomputeABox();
+            // Create a type cache for this execution and pass it to the recompute function
+            // Ensures that caches are only valid for the length of one recompute
+            recomputeABox(new TypeCaches());
         } finally {
             if (searchIndexer != null) {
                 searchIndexer.unpause();
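A TypeCaches instance is created at the call site and threaded through the whole recompute, so the memoized answers live no longer than one pass over the ABox; a cache that outlived the run would need explicit invalidation whenever the TBox or ABox changed, and scoping it to a single execution sidesteps that problem entirely.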
@@ -123,7 +130,7 @@ public class ABoxRecomputer {
     /*
      * Recompute the entire ABox inference graph.
      */
-    protected void recomputeABox() {
+    protected void recomputeABox(TypeCaches caches) {
         log.info("Recomputing ABox inferences.");
         log.info("Finding individuals in ABox.");
         Collection<String> individuals = this.getAllIndividualURIs();
@@ -138,7 +145,7 @@ public class ABoxRecomputer {
             String individualURI = individualIt.next();
             try {
                 additionalInferences.add(recomputeIndividual(
-                        individualURI, rebuildModel));
+                        individualURI, rebuildModel, caches));
                 numInds++;
                 individualsInBatch.add(individualURI);
                 boolean batchFilled = (numInds % BATCH_SIZE) == 0;
@@ -180,12 +187,12 @@ public class ABoxRecomputer {
     private static final boolean SKIP_PLUGINS = !RUN_PLUGINS;
 
     private Model recomputeIndividual(String individualURI,
-            Model rebuildModel) throws RDFServiceException {
+            Model rebuildModel, TypeCaches caches) throws RDFServiceException {
         long start = System.currentTimeMillis();
         Model assertions = getAssertions(individualURI);
         log.trace((System.currentTimeMillis() - start) + " ms to get assertions.");
         Model additionalInferences = recomputeIndividual(
-                individualURI, null, assertions, rebuildModel, RUN_PLUGINS);
+                individualURI, null, assertions, rebuildModel, caches, RUN_PLUGINS);
 
         if (simpleReasoner.getSameAsEnabled()) {
             Set<String> sameAsInds = getSameAsIndividuals(individualURI);
@@ -193,7 +200,7 @@ public class ABoxRecomputer {
                 // sameAs for plugins is handled by the SimpleReasoner
                 Model sameAsIndAssertions = getAssertions(sameAsInd);
                 recomputeIndividual(
-                        sameAsInd, individualURI, sameAsIndAssertions, rebuildModel, SKIP_PLUGINS);
+                        sameAsInd, individualURI, sameAsIndAssertions, rebuildModel, caches, SKIP_PLUGINS);
                 rebuildModel.add(
                         rewriteInferences(getAssertions(sameAsInd), individualURI));
                 Resource indRes = ResourceFactory.createResource(individualURI);
@@ -214,7 +221,7 @@ public class ABoxRecomputer {
      * individuals
      */
     private Model recomputeIndividual(String individualURI, String aliasURI,
-            Model assertions, Model rebuildModel, boolean runPlugins)
+            Model assertions, Model rebuildModel, TypeCaches caches, boolean runPlugins)
             throws RDFServiceException {
 
         Model additionalInferences = ModelFactory.createDefaultModel();
@@ -223,13 +230,13 @@ public class ABoxRecomputer {
         long start = System.currentTimeMillis();
         Model types = ModelFactory.createDefaultModel();
         types.add(assertions.listStatements(null, RDF.type, (RDFNode) null));
-        Model inferredTypes = rewriteInferences(getInferredTypes(individual, types), aliasURI);
+        Model inferredTypes = rewriteInferences(getInferredTypes(individual, types, caches), aliasURI);
         rebuildModel.add(inferredTypes);
         log.trace((System.currentTimeMillis() - start) + " to infer " + inferredTypes.size() + " types");
 
         start = System.currentTimeMillis();
         types.add(inferredTypes);
-        Model mst = getMostSpecificTypes(individual, types);
+        Model mst = getMostSpecificTypes(individual, types, caches);
         rebuildModel.add(rewriteInferences(mst, aliasURI));
         log.trace((System.currentTimeMillis() - start) + " to infer " + mst.size() + " mostSpecificTypes");
 
@@ -277,7 +284,23 @@ public class ABoxRecomputer {
                 , RDFService.ModelSerializationFormat.N3);
     }
 
+    private Model getInferredTypes(Resource individual, Model assertedTypes, TypeCaches caches) {
+        if (caches == null) {
+            return getInferredTypes(individual, assertedTypes);
+        }
+
+        TypeList key = new TypeList(assertedTypes, RDF.type);
+        Model inferredTypes = caches.getInferredTypesToModel(key, individual);
+        if (inferredTypes == null) {
+            inferredTypes = getInferredTypes(individual, assertedTypes);
+            caches.cacheInferredTypes(key, inferredTypes);
+        }
+
+        return inferredTypes;
+    }
+
     private Model getInferredTypes(Resource individual, Model assertedTypes) {
+        new TypeList(assertedTypes, RDF.type);
         String queryStr = "CONSTRUCT { \n" +
                 "    <" + individual.getURI() + "> a ?type \n" +
                 "} WHERE { \n" +
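Both cached lookups in this commit share the same shape: a null guard falls straight through to the uncached query (so callers that pass no cache keep the old behaviour), otherwise the wrapper checks the cache, computes on a miss, and stores the result. A generic rendering of that shape, assuming non-null values (CachedLookup and its names are hypothetical, not part of the commit):

    import java.util.Map;
    import java.util.function.Function;

    final class CachedLookup {
        static <K, V> V lookup(Map<K, V> cache, K key, Function<K, V> compute) {
            if (cache == null) {
                return compute.apply(key);   // no cache supplied: old code path
            }
            V value = cache.get(key);
            if (value == null) {
                value = compute.apply(key);  // miss: do the expensive work once
                cache.put(key, value);
            }
            return value;
        }
    }

Note that the real wrappers build a fresh TypeList key from the asserted types on every call; constructing that key is presumably cheap relative to the SPARQL query it can avoid.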
@@ -301,6 +324,21 @@ public class ABoxRecomputer {
         }
     }
 
+    private Model getMostSpecificTypes(Resource individual, Model assertedTypes, TypeCaches caches) {
+        if (caches == null) {
+            return getMostSpecificTypes(individual, assertedTypes);
+        }
+
+        TypeList key = new TypeList(assertedTypes, RDF.type);
+        Model mostSpecificTypes = caches.getMostSpecificTypesToModel(key, individual);
+        if (mostSpecificTypes == null) {
+            mostSpecificTypes = getMostSpecificTypes(individual, assertedTypes);
+            caches.cacheMostSpecificTypes(key, mostSpecificTypes);
+        }
+
+        return mostSpecificTypes;
+    }
+
     private Model getMostSpecificTypes(Resource individual, Model assertedTypes) {
         String queryStr = "CONSTRUCT { \n" +
                 "    <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" +
@@ -493,4 +531,114 @@ public class ABoxRecomputer {
     public void setStopRequested() {
         this.stopRequested = true;
     }
+
+    /**
+     * Caches for types -> inferred types, and types -> most specific type
+     */
+    private static class TypeCaches {
+        private Map<TypeList, TypeList> inferredTypes = new HashMap<TypeList, TypeList>();
+        private Map<TypeList, TypeList> mostSpecificTypes = new HashMap<TypeList, TypeList>();
+
+        void cacheInferredTypes(TypeList key, Model model) {
+            inferredTypes.put(key, new TypeList(model, RDF.type));
+        }
+
+        Model getInferredTypesToModel(TypeList key, Resource individual) {
+            TypeList types = inferredTypes.get(key);
+            if (types != null) {
+                return types.constructModel(individual, RDF.type);
+            }
+            return null;
+        }
+
+        void cacheMostSpecificTypes(TypeList key, Model model) {
+            mostSpecificTypes.put(key, new TypeList(model, model.createProperty(VitroVocabulary.MOST_SPECIFIC_TYPE)));
+        }
+
+        Model getMostSpecificTypesToModel(TypeList key, Resource individual) {
+            TypeList types = mostSpecificTypes.get(key);
+            if (types != null) {
+                return types.constructModel(individual, VitroVocabulary.MOST_SPECIFIC_TYPE);
+            }
+            return null;
+        }
+    }
+
+    /**
+     * Bundle of type URIs
+     */
+    private static class TypeList {
+        private List<String> typeUris = new ArrayList<String>();
+
+        private Integer hashCode = null;
+
+        /**
+         * Extract type uris - either RDF type or most specific type - from a Model
+         */
+        TypeList(Model model, Property property) {
+            NodeIterator iterator = model.listObjectsOfProperty(property);
+            while (iterator.hasNext()) {
+                RDFNode node = iterator.next();
+                String uri = node.asResource().getURI();
+                if (!typeUris.contains(uri)) {
+                    typeUris.add(uri);
+                }
+            }
+        }
+
+        Model constructModel(Resource individual, Property property) {
+            Model model = ModelFactory.createDefaultModel();
+            for (String uri : typeUris) {
+                model.add(individual, property, model.createResource(uri));
+            }
+
+            return model;
+        }
+
+        Model constructModel(Resource individual, String property) {
+            Model model = ModelFactory.createDefaultModel();
+            for (String uri : typeUris) {
+                model.add(individual, model.createProperty(property), model.createResource(uri));
+            }
+
+            return model;
+        }
+
+        public void addUri(String uri) {
+            if (!typeUris.contains(uri)) {
+                typeUris.add(uri);
+                hashCode = null;
+            }
+        }
+
+        @Override
+        public boolean equals(Object obj) {
+            if (!(obj instanceof TypeList)) {
+                return false;
+            }
+
+            TypeList otherKey = (TypeList)obj;
+
+            if (typeUris.size() != otherKey.typeUris.size()) {
+                return false;
+            }
+
+            return typeUris.containsAll(otherKey.typeUris);
+        }
+
+        @Override
+        public int hashCode() {
+            if (hashCode == null) {
+                Collections.sort(typeUris);
+                StringBuilder builder = new StringBuilder();
+                for (String key : typeUris) {
+                    builder.append('<').append(key).append('>');
+                }
+
+                hashCode = builder.toString().hashCode();
+            }
+
+            return hashCode;
+        }
+    }
 }
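TypeList gives the cache keys set semantics: equals compares sizes and then falls back to containsAll, while hashCode sorts the URIs before hashing their concatenation, so two keys built from the same types in different orders land in the same bucket. A small standalone check of that ordering property (TypeKeyDemo is a hypothetical harness; typeListHash mirrors the hashCode logic above):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.Collections;
    import java.util.List;

    public class TypeKeyDemo {
        // Mirrors TypeList.hashCode(): sort, then hash "<uri1><uri2>...".
        static int typeListHash(List<String> uris) {
            List<String> sorted = new ArrayList<>(uris);
            Collections.sort(sorted);
            StringBuilder builder = new StringBuilder();
            for (String uri : sorted) {
                builder.append('<').append(uri).append('>');
            }
            return builder.toString().hashCode();
        }

        public static void main(String[] args) {
            List<String> a = Arrays.asList("http://example.org/Faculty", "http://example.org/Person");
            List<String> b = Arrays.asList("http://example.org/Person", "http://example.org/Faculty");
            // Same set of type URIs in either order: identical hash, so both
            // probe the same HashMap bucket and compare equal via containsAll.
            System.out.println(typeListHash(a) == typeListHash(b)); // prints true
        }
    }

One caveat when reading the class: addUri resets the memoized hash, but mutating a TypeList already stored as a HashMap key would still leave the entry unreachable; in the code shown, keys are constructed fresh for each lookup and addUri is never called, so this does not arise.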