From 5d6f4dada71fb1bf52239ef36d3e65507afd53c7 Mon Sep 17 00:00:00 2001 From: grahamtriggs Date: Sat, 31 Oct 2015 12:46:45 +0000 Subject: [PATCH] [VIVO-1031] Small caching layer in the ABoxRecomputer to reduce overhead of continually querying for the same type information. --- .../vitro/webapp/reasoner/ABoxRecomputer.java | 166 +++++++++++++++++- 1 file changed, 157 insertions(+), 9 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/ABoxRecomputer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/ABoxRecomputer.java index 595516859..f03a122fc 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/ABoxRecomputer.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/ABoxRecomputer.java @@ -7,11 +7,16 @@ import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.Set; +import com.hp.hpl.jena.rdf.model.NodeIterator; +import com.hp.hpl.jena.rdf.model.Property; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -109,7 +114,9 @@ public class ABoxRecomputer { // This allows the indexer to optimize behaviour whilst paused searchIndexer.rebuildIndex(); } - recomputeABox(); + // Create a type cache for this execution and pass it to the recompute function + // Ensures that caches are only valid for the length of one recompute + recomputeABox(new TypeCaches()); } finally { if (searchIndexer != null) { searchIndexer.unpause(); @@ -123,7 +130,7 @@ public class ABoxRecomputer { /* * Recompute the entire ABox inference graph. */ - protected void recomputeABox() { + protected void recomputeABox(TypeCaches caches) { log.info("Recomputing ABox inferences."); log.info("Finding individuals in ABox."); Collection individuals = this.getAllIndividualURIs(); @@ -138,7 +145,7 @@ public class ABoxRecomputer { String individualURI = individualIt.next(); try { additionalInferences.add(recomputeIndividual( - individualURI, rebuildModel)); + individualURI, rebuildModel, caches)); numInds++; individualsInBatch.add(individualURI); boolean batchFilled = (numInds % BATCH_SIZE) == 0; @@ -180,12 +187,12 @@ public class ABoxRecomputer { private static final boolean SKIP_PLUGINS = !RUN_PLUGINS; private Model recomputeIndividual(String individualURI, - Model rebuildModel) throws RDFServiceException { + Model rebuildModel, TypeCaches caches) throws RDFServiceException { long start = System.currentTimeMillis(); Model assertions = getAssertions(individualURI); log.trace((System.currentTimeMillis() - start) + " ms to get assertions."); Model additionalInferences = recomputeIndividual( - individualURI, null, assertions, rebuildModel, RUN_PLUGINS); + individualURI, null, assertions, rebuildModel, caches, RUN_PLUGINS); if (simpleReasoner.getSameAsEnabled()) { Set sameAsInds = getSameAsIndividuals(individualURI); @@ -193,7 +200,7 @@ public class ABoxRecomputer { // sameAs for plugins is handled by the SimpleReasoner Model sameAsIndAssertions = getAssertions(sameAsInd); recomputeIndividual( - sameAsInd, individualURI, sameAsIndAssertions, rebuildModel, SKIP_PLUGINS); + sameAsInd, individualURI, sameAsIndAssertions, rebuildModel, caches, SKIP_PLUGINS); rebuildModel.add( rewriteInferences(getAssertions(sameAsInd), individualURI)); Resource indRes = ResourceFactory.createResource(individualURI); @@ -214,7 +221,7 @@ public class ABoxRecomputer { * individuals */ private Model recomputeIndividual(String individualURI, String aliasURI, - Model assertions, Model rebuildModel, boolean runPlugins) + Model assertions, Model rebuildModel, TypeCaches caches, boolean runPlugins) throws RDFServiceException { Model additionalInferences = ModelFactory.createDefaultModel(); @@ -223,13 +230,13 @@ public class ABoxRecomputer { long start = System.currentTimeMillis(); Model types = ModelFactory.createDefaultModel(); types.add(assertions.listStatements(null, RDF.type, (RDFNode) null)); - Model inferredTypes = rewriteInferences(getInferredTypes(individual, types), aliasURI); + Model inferredTypes = rewriteInferences(getInferredTypes(individual, types, caches), aliasURI); rebuildModel.add(inferredTypes); log.trace((System.currentTimeMillis() - start) + " to infer " + inferredTypes.size() + " types"); start = System.currentTimeMillis(); types.add(inferredTypes); - Model mst = getMostSpecificTypes(individual, types); + Model mst = getMostSpecificTypes(individual, types, caches); rebuildModel.add(rewriteInferences(mst, aliasURI)); log.trace((System.currentTimeMillis() - start) + " to infer " + mst.size() + " mostSpecificTypes"); @@ -277,7 +284,23 @@ public class ABoxRecomputer { , RDFService.ModelSerializationFormat.N3); } + private Model getInferredTypes(Resource individual, Model assertedTypes, TypeCaches caches) { + if (caches == null) { + return getInferredTypes(individual, assertedTypes); + } + + TypeList key = new TypeList(assertedTypes, RDF.type); + Model inferredTypes = caches.getInferredTypesToModel(key, individual); + if (inferredTypes == null) { + inferredTypes = getInferredTypes(individual, assertedTypes); + caches.cacheInferredTypes(key, inferredTypes); + } + + return inferredTypes; + } + private Model getInferredTypes(Resource individual, Model assertedTypes) { + new TypeList(assertedTypes, RDF.type); String queryStr = "CONSTRUCT { \n" + " <" + individual.getURI() + "> a ?type \n" + "} WHERE { \n" + @@ -301,6 +324,21 @@ public class ABoxRecomputer { } } + private Model getMostSpecificTypes(Resource individual, Model assertedTypes, TypeCaches caches) { + if (caches == null) { + return getMostSpecificTypes(individual, assertedTypes); + } + + TypeList key = new TypeList(assertedTypes, RDF.type); + Model mostSpecificTypes = caches.getMostSpecificTypesToModel(key, individual); + if (mostSpecificTypes == null) { + mostSpecificTypes = getMostSpecificTypes(individual, assertedTypes); + caches.cacheMostSpecificTypes(key, mostSpecificTypes); + } + + return mostSpecificTypes; + } + private Model getMostSpecificTypes(Resource individual, Model assertedTypes) { String queryStr = "CONSTRUCT { \n" + " <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" + @@ -493,4 +531,114 @@ public class ABoxRecomputer { public void setStopRequested() { this.stopRequested = true; } + + /** + * Caches for types -> inferred types, and types -> most specific type + */ + private static class TypeCaches { + private Map inferredTypes = new HashMap(); + private Map mostSpecificTypes = new HashMap(); + + void cacheInferredTypes(TypeList key, Model model) { + inferredTypes.put(key, new TypeList(model, RDF.type)); + } + + Model getInferredTypesToModel(TypeList key, Resource individual) { + TypeList types = inferredTypes.get(key); + if (types != null) { + return types.constructModel(individual, RDF.type); + } + return null; + } + + void cacheMostSpecificTypes(TypeList key, Model model) { + mostSpecificTypes.put(key, new TypeList(model, model.createProperty(VitroVocabulary.MOST_SPECIFIC_TYPE))); + } + + Model getMostSpecificTypesToModel(TypeList key, Resource individual) { + TypeList types = mostSpecificTypes.get(key); + if (types != null) { + return types.constructModel(individual, VitroVocabulary.MOST_SPECIFIC_TYPE); + } + return null; + } + } + + /** + * Bundle of type URIs + */ + private static class TypeList { + private List typeUris = new ArrayList(); + + private Integer hashCode = null; + + /** + * Extract type uris - either RDF type or most specific type - from a Model + */ + TypeList(Model model, Property property) { + NodeIterator iterator = model.listObjectsOfProperty(property); + while (iterator.hasNext()) { + RDFNode node = iterator.next(); + String uri = node.asResource().getURI(); + if (!typeUris.contains(uri)) { + typeUris.add(uri); + } + } + } + + Model constructModel(Resource individual, Property property) { + Model model = ModelFactory.createDefaultModel(); + for (String uri : typeUris) { + model.add(individual, property, model.createResource(uri)); + } + + return model; + } + + Model constructModel(Resource individual, String property) { + Model model = ModelFactory.createDefaultModel(); + for (String uri : typeUris) { + model.add(individual, model.createProperty(property), model.createResource(uri)); + } + + return model; + } + + public void addUri(String uri) { + if (!typeUris.contains(uri)) { + typeUris.add(uri); + hashCode = null; + } + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof TypeList)) { + return false; + } + + TypeList otherKey = (TypeList)obj; + + if (typeUris.size() != otherKey.typeUris.size()) { + return false; + } + + return typeUris.containsAll(otherKey.typeUris); + } + + @Override + public int hashCode() { + if (hashCode == null) { + Collections.sort(typeUris); + StringBuilder builder = new StringBuilder(); + for (String key : typeUris) { + builder.append('<').append(key).append('>'); + } + + hashCode = builder.toString().hashCode(); + } + + return hashCode; + } + } }