[VIVO-1668] Efficiency improvements in AboxRecomputer (#108)

* Ensure we only use statements for the current individual in Recomputer
* Make AboxRecomputer more efficient when acquiring the list of individuals

Resolves: https://jira.duraspace.org/browse/VIVO-1668
This commit is contained in:
Graham Triggs 2019-02-12 14:40:02 +00:00 committed by Andrew Woods
parent 6e717446b4
commit c3e8cc739b

View file

@@ -14,6 +14,7 @@ import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Queue; import java.util.Queue;
import java.util.Set; import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@@ -259,8 +260,8 @@ public class ABoxRecomputer {
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
Model types = ModelFactory.createDefaultModel(); Model types = ModelFactory.createDefaultModel();
types.add(assertions.listStatements(null, RDF.type, (RDFNode) null)); types.add(assertions.listStatements(individual, RDF.type, (RDFNode) null));
types.add(rebuildModel.listStatements(null, RDF.type, (RDFNode) null)); types.add(rebuildModel.listStatements(individual, RDF.type, (RDFNode) null));
Model inferredTypes = rewriteInferences(getInferredTypes(individual, types, caches), aliasURI); Model inferredTypes = rewriteInferences(getInferredTypes(individual, types, caches), aliasURI);
rebuildModel.add(inferredTypes); rebuildModel.add(inferredTypes);
log.trace((System.currentTimeMillis() - start) + " to infer " + inferredTypes.size() + " types"); log.trace((System.currentTimeMillis() - start) + " to infer " + inferredTypes.size() + " types");
@@ -457,39 +458,46 @@ public class ABoxRecomputer {
return individualURIs; return individualURIs;
} }
protected void getIndividualURIs(String queryString, Queue<String> individuals) { protected void getIndividualURIs(String queryString, final Queue<String> individuals) {
int batchSize = 50000; final int batchSize = 50000;
int offset = 0; int offset = 0;
boolean done = false; final AtomicBoolean done = new AtomicBoolean(false);
while (!done) { while (!done.get()) {
String queryStr = queryString + " LIMIT " + batchSize + " OFFSET " + offset; String queryStr = queryString + " LIMIT " + batchSize + " OFFSET " + offset;
if(log.isDebugEnabled()) { if(log.isDebugEnabled()) {
log.debug(queryStr); log.debug(queryStr);
} }
ResultSet results = null;
try { try {
InputStream in = rdfService.sparqlSelectQuery(queryStr, RDFService.ResultFormat.JSON); rdfService.sparqlSelectQuery(queryStr, new ResultSetConsumer() {
results = ResultSetFactory.fromJSON(in); private int count = 0;
} catch (RDFServiceException e) {
throw new RuntimeException(e); @Override
} protected void processQuerySolution(QuerySolution qs) {
if (!results.hasNext()) { count++;
done = true; Resource resource = qs.getResource("s");
}
while (results.hasNext()) {
QuerySolution solution = results.next();
Resource resource = solution.getResource("s");
if ((resource != null) && !resource.isAnon()) { if ((resource != null) && !resource.isAnon()) {
individuals.add(resource.getURI()); individuals.add(resource.getURI());
} }
} }
@Override
protected void endProcessing() {
super.endProcessing();
if (count < batchSize) {
done.set(true);
}
}
});
} catch (RDFServiceException e) {
throw new RuntimeException(e);
}
if(log.isDebugEnabled()) { if(log.isDebugEnabled()) {
log.debug(individuals.size() + " in set"); log.debug(individuals.size() + " in set");
} }
offset += batchSize; offset += batchSize;
} }
} }
protected void addInferenceStatementsFor(String individualUri, Model addTo) throws RDFServiceException { protected void addInferenceStatementsFor(String individualUri, Model addTo) throws RDFServiceException {