Reduced memory use in getIndividualURIs() during ABox inference recomputation

This commit is contained in:
brianjlowe 2012-09-26 19:27:32 +00:00
parent 9cdffd0c09
commit 6314cd364d
2 changed files with 59 additions and 36 deletions

View file

@ -2,22 +2,20 @@
package edu.cornell.mannlib.vitro.webapp.reasoner; package edu.cornell.mannlib.vitro.webapp.reasoner;
import java.util.ArrayList; import java.io.InputStream;
import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator; import java.util.Iterator;
import java.util.Set;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.OntProperty; import com.hp.hpl.jena.ontology.OntProperty;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.query.ResultSetFactory;
import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.Resource;
@ -29,6 +27,9 @@ import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDF;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
public class ABoxRecomputer { public class ABoxRecomputer {
private static final Log log = LogFactory.getLog(ABoxRecomputer.class); private static final Log log = LogFactory.getLog(ABoxRecomputer.class);
@ -38,6 +39,7 @@ public class ABoxRecomputer {
private Model inferenceModel; // ABox inferences private Model inferenceModel; // ABox inferences
private Model inferenceRebuildModel; // work area for recomputing all ABox inferences private Model inferenceRebuildModel; // work area for recomputing all ABox inferences
private Model scratchpadModel; // work area for recomputing all ABox inferences private Model scratchpadModel; // work area for recomputing all ABox inferences
private RDFService rdfService;
private SimpleReasoner simpleReasoner; private SimpleReasoner simpleReasoner;
private Object lock1 = new Object(); private Object lock1 = new Object();
@ -56,12 +58,14 @@ public class ABoxRecomputer {
Model inferenceModel, Model inferenceModel,
Model inferenceRebuildModel, Model inferenceRebuildModel,
Model scratchpadModel, Model scratchpadModel,
RDFService rdfService,
SimpleReasoner simpleReasoner) { SimpleReasoner simpleReasoner) {
this.tboxModel = tboxModel; this.tboxModel = tboxModel;
this.aboxModel = aboxModel; this.aboxModel = aboxModel;
this.inferenceModel = inferenceModel; this.inferenceModel = inferenceModel;
this.inferenceRebuildModel = inferenceRebuildModel; this.inferenceRebuildModel = inferenceRebuildModel;
this.scratchpadModel = scratchpadModel; this.scratchpadModel = scratchpadModel;
this.rdfService = rdfService;
this.simpleReasoner = simpleReasoner; this.simpleReasoner = simpleReasoner;
recomputing = false; recomputing = false;
stopRequested = false; stopRequested = false;
@ -116,7 +120,9 @@ public class ABoxRecomputer {
log.info("Computing class subsumption ABox inferences."); log.info("Computing class subsumption ABox inferences.");
int numStmts = 0; int numStmts = 0;
ArrayList<String> individuals = this.getAllIndividualURIs(); Collection<String> individuals = this.getAllIndividualURIs();
log.info("Recomputing inferences for " + individuals.size() + " individuals");
for (String individualURI : individuals) { for (String individualURI : individuals) {
Resource individual = ResourceFactory.createResource(individualURI); Resource individual = ResourceFactory.createResource(individualURI);
@ -143,6 +149,8 @@ public class ABoxRecomputer {
log.error("Exception while recomputing ABox inference model: ", je); log.error("Exception while recomputing ABox inference model: ", je);
} catch (Exception e) { } catch (Exception e) {
log.error("Exception while recomputing ABox inference model: ", e); log.error("Exception while recomputing ABox inference model: ", e);
} catch (OutOfMemoryError e) {
log.error(individualURI + " out of memory", e);
} }
numStmts++; numStmts++;
@ -274,38 +282,52 @@ public class ABoxRecomputer {
/* /*
* Get the URIs for all individuals in the system * Get the URIs for all individuals in the system
*/ */
protected ArrayList<String> getAllIndividualURIs() { protected Collection<String> getAllIndividualURIs() {
String queryString = "select distinct ?subject where {?subject <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> ?type}"; String queryString = "select ?s where {?s a ?type}";
return getIndividualURIs(queryString); return getIndividualURIs(queryString);
} }
protected ArrayList<String> getIndividualURIs(String queryString) { protected Collection<String> getIndividualURIs(String queryString) {
ArrayList<String> individuals = new ArrayList<String>(); Set<String> individuals = new HashSet<String>();
aboxModel.enterCriticalSection(Lock.READ);
try { int batchSize = 50000;
try { int offset = 0;
Query query = QueryFactory.create(queryString, Syntax.syntaxARQ); boolean done = false;
QueryExecution qe = QueryExecutionFactory.create(query, aboxModel);
ResultSet results = qe.execSelect(); while (!done) {
String queryStr = queryString + " LIMIT " + batchSize + " OFFSET " + offset;
if(log.isDebugEnabled()) {
log.debug(queryStr);
}
while (results.hasNext()) { ResultSet results = null;
QuerySolution solution = results.next();
Resource resource = solution.getResource("subject");
if ((resource != null) && !resource.isAnon()) { try {
individuals.add(resource.getURI()); InputStream in = rdfService.sparqlSelectQuery(queryStr, RDFService.ResultFormat.JSON);
} results = ResultSetFactory.fromJSON(in);
} } catch (RDFServiceException e) {
throw new RuntimeException(e);
}
} catch (Exception e) { if (!results.hasNext()) {
log.error("exception while retrieving list of individuals ",e); done = true;
} }
} finally {
aboxModel.leaveCriticalSection(); while (results.hasNext()) {
QuerySolution solution = results.next();
Resource resource = solution.getResource("s");
if ((resource != null) && !resource.isAnon()) {
individuals.add(resource.getURI());
}
}
if(log.isDebugEnabled()) {
log.info(individuals.size() + " in set");
}
offset += batchSize;
} }
return individuals; return individuals;

View file

@ -41,6 +41,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph;
import edu.cornell.mannlib.vitro.webapp.dao.jena.event.BulkUpdateEvent; import edu.cornell.mannlib.vitro.webapp.dao.jena.event.BulkUpdateEvent;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.model.RDFServiceModel;
/** /**
* Allows for real-time incremental materialization or retraction of RDFS- * Allows for real-time incremental materialization or retraction of RDFS-
@ -102,7 +103,7 @@ public class SimpleReasoner extends StatementListener {
this.batchMode = 0; this.batchMode = 0;
aBoxDeltaModeler1 = new CumulativeDeltaModeler(); aBoxDeltaModeler1 = new CumulativeDeltaModeler();
aBoxDeltaModeler2 = new CumulativeDeltaModeler(); aBoxDeltaModeler2 = new CumulativeDeltaModeler();
recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,inferenceRebuildModel,scratchpadModel,this); recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,inferenceRebuildModel,scratchpadModel,rdfService,this);
stopRequested = false; stopRequested = false;
if (rdfService == null) { if (rdfService == null) {
@ -134,7 +135,7 @@ public class SimpleReasoner extends StatementListener {
aBoxDeltaModeler2 = new CumulativeDeltaModeler(); aBoxDeltaModeler2 = new CumulativeDeltaModeler();
this.batchMode = 0; this.batchMode = 0;
stopRequested = false; stopRequested = false;
recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,ModelFactory.createDefaultModel(),ModelFactory.createDefaultModel(),this); recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,ModelFactory.createDefaultModel(), ModelFactory.createDefaultModel(), new RDFServiceModel(aboxModel), this);
} }
public void setPluginList(List<ReasonerPlugin> pluginList) { public void setPluginList(List<ReasonerPlugin> pluginList) {