NIHVIVO-3978 chunking statement iteration to avoid OutOfMemoryErrors when recomputing inferences
This commit is contained in:
parent
b7e2c71cc1
commit
ec06bfd240
1 changed files with 102 additions and 21 deletions
|
@ -18,6 +18,7 @@ import com.hp.hpl.jena.query.QuerySolution;
|
||||||
import com.hp.hpl.jena.query.ResultSet;
|
import com.hp.hpl.jena.query.ResultSet;
|
||||||
import com.hp.hpl.jena.query.ResultSetFactory;
|
import com.hp.hpl.jena.query.ResultSetFactory;
|
||||||
import com.hp.hpl.jena.rdf.model.Model;
|
import com.hp.hpl.jena.rdf.model.Model;
|
||||||
|
import com.hp.hpl.jena.rdf.model.ModelFactory;
|
||||||
import com.hp.hpl.jena.rdf.model.RDFNode;
|
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||||
import com.hp.hpl.jena.rdf.model.Resource;
|
import com.hp.hpl.jena.rdf.model.Resource;
|
||||||
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
||||||
|
@ -30,6 +31,9 @@ import com.hp.hpl.jena.vocabulary.RDF;
|
||||||
|
|
||||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
|
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
|
||||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
|
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.servlet.setup.JenaDataSourceSetupBase;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.servlet.setup.SimpleReasonerSetup;
|
||||||
|
|
||||||
public class ABoxRecomputer {
|
public class ABoxRecomputer {
|
||||||
|
|
||||||
|
@ -278,6 +282,7 @@ public class ABoxRecomputer {
|
||||||
log.error("Exception while reconciling the current and recomputed ABox inference model for class subsumption inferences. Halting processing." , e);
|
log.error("Exception while reconciling the current and recomputed ABox inference model for class subsumption inferences. Halting processing." , e);
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
e.printStackTrace();
|
||||||
log.error("Exception while recomputing ABox inferences. Halting processing.", e);
|
log.error("Exception while recomputing ABox inferences. Halting processing.", e);
|
||||||
} finally {
|
} finally {
|
||||||
inferenceRebuildModel.removeAll();
|
inferenceRebuildModel.removeAll();
|
||||||
|
@ -362,20 +367,21 @@ public class ABoxRecomputer {
|
||||||
* reconcile a set of inferences into the application inference model
|
* reconcile a set of inferences into the application inference model
|
||||||
*/
|
*/
|
||||||
protected boolean updateInferenceModel(Model inferenceRebuildModel) {
|
protected boolean updateInferenceModel(Model inferenceRebuildModel) {
|
||||||
|
|
||||||
log.info("Updating ABox inference model");
|
log.info("Updating ABox inference model");
|
||||||
StmtIterator iter = null;
|
Iterator<Statement> iter = null;
|
||||||
|
|
||||||
// Remove everything from the current inference model that is not
|
// Remove everything from the current inference model that is not
|
||||||
// in the recomputed inference model
|
// in the recomputed inference model
|
||||||
int num = 0;
|
int num = 0;
|
||||||
scratchpadModel.enterCriticalSection(Lock.WRITE);
|
scratchpadModel.enterCriticalSection(Lock.WRITE);
|
||||||
scratchpadModel.removeAll();
|
scratchpadModel.removeAll();
|
||||||
|
log.info("Updating ABox inference model (checking for outdated inferences)");
|
||||||
try {
|
try {
|
||||||
inferenceModel.enterCriticalSection(Lock.READ);
|
inferenceModel.enterCriticalSection(Lock.READ);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
iter = inferenceModel.listStatements();
|
iter = listModelStatements(inferenceModel, JenaDataSourceSetupBase.JENA_INF_MODEL);
|
||||||
|
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
Statement stmt = iter.next();
|
Statement stmt = iter.next();
|
||||||
if (!inferenceRebuildModel.contains(stmt)) {
|
if (!inferenceRebuildModel.contains(stmt)) {
|
||||||
|
@ -384,7 +390,7 @@ public class ABoxRecomputer {
|
||||||
|
|
||||||
num++;
|
num++;
|
||||||
if ((num % 10000) == 0) {
|
if ((num % 10000) == 0) {
|
||||||
log.info("Still updating ABox inference model (removing outdated inferences)...");
|
log.info("Still updating ABox inference model (checking for outdated inferences)...");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stopRequested) {
|
if (stopRequested) {
|
||||||
|
@ -392,14 +398,14 @@ public class ABoxRecomputer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (iter != null) {
|
// if (iter != null) {
|
||||||
iter.close();
|
// iter.close();
|
||||||
}
|
// }
|
||||||
inferenceModel.leaveCriticalSection();
|
inferenceModel.leaveCriticalSection();
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
iter = scratchpadModel.listStatements();
|
iter = listModelStatements(scratchpadModel, SimpleReasonerSetup.JENA_INF_MODEL_SCRATCHPAD);
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
Statement stmt = iter.next();
|
Statement stmt = iter.next();
|
||||||
|
|
||||||
|
@ -411,16 +417,17 @@ public class ABoxRecomputer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (iter != null) {
|
// if (iter != null) {
|
||||||
iter.close();
|
// iter.close();
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add everything from the recomputed inference model that is not already
|
// Add everything from the recomputed inference model that is not already
|
||||||
// in the current inference model to the current inference model.
|
// in the current inference model to the current inference model.
|
||||||
try {
|
try {
|
||||||
scratchpadModel.removeAll();
|
scratchpadModel.removeAll();
|
||||||
iter = inferenceRebuildModel.listStatements();
|
log.info("Updating ABox inference model (adding any new inferences)");
|
||||||
|
iter = listModelStatements(inferenceRebuildModel, SimpleReasonerSetup.JENA_INF_MODEL_REBUILD);
|
||||||
|
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
Statement stmt = iter.next();
|
Statement stmt = iter.next();
|
||||||
|
@ -436,7 +443,7 @@ public class ABoxRecomputer {
|
||||||
|
|
||||||
num++;
|
num++;
|
||||||
if ((num % 10000) == 0) {
|
if ((num % 10000) == 0) {
|
||||||
log.info("Still updating ABox inference model (adding new inferences)...");
|
log.info("Still updating ABox inference model (adding any new inferences)...");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (stopRequested) {
|
if (stopRequested) {
|
||||||
|
@ -444,12 +451,12 @@ public class ABoxRecomputer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (iter != null) {
|
// if (iter != null) {
|
||||||
iter.close();
|
// iter.close();
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
iter = scratchpadModel.listStatements();
|
iter = listModelStatements(scratchpadModel, SimpleReasonerSetup.JENA_INF_MODEL_SCRATCHPAD);
|
||||||
try {
|
try {
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
Statement stmt = iter.next();
|
Statement stmt = iter.next();
|
||||||
|
@ -462,9 +469,9 @@ public class ABoxRecomputer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
if (iter != null) {
|
// if (iter != null) {
|
||||||
iter.close();
|
// iter.close();
|
||||||
}
|
// }
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
scratchpadModel.removeAll();
|
scratchpadModel.removeAll();
|
||||||
|
@ -475,6 +482,80 @@ public class ABoxRecomputer {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private Iterator<Statement> listModelStatements(Model model, String graphURI) {
|
||||||
|
// the RDFServices supplied by the unit tests won't have the right
|
||||||
|
// named graphs. So if the graphURI-based chunked iterator is empty,
|
||||||
|
// we'll try listStatements() on the model instead.
|
||||||
|
Iterator<Statement> it = new ChunkedStatementIterator(graphURI);
|
||||||
|
if (it.hasNext()) {
|
||||||
|
return it;
|
||||||
|
} else {
|
||||||
|
return model.listStatements();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// avoids OutOfMemory errors by retrieving triples in batches
|
||||||
|
private class ChunkedStatementIterator implements Iterator<Statement> {
|
||||||
|
|
||||||
|
final int CHUNK_SIZE = 50000;
|
||||||
|
private int offset = 0;
|
||||||
|
|
||||||
|
private String queryString;
|
||||||
|
|
||||||
|
private Model temp = ModelFactory.createDefaultModel();
|
||||||
|
private StmtIterator tempIt;
|
||||||
|
|
||||||
|
public ChunkedStatementIterator(String graphURI) {
|
||||||
|
this.queryString = "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <" +
|
||||||
|
graphURI + "> { ?s ?p ?o } }";
|
||||||
|
}
|
||||||
|
|
||||||
|
public Statement next() {
|
||||||
|
if (tempIt.hasNext()) {
|
||||||
|
return tempIt.nextStatement();
|
||||||
|
} else {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void remove() {
|
||||||
|
throw new UnsupportedOperationException(this.getClass().getName() +
|
||||||
|
" does not support .remove()");
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasNext() {
|
||||||
|
if (tempIt != null && tempIt.hasNext()) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
getNextChunk();
|
||||||
|
if (temp.size() > 0) {
|
||||||
|
tempIt = temp.listStatements();
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void getNextChunk() {
|
||||||
|
|
||||||
|
String chunkQueryString = queryString + " LIMIT " + CHUNK_SIZE + " OFFSET " + offset;
|
||||||
|
offset += CHUNK_SIZE;
|
||||||
|
|
||||||
|
try {
|
||||||
|
InputStream in = rdfService.sparqlConstructQuery(
|
||||||
|
chunkQueryString, RDFService.ModelSerializationFormat.NTRIPLE);
|
||||||
|
temp.removeAll();
|
||||||
|
temp.add(RDFServiceUtils.parseModel(
|
||||||
|
in, RDFService.ModelSerializationFormat.NTRIPLE));
|
||||||
|
} catch (RDFServiceException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is called when the application shuts down.
|
* This is called when the application shuts down.
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Add table
Reference in a new issue