Merge branch 'develop' into feature/searchIndexerTry

This commit is contained in:
Jim Blake 2015-01-09 16:41:36 -05:00
commit 99c03ce49a
5 changed files with 505 additions and 529 deletions

View file

@ -27,6 +27,7 @@ public class RDFServiceModel extends RDFServiceJena implements RDFService {
private final static Log log = LogFactory.getLog(RDFServiceModel.class); private final static Log log = LogFactory.getLog(RDFServiceModel.class);
private Model model; private Model model;
private Dataset dataset;
private String modelName; private String modelName;
/** /**
@ -37,14 +38,27 @@ public class RDFServiceModel extends RDFServiceJena implements RDFService {
this.model = model; this.model = model;
} }
/**
* Create an RDFService to access a Jena Dataset
* @param dataset
*/
public RDFServiceModel(Dataset dataset) {
this.dataset = dataset;
}
@Override @Override
protected DatasetWrapper getDatasetWrapper() { protected DatasetWrapper getDatasetWrapper() {
Dataset d = DatasetFactory.createMem(); Dataset d = null;
if (dataset != null) {
d = dataset;
} else {
d = DatasetFactory.createMem();
if (modelName == null) { if (modelName == null) {
d.setDefaultModel(this.model); d.setDefaultModel(this.model);
} else { } else {
d.addNamedModel(this.modelName, model); d.addNamedModel(this.modelName, model);
} }
}
DatasetWrapper datasetWrapper = new DatasetWrapper(d); DatasetWrapper datasetWrapper = new DatasetWrapper(d);
return datasetWrapper; return datasetWrapper;
} }
@ -75,7 +89,16 @@ public class RDFServiceModel extends RDFServiceJena implements RDFService {
modelChange.setSerializedModel(new ByteArrayInputStream(bytes)); modelChange.setSerializedModel(new ByteArrayInputStream(bytes));
} }
modelChange.getSerializedModel().mark(Integer.MAX_VALUE); modelChange.getSerializedModel().mark(Integer.MAX_VALUE);
operateOnModel(model, modelChange, null); Model m = this.model;
if (m == null && dataset != null) {
String changeGraphURI = modelChange.getGraphURI();
if (changeGraphURI != null) {
m = dataset.getNamedModel(changeGraphURI);
} else {
m = dataset.getDefaultModel();
}
}
operateOnModel(m, modelChange, null);
} }
// notify listeners of triple changes // notify listeners of triple changes

View file

@ -595,13 +595,13 @@ public class RDFServiceSparql extends RDFServiceImpl implements RDFService {
private void performChange(ModelChange modelChange) throws RDFServiceException { private void performChange(ModelChange modelChange) throws RDFServiceException {
Model model = parseModel(modelChange); Model model = parseModel(modelChange);
if (modelChange.getOperation() == ModelChange.Operation.ADD) {
Model[] separatedModel = separateStatementsWithBlankNodes(model); Model[] separatedModel = separateStatementsWithBlankNodes(model);
if (modelChange.getOperation() == ModelChange.Operation.ADD) {
addModel(separatedModel[1], modelChange.getGraphURI()); addModel(separatedModel[1], modelChange.getGraphURI());
addBlankNodesWithSparqlUpdate(separatedModel[0], modelChange.getGraphURI()); addBlankNodesWithSparqlUpdate(separatedModel[0], modelChange.getGraphURI());
} else if (modelChange.getOperation() == ModelChange.Operation.REMOVE) { } else if (modelChange.getOperation() == ModelChange.Operation.REMOVE) {
deleteModel(model, modelChange.getGraphURI()); deleteModel(separatedModel[1], modelChange.getGraphURI());
removeBlankNodesWithSparqlUpdate(model, modelChange.getGraphURI()); removeBlankNodesWithSparqlUpdate(separatedModel[0], modelChange.getGraphURI());
} else { } else {
log.error("unrecognized operation type"); log.error("unrecognized operation type");
} }

View file

@ -2,6 +2,8 @@
package edu.cornell.mannlib.vitro.webapp.reasoner; package edu.cornell.mannlib.vitro.webapp.reasoner;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collection; import java.util.Collection;
@ -14,7 +16,10 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.OntProperty; import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFactory; import com.hp.hpl.jena.query.ResultSetFactory;
@ -25,56 +30,51 @@ import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory; import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.shared.JenaException;
import com.hp.hpl.jena.shared.Lock; import com.hp.hpl.jena.shared.Lock;
import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF; import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames;
import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeSet;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
import edu.cornell.mannlib.vitro.webapp.servlet.setup.JenaDataSourceSetupBase;
import edu.cornell.mannlib.vitro.webapp.servlet.setup.SimpleReasonerSetup;
public class ABoxRecomputer { public class ABoxRecomputer {
private static final Log log = LogFactory.getLog(ABoxRecomputer.class); private static final Log log = LogFactory.getLog(ABoxRecomputer.class);
private OntModel tboxModel; // asserted and inferred TBox axioms private OntModel tboxModel; // asserted and inferred TBox axioms
private OntModel aboxModel; // ABox assertions private OntModel aboxModel;
private Model inferenceModel; // ABox inferences
private Model inferenceRebuildModel; // work area for recomputing all ABox inferences
private Model scratchpadModel; // work area for recomputing all ABox inferences
private RDFService rdfService; private RDFService rdfService;
private SimpleReasoner simpleReasoner; private SimpleReasoner simpleReasoner;
private Object lock1 = new Object(); private Object lock1 = new Object();
private volatile boolean recomputing = false; private volatile boolean recomputing = false;
private boolean stopRequested = false; private boolean stopRequested = false;
private boolean handleSameAs = false;
private final int BATCH_SIZE = 100;
private final int REPORTING_INTERVAL = 1000;
/** /**
* @param tboxModel - input. This model contains both asserted and inferred TBox axioms * @param tboxModel - input. This model contains both asserted and inferred TBox axioms
* @param aboxModel - input. This model contains asserted ABox statements * @param aboxModel - input. This model contains asserted ABox statements
* @param inferenceModel - output. This is the model in which inferred (materialized) ABox statements are maintained (added or retracted). * @param inferenceModel - output. This is the model in which inferred (materialized) ABox statements are maintained (added or retracted).
* @param inferenceRebuildModel - output. This the model is temporarily used when the whole ABox inference model is rebuilt
* @param inferenceScratchpadModel - output. This the model is temporarily used when the whole ABox inference model is rebuilt
*/ */
public ABoxRecomputer(OntModel tboxModel, public ABoxRecomputer(OntModel tboxModel,
OntModel aboxModel, OntModel aboxModel,
Model inferenceModel,
Model inferenceRebuildModel,
Model scratchpadModel,
RDFService rdfService, RDFService rdfService,
SimpleReasoner simpleReasoner) { SimpleReasoner simpleReasoner) {
this.tboxModel = tboxModel; this.tboxModel = tboxModel;
this.aboxModel = aboxModel; this.aboxModel = aboxModel;
this.inferenceModel = inferenceModel;
this.inferenceRebuildModel = inferenceRebuildModel;
this.scratchpadModel = scratchpadModel;
this.rdfService = rdfService; this.rdfService = rdfService;
this.simpleReasoner = simpleReasoner; this.simpleReasoner = simpleReasoner;
recomputing = false; recomputing = false;
stopRequested = false; stopRequested = false;
handleSameAs = simpleReasoner.getSameAsEnabled();
} }
/** /**
@ -89,7 +89,6 @@ public class ABoxRecomputer {
* Recompute all inferences. * Recompute all inferences.
*/ */
public void recompute() { public void recompute() {
synchronized (lock1) { synchronized (lock1) {
if (recomputing) { if (recomputing) {
return; return;
@ -97,7 +96,6 @@ public class ABoxRecomputer {
recomputing = true; recomputing = true;
} }
} }
try { try {
recomputeABox(); recomputeABox();
} finally { } finally {
@ -107,207 +105,250 @@ public class ABoxRecomputer {
} }
} }
// don't check for existing inferences in the rebuild model
private boolean DO_CHECK = true;
/* /*
* Recompute the entire ABox inference graph. The new * Recompute the entire ABox inference graph.
* inference graph is built in a separate model and
* then reconciled with the inference graph in active
* use. The model reconciliation must be done
* without reading the whole inference models into
* memory in order to support very large ABox
* inference models.
*/ */
protected void recomputeABox() { protected void recomputeABox() {
log.info("Recomputing ABox inferences.");
// recompute class subsumption inferences log.info("Finding individuals in ABox.");
inferenceRebuildModel.enterCriticalSection(Lock.WRITE);
try {
log.info("Clearing inference rebuild model.");
HashSet<String> unknownTypes = new HashSet<String>();
inferenceRebuildModel.removeAll();
log.info("Computing class subsumption ABox inferences.");
int numStmts = 0;
Collection<String> individuals = this.getAllIndividualURIs(); Collection<String> individuals = this.getAllIndividualURIs();
log.info("Recomputing inferences for " + individuals.size() + " individuals"); log.info("Recomputing inferences for " + individuals.size() + " individuals");
long start = System.currentTimeMillis(); long start = System.currentTimeMillis();
int numInds = 0;
Model rebuildModel = ModelFactory.createDefaultModel();
Model additionalInferences = ModelFactory.createDefaultModel();
List<String> individualsInBatch = new ArrayList<String>();
Iterator<String> individualIt = individuals.iterator();
while (individualIt.hasNext()) {
String individualURI = individualIt.next();
try {
additionalInferences.add(recomputeIndividual(
individualURI, rebuildModel));
numInds++;
individualsInBatch.add(individualURI);
boolean batchFilled = (numInds % BATCH_SIZE) == 0;
boolean reportingInterval = (numInds % REPORTING_INTERVAL) == 0;
if (batchFilled || !individualIt.hasNext()) {
log.debug(rebuildModel.size() + " total inferences");
updateInferenceModel(rebuildModel, individualsInBatch);
rebuildModel.removeAll();
individualsInBatch.clear();
}
if (reportingInterval) {
log.info("Still recomputing inferences ("
+ numInds + "/" + individuals.size() + " individuals)");
log.info((System.currentTimeMillis() - start) / numInds + " ms per individual");
}
if (stopRequested) {
log.info("a stopRequested signal was received during recomputeABox. Halting Processing.");
return;
}
} catch (Exception e) {
log.error("Error recomputing inferences for individual <" + individualURI + ">", e);
}
}
if(additionalInferences.size() > 0) {
log.info("Writing additional inferences generated by reasoner plugins.");
ChangeSet change = rdfService.manufactureChangeSet();
change.addAddition(makeN3InputStream(additionalInferences),
RDFService.ModelSerializationFormat.N3, ModelNames.ABOX_INFERENCES);
try {
rdfService.changeSetUpdate(change);
} catch (RDFServiceException e) {
log.error("Unable to write additional inferences from reasoner plugins", e);
}
}
log.info("Finished recomputing inferences");
}
for (String individualURI : individuals) { private static final boolean RUN_PLUGINS = true;
private static final boolean SKIP_PLUGINS = !RUN_PLUGINS;
private Model recomputeIndividual(String individualURI,
Model rebuildModel) throws RDFServiceException {
long start = System.currentTimeMillis();
Model assertions = getAssertions(individualURI);
log.trace((System.currentTimeMillis() - start) + " ms to get assertions.");
Model additionalInferences = recomputeIndividual(
individualURI, null, assertions, rebuildModel, RUN_PLUGINS);
if (handleSameAs) {
Set<String> sameAsInds = getSameAsIndividuals(individualURI);
for (String sameAsInd : sameAsInds) {
// sameAs for plugins is handled by the SimpleReasoner
Model sameAsIndAssertions = getAssertions(sameAsInd);
recomputeIndividual(
sameAsInd, individualURI, sameAsIndAssertions, rebuildModel, SKIP_PLUGINS);
rebuildModel.add(
rewriteInferences(getAssertions(sameAsInd), individualURI));
Resource indRes = ResourceFactory.createResource(individualURI);
Resource sameAsIndRes = ResourceFactory.createResource(sameAsInd);
if(!assertions.contains(indRes, OWL.sameAs, sameAsIndRes)) {
rebuildModel.add(indRes, OWL.sameAs, sameAsIndRes);
}
}
}
return additionalInferences;
}
/**
* Adds inferences to temporary rebuildmodel
* @param individualURI
* @param rebuildModel
* @return any additional inferences produced by plugins that affect other
* individuals
*/
private Model recomputeIndividual(String individualURI, String aliasURI,
Model assertions, Model rebuildModel, boolean runPlugins)
throws RDFServiceException {
Model additionalInferences = ModelFactory.createDefaultModel();
Resource individual = ResourceFactory.createResource(individualURI); Resource individual = ResourceFactory.createResource(individualURI);
try { long start = System.currentTimeMillis();
addedABoxTypeAssertion(individual, inferenceRebuildModel, unknownTypes, DO_CHECK); Model types = ModelFactory.createDefaultModel();
simpleReasoner.setMostSpecificTypes(individual, inferenceRebuildModel, unknownTypes); types.add(assertions.listStatements(null, RDF.type, (RDFNode) null));
Model inferredTypes = rewriteInferences(getInferredTypes(individual, types), aliasURI);
rebuildModel.add(inferredTypes);
log.trace((System.currentTimeMillis() - start) + " to infer " + inferredTypes.size() + " types");
start = System.currentTimeMillis();
types.add(inferredTypes);
Model mst = getMostSpecificTypes(individual, types);
rebuildModel.add(rewriteInferences(mst, aliasURI));
log.trace((System.currentTimeMillis() - start) + " to infer " + mst.size() + " mostSpecificTypes");
start = System.currentTimeMillis();
Model inferredInvs = getInferredInverseStatements(individualURI);
inferredInvs.remove(assertions);
rebuildModel.add(rewriteInferences(inferredInvs, aliasURI));
log.trace((System.currentTimeMillis() - start) + " to infer " + inferredInvs.size() + " inverses");
List<ReasonerPlugin> pluginList = simpleReasoner.getPluginList(); List<ReasonerPlugin> pluginList = simpleReasoner.getPluginList();
if (pluginList.size() > 0) { if (runPlugins && pluginList.size() > 0) {
StmtIterator sit = aboxModel.listStatements(individual, null, (RDFNode) null); Model tmpModel = ModelFactory.createDefaultModel();
StmtIterator sit = assertions.listStatements();
while (sit.hasNext()) { while (sit.hasNext()) {
Statement s = sit.nextStatement(); Statement s = sit.nextStatement();
for (ReasonerPlugin plugin : pluginList) { for (ReasonerPlugin plugin : pluginList) {
plugin.addedABoxStatement(s, aboxModel, inferenceRebuildModel, tboxModel); plugin.addedABoxStatement(s, aboxModel, tmpModel, tboxModel);
}
}
StmtIterator tmpIt = tmpModel.listStatements();
while(tmpIt.hasNext()) {
Statement tmpStmt = tmpIt.nextStatement();
if(individual.equals(tmpStmt.getSubject())) {
rebuildModel.add(tmpStmt);
} else {
additionalInferences.add(tmpStmt);
} }
} }
} }
} catch (NullPointerException npe) { return additionalInferences;
log.error("a NullPointerException was received while recomputing the ABox inferences. Halting inference computation.");
npe.printStackTrace();
return;
} catch (JenaException je) {
if (je.getMessage().equals("Statement models must no be null")) {
log.error("Exception while recomputing ABox inference model. Halting inference computation.", je);
return;
}
log.error("Exception while recomputing ABox inference model: ", je);
} catch (Exception e) {
log.error("Exception while recomputing ABox inference model: ", e);
} catch (OutOfMemoryError e) {
log.error(individualURI + " out of memory", e);
} }
numStmts++; private Model getAssertions(String individualURI) throws RDFServiceException {
if ((numStmts % 1000) == 0) { String queryStr = "CONSTRUCT { \n" +
log.info("Still computing class subsumption ABox inferences (" " <" + individualURI + "> ?p ?value \n" +
+ numStmts + "/" + individuals.size() + " individuals)"); "} WHERE { \n" +
log.info((System.currentTimeMillis() - start) / 1000 + " ms per individual"); " GRAPH ?g { \n" +
start = System.currentTimeMillis(); " <" + individualURI + "> ?p ?value \n" +
" } \n" +
" FILTER (?g != <" + ModelNames.ABOX_INFERENCES + ">)\n" +
"} \n";
return RDFServiceUtils.parseModel(
rdfService.sparqlConstructQuery(
queryStr, RDFService.ModelSerializationFormat.N3)
, RDFService.ModelSerializationFormat.N3);
} }
if (stopRequested) { private Model getInferredTypes(Resource individual, Model assertedTypes) {
log.info("a stopRequested signal was received during recomputeABox. Halting Processing."); String queryStr = "CONSTRUCT { \n" +
return; " <" + individual.getURI() + "> a ?type \n" +
} "} WHERE { \n" +
} " <" + individual.getURI() + "> a ?assertedType .\n" +
" { ?assertedType <" + RDFS.subClassOf.getURI() + "> ?type } \n" +
log.info("Finished computing class subsumption ABox inferences"); " UNION \n" +
log.info("Computing inverse property ABox inferences"); " { ?assertedType <" + OWL.equivalentClass.getURI() + "> ?type } \n" +
" FILTER (isURI(?type)) \n" +
Iterator<Statement> invStatements = null; " FILTER NOT EXISTS { \n" +
" <" + individual.getURI() + "> a ?type \n" +
" } \n" +
"} \n";
Model union = ModelFactory.createUnion(assertedTypes, tboxModel);
tboxModel.enterCriticalSection(Lock.READ); tboxModel.enterCriticalSection(Lock.READ);
try { try {
invStatements = tboxModel.listStatements((Resource) null, OWL.inverseOf, (Resource) null); Query q = QueryFactory.create(queryStr);
QueryExecution qe = QueryExecutionFactory.create(q, union);
return qe.execConstruct();
} finally { } finally {
tboxModel.leaveCriticalSection(); tboxModel.leaveCriticalSection();
} }
}
numStmts = 0; private Model getMostSpecificTypes(Resource individual, Model assertedTypes) {
while (invStatements.hasNext()) { String queryStr = "CONSTRUCT { \n" +
Statement stmt = invStatements.next(); " <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" +
"} WHERE { \n" +
" <" + individual.getURI() + "> a ?type .\n" +
" FILTER (isURI(?type)) \n" +
" FILTER NOT EXISTS { \n" +
" <" + individual.getURI() + "> a ?type2 . \n" +
" ?type2 <" + RDFS.subClassOf.getURI() + "> ?type. \n" +
" } \n" +
" FILTER NOT EXISTS { \n" +
" <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" +
" } \n" +
"} \n";
Model union = ModelFactory.createUnion(assertedTypes, tboxModel);
tboxModel.enterCriticalSection(Lock.READ);
try { try {
OntProperty prop1 = tboxModel.getOntProperty((stmt.getSubject()).getURI()); Query q = QueryFactory.create(queryStr);
if (prop1 == null) { QueryExecution qe = QueryExecutionFactory.create(q, union);
//TODO make sure not to print out a million of these for the same property return qe.execConstruct();
log.debug("didn't find subject property in the tbox: " + (stmt.getSubject()).getURI());
continue;
}
OntProperty prop2 = tboxModel.getOntProperty(((Resource)stmt.getObject()).getURI());
if (prop2 == null) {
//TODO make sure not to print out a million of these for the same property
log.debug("didn't find object property in the tbox: " + ((Resource)stmt.getObject()).getURI());
continue;
}
simpleReasoner.addedInverseProperty(prop1, prop2, inferenceRebuildModel);
} catch (NullPointerException npe) {
log.error("a NullPointerException was received while recomputing the ABox inferences. Halting inference computation.");
npe.printStackTrace();
return;
} catch (JenaException je) {
if (je.getMessage().equals("Statement models must no be null")) {
log.error("Exception while recomputing ABox inference model. Halting inference computation.", je);
return;
}
log.error("Exception while recomputing ABox inference model: ", je);
} catch (Exception e) {
log.error("Exception while recomputing ABox inference model: ", e);
}
numStmts++;
if ((numStmts % 10000) == 0) {
log.info("Still computing inverse property ABox inferences...");
}
if (stopRequested) {
log.info("a stopRequested signal was received during recomputeABox. Halting Processing.");
return;
}
}
log.info("Finished computing inverse property ABox inferences");
log.info("Computing sameAs ABox inferences");
Iterator<Statement> sameAsStatements = null;
aboxModel.enterCriticalSection(Lock.READ);
try {
sameAsStatements = aboxModel.listStatements((Resource) null, OWL.sameAs, (Resource) null);
} finally { } finally {
aboxModel.leaveCriticalSection(); tboxModel.leaveCriticalSection();
}
} }
numStmts = 0; private Model getInferredInverseStatements(String individualURI) throws RDFServiceException {
while (sameAsStatements.hasNext()) { String queryStr = "CONSTRUCT { \n" +
Statement stmt = sameAsStatements.next(); " <" + individualURI + "> ?inv ?value \n" +
"} WHERE { \n" +
try { " GRAPH ?gr { \n" +
simpleReasoner.addedABoxSameAsAssertion(stmt, inferenceRebuildModel); " ?value ?prop <" + individualURI + "> \n" +
} catch (NullPointerException npe) { " } \n" +
log.error("a NullPointerException was received while recomputing the ABox inferences. Halting inference computation."); " FILTER (isURI(?value)) \n" +
npe.printStackTrace(); " FILTER (?gr != <" + ModelNames.ABOX_INFERENCES + ">) \n" +
return; " { ?prop <" + OWL.inverseOf.getURI() + "> ?inv } \n" +
} catch (JenaException je) { " UNION \n" +
if (je.getMessage().equals("Statement models must no be null")) { " { ?inv <" + OWL.inverseOf.getURI() + "> ?prop } \n" +
log.error("Exception while recomputing ABox inference model. Halting inference computation.", je); "} \n";
return; return RDFServiceUtils.parseModel(
} rdfService.sparqlConstructQuery(
log.error("Exception while recomputing ABox inference model: ", je); queryStr, RDFService.ModelSerializationFormat.N3)
} catch (Exception e) { , RDFService.ModelSerializationFormat.N3);
log.error("Exception while recomputing ABox inference model: ", e);
} }
numStmts++; private Model rewriteInferences(Model inferences, String aliasURI) {
if ((numStmts % 10000) == 0) { if (aliasURI == null) {
log.info("Still computing sameAs ABox inferences..."); return inferences;
} }
Model rewrite = ModelFactory.createDefaultModel();
if (stopRequested) { Resource alias = ResourceFactory.createResource(aliasURI);
log.info("a stopRequested signal was received during recomputeABox. Halting Processing."); StmtIterator sit = inferences.listStatements();
return; while(sit.hasNext()) {
} Statement stmt = sit.nextStatement();
} rewrite.add(alias, stmt.getPredicate(), stmt.getObject());
log.info("Finished computing sameAs ABox inferences");
try {
if (updateInferenceModel(inferenceRebuildModel, individuals)) {
log.info("a stopRequested signal was received during updateInferenceModel. Halting Processing.");
return;
}
} catch (Exception e) {
log.error("Exception while reconciling the current and recomputed ABox inference model for class subsumption inferences. Halting processing." , e);
}
} catch (Exception e) {
e.printStackTrace();
log.error("Exception while recomputing ABox inferences. Halting processing.", e);
} finally {
inferenceRebuildModel.removeAll();
inferenceRebuildModel.leaveCriticalSection();
} }
return rewrite;
} }
/* /*
* Get the URIs for all individuals in the system * Get the URIs for all individuals in the system
*/ */
protected Collection<String> getAllIndividualURIs() { protected Collection<String> getAllIndividualURIs() {
HashSet<String> individualURIs = new HashSet<String>(); HashSet<String> individualURIs = new HashSet<String>();
List<String> classList = new ArrayList<String>(); List<String> classList = new ArrayList<String>();
tboxModel.enterCriticalSection(Lock.READ); tboxModel.enterCriticalSection(Lock.READ);
try { try {
StmtIterator classIt = tboxModel.listStatements( StmtIterator classIt = tboxModel.listStatements(
@ -323,40 +364,32 @@ public class ABoxRecomputer {
} finally { } finally {
tboxModel.leaveCriticalSection(); tboxModel.leaveCriticalSection();
} }
for (String classURI : classList) { for (String classURI : classList) {
String queryString = "SELECT ?s WHERE { ?s a <" + classURI + "> } "; String queryString = "SELECT ?s WHERE { ?s a <" + classURI + "> } ";
getIndividualURIs(queryString, individualURIs); getIndividualURIs(queryString, individualURIs);
} }
return individualURIs; return individualURIs;
} }
protected void getIndividualURIs(String queryString, Set<String> individuals) { protected void getIndividualURIs(String queryString, Set<String> individuals) {
int batchSize = 50000; int batchSize = 50000;
int offset = 0; int offset = 0;
boolean done = false; boolean done = false;
while (!done) { while (!done) {
String queryStr = queryString + " LIMIT " + batchSize + " OFFSET " + offset; String queryStr = queryString + " LIMIT " + batchSize + " OFFSET " + offset;
if(log.isDebugEnabled()) { if(log.isDebugEnabled()) {
log.debug(queryStr); log.debug(queryStr);
} }
ResultSet results = null; ResultSet results = null;
try { try {
InputStream in = rdfService.sparqlSelectQuery(queryStr, RDFService.ResultFormat.JSON); InputStream in = rdfService.sparqlSelectQuery(queryStr, RDFService.ResultFormat.JSON);
results = ResultSetFactory.fromJSON(in); results = ResultSetFactory.fromJSON(in);
} catch (RDFServiceException e) { } catch (RDFServiceException e) {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
if (!results.hasNext()) { if (!results.hasNext()) {
done = true; done = true;
} }
while (results.hasNext()) { while (results.hasNext()) {
QuerySolution solution = results.next(); QuerySolution solution = results.next();
Resource resource = solution.getResource("s"); Resource resource = solution.getResource("s");
@ -365,185 +398,85 @@ public class ABoxRecomputer {
individuals.add(resource.getURI()); individuals.add(resource.getURI());
} }
} }
if(log.isDebugEnabled()) { if(log.isDebugEnabled()) {
log.info(individuals.size() + " in set"); log.debug(individuals.size() + " in set");
} }
offset += batchSize; offset += batchSize;
} }
} }
protected void addedABoxTypeAssertion(Resource individual, Model inferenceModel,
HashSet<String> unknownTypes) {
addedABoxTypeAssertion(individual, inferenceModel, unknownTypes, true);
}
protected void addedABoxTypeAssertion(Resource individual, Model inferenceModel,
HashSet<String> unknownTypes, boolean checkRedundancy) {
StmtIterator iter = null;
aboxModel.enterCriticalSection(Lock.READ);
try {
iter = aboxModel.listStatements(individual, RDF.type, (RDFNode) null);
while (iter.hasNext()) {
Statement stmt = iter.next();
simpleReasoner.addedABoxTypeAssertion(
stmt, inferenceModel, unknownTypes, checkRedundancy);
}
} finally {
if (iter != null) {
iter.close();
}
aboxModel.leaveCriticalSection();
}
}
/* /*
* reconcile a set of inferences into the application inference model * reconcile a set of inferences into the application inference model
*/ */
protected boolean updateInferenceModel(Model inferenceRebuildModel, protected void updateInferenceModel(Model rebuildModel,
Collection<String> individuals) { Collection<String> individuals) throws RDFServiceException {
Model inferenceModel = RDFServiceGraph.createRDFServiceModel(
log.info("Updating ABox inference model"); new RDFServiceGraph(rdfService, ModelNames.ABOX_INFERENCES));
// Remove everything from the current inference model that is not
// in the recomputed inference model
int num = 0;
scratchpadModel.enterCriticalSection(Lock.WRITE);
scratchpadModel.removeAll();
Model rebuild = ModelFactory.createDefaultModel();
Model existing = ModelFactory.createDefaultModel(); Model existing = ModelFactory.createDefaultModel();
long start = System.currentTimeMillis();
for (String individualURI : individuals) { for (String individualURI : individuals) {
rebuild.removeAll();
existing.removeAll();
Resource subjInd = ResourceFactory.createResource(individualURI); Resource subjInd = ResourceFactory.createResource(individualURI);
inferenceModel.enterCriticalSection(Lock.READ);
try {
existing.add(inferenceModel.listStatements(subjInd, null, (RDFNode) null)); existing.add(inferenceModel.listStatements(subjInd, null, (RDFNode) null));
} finally {
inferenceModel.leaveCriticalSection();
} }
inferenceRebuildModel.enterCriticalSection(Lock.READ); Model retractions = existing.difference(rebuildModel);
try { Model additions = rebuildModel.difference(existing);
rebuild.add(inferenceRebuildModel.listStatements(subjInd, null, (RDFNode) null));
} finally {
inferenceRebuildModel.leaveCriticalSection();
}
Model retractions = existing.difference(rebuild);
Model additions = rebuild.difference(existing);
inferenceModel.enterCriticalSection(Lock.WRITE); inferenceModel.enterCriticalSection(Lock.WRITE);
try { try {
inferenceModel.remove(retractions); long start = System.currentTimeMillis();
inferenceModel.add(additions); ChangeSet change = rdfService.manufactureChangeSet();
change.addRemoval(makeN3InputStream(retractions),
RDFService.ModelSerializationFormat.N3, ModelNames.ABOX_INFERENCES);
change.addAddition(makeN3InputStream(additions),
RDFService.ModelSerializationFormat.N3, ModelNames.ABOX_INFERENCES);
rdfService.changeSetUpdate(change);
log.debug((System.currentTimeMillis() - start) +
" ms to retract " + retractions.size() +
" statements and add " + additions.size() + " statements");
} finally { } finally {
inferenceModel.leaveCriticalSection(); inferenceModel.leaveCriticalSection();
} }
inferenceRebuildModel.enterCriticalSection(Lock.WRITE);
try {
inferenceRebuildModel.remove(rebuild);
} finally {
inferenceRebuildModel.leaveCriticalSection();
} }
num++; private InputStream makeN3InputStream(Model m) {
if ((num % 1000) == 0) { ByteArrayOutputStream out = new ByteArrayOutputStream();
log.info("Still updating ABox inference model (" + m.write(out, "N3");
+ num + "/" + individuals.size() + " individuals)"); return new ByteArrayInputStream(out.toByteArray());
log.info((System.currentTimeMillis() - start) / 1000 + " ms per individual");
start = System.currentTimeMillis();
} }
if (stopRequested) { private Set<String> getSameAsIndividuals(String individualURI) {
return true; HashSet<String> sameAsInds = new HashSet<String>();
sameAsInds.add(individualURI);
getSameAsIndividuals(individualURI, sameAsInds);
sameAsInds.remove(individualURI);
return sameAsInds;
} }
} private void getSameAsIndividuals(String individualURI, Set<String> sameAsInds) {
Model m = RDFServiceGraph.createRDFServiceModel(new RDFServiceGraph(rdfService));
log.info("ABox inference model updated"); Resource individual = ResourceFactory.createResource(individualURI);
return false; StmtIterator sit = m.listStatements(individual, OWL.sameAs, (RDFNode) null);
} while(sit.hasNext()) {
Statement stmt = sit.nextStatement();
private Iterator<Statement> listModelStatements(Model model, String graphURI) { if (stmt.getObject().isURIResource()) {
// the RDFServices supplied by the unit tests won't have the right String sameAsURI = stmt.getObject().asResource().getURI();
// named graphs. So if the graphURI-based chunked iterator is empty, if (!sameAsInds.contains(sameAsURI)) {
// we'll try listStatements() on the model instead. sameAsInds.add(sameAsURI);
Iterator<Statement> it = new ChunkedStatementIterator(graphURI); getSameAsIndividuals(sameAsURI, sameAsInds);
if (it.hasNext()) {
return it;
} else {
return model.listStatements();
}
}
// avoids OutOfMemory errors by retrieving triples in batches
private class ChunkedStatementIterator implements Iterator<Statement> {
final int CHUNK_SIZE = 50000;
private int offset = 0;
private String queryString;
private Model temp = ModelFactory.createDefaultModel();
private StmtIterator tempIt;
public ChunkedStatementIterator(String graphURI) {
this.queryString = "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <" +
graphURI + "> { ?s ?p ?o } }";
}
public Statement next() {
if (tempIt.hasNext()) {
return tempIt.nextStatement();
} else {
return null;
}
}
public void remove() {
throw new UnsupportedOperationException(this.getClass().getName() +
" does not support .remove()");
}
public boolean hasNext() {
if (tempIt != null && tempIt.hasNext()) {
return true;
} else {
getNextChunk();
if (temp.size() > 0) {
tempIt = temp.listStatements();
return true;
} else {
return false;
} }
} }
} }
sit = m.listStatements(null, OWL.sameAs, individual);
private void getNextChunk() { while(sit.hasNext()) {
Statement stmt = sit.nextStatement();
String chunkQueryString = queryString + " LIMIT " + CHUNK_SIZE + " OFFSET " + offset; if (stmt.getSubject().isURIResource()) {
offset += CHUNK_SIZE; String sameAsURI = stmt.getSubject().asResource().getURI();
if (!sameAsInds.contains(sameAsURI)) {
try { sameAsInds.add(sameAsURI);
InputStream in = rdfService.sparqlConstructQuery( getSameAsIndividuals(sameAsURI, sameAsInds);
chunkQueryString, RDFService.ModelSerializationFormat.NTRIPLE); }
temp.removeAll();
temp.add(RDFServiceUtils.parseModel(
in, RDFService.ModelSerializationFormat.NTRIPLE));
} catch (RDFServiceException e) {
throw new RuntimeException(e);
} }
} }
} }
/** /**
* This is called when the application shuts down. * This is called when the application shuts down.

View file

@ -18,6 +18,8 @@ import com.hp.hpl.jena.ontology.AnnotationProperty;
import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntClass;
import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.OntProperty; import com.hp.hpl.jena.ontology.OntProperty;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.DatasetFactory;
import com.hp.hpl.jena.rdf.listeners.StatementListener; import com.hp.hpl.jena.rdf.listeners.StatementListener;
import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.Model;
@ -40,6 +42,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.CumulativeDeltaModeler;
import edu.cornell.mannlib.vitro.webapp.dao.jena.DifferenceGraph; import edu.cornell.mannlib.vitro.webapp.dao.jena.DifferenceGraph;
import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph; import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph;
import edu.cornell.mannlib.vitro.webapp.dao.jena.event.BulkUpdateEvent; import edu.cornell.mannlib.vitro.webapp.dao.jena.event.BulkUpdateEvent;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.adapters.VitroModelFactory; import edu.cornell.mannlib.vitro.webapp.rdfservice.adapters.VitroModelFactory;
@ -114,7 +117,7 @@ public class SimpleReasoner extends StatementListener {
this.batchMode = 0; this.batchMode = 0;
aBoxDeltaModeler1 = new CumulativeDeltaModeler(); aBoxDeltaModeler1 = new CumulativeDeltaModeler();
aBoxDeltaModeler2 = new CumulativeDeltaModeler(); aBoxDeltaModeler2 = new CumulativeDeltaModeler();
recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,inferenceRebuildModel,scratchpadModel,rdfService,this); recomputer = new ABoxRecomputer(tboxModel, aboxModel, rdfService, this);
stopRequested = false; stopRequested = false;
if (rdfService == null) { if (rdfService == null) {
@ -146,7 +149,13 @@ public class SimpleReasoner extends StatementListener {
aBoxDeltaModeler2 = new CumulativeDeltaModeler(); aBoxDeltaModeler2 = new CumulativeDeltaModeler();
this.batchMode = 0; this.batchMode = 0;
stopRequested = false; stopRequested = false;
recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,ModelFactory.createDefaultModel(), ModelFactory.createDefaultModel(), new RDFServiceModel(aboxModel), this); Dataset ds = DatasetFactory.createMem();
ds.addNamedModel(ModelNames.ABOX_ASSERTIONS, aboxModel);
ds.addNamedModel(ModelNames.ABOX_INFERENCES, inferenceModel);
ds.addNamedModel(ModelNames.TBOX_ASSERTIONS, tboxModel);
ds.setDefaultModel(ModelFactory.createUnion(fullModel, tboxModel));
recomputer = new ABoxRecomputer(tboxModel, aboxModel, new RDFServiceModel(ds), this);
} }
public void setPluginList(List<ReasonerPlugin> pluginList) { public void setPluginList(List<ReasonerPlugin> pluginList) {
@ -805,16 +814,7 @@ public class SimpleReasoner extends StatementListener {
Resource subject = sameIter.next(); Resource subject = sameIter.next();
Statement sameStmt = Statement sameStmt =
ResourceFactory.createStatement(subject,stmt.getPredicate(),stmt.getObject()); ResourceFactory.createStatement(subject,stmt.getPredicate(),stmt.getObject());
addInference(sameStmt,inferenceModel,false); addInference(sameStmt,inferenceModel, doSameAs);
}
inferenceModel.enterCriticalSection(Lock.WRITE);
try {
if (inferenceModel.contains(stmt)) {
inferenceModel.remove(stmt);
}
} finally {
inferenceModel.leaveCriticalSection();
} }
} }
@ -842,8 +842,19 @@ public class SimpleReasoner extends StatementListener {
} }
} }
inferenceModel.enterCriticalSection(Lock.WRITE);
try {
if (inferenceModel.contains(stmt)) {
inferenceModel.remove(stmt);
}
} finally {
inferenceModel.leaveCriticalSection();
}
if(doSameAs) {
doSameAsForAddedABoxAssertion( stmt, inferenceModel); doSameAsForAddedABoxAssertion( stmt, inferenceModel);
} }
}
void doSameAsForRemovedABoxAssertion(Statement stmt, Model inferenceModel){ void doSameAsForRemovedABoxAssertion(Statement stmt, Model inferenceModel){
List<Resource> sameIndividuals = List<Resource> sameIndividuals =

View file

@ -21,6 +21,7 @@ import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Dataset; import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess; import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames; import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames;
@ -56,7 +57,15 @@ public class SimpleReasonerSetup implements ServletContextListener {
Dataset dataset = ModelAccess.on(ctx).getDataset(); Dataset dataset = ModelAccess.on(ctx).getDataset();
Model rebuildModel = dataset.getNamedModel(JENA_INF_MODEL_REBUILD); Model rebuildModel = dataset.getNamedModel(JENA_INF_MODEL_REBUILD);
if(rebuildModel.contains(null, null, (RDFNode) null)) {
log.info("Clearing obsolete data from inference rebuild model");
rebuildModel.removeAll();
}
Model scratchModel = dataset.getNamedModel(JENA_INF_MODEL_SCRATCHPAD); Model scratchModel = dataset.getNamedModel(JENA_INF_MODEL_SCRATCHPAD);
if(scratchModel.contains(null, null, (RDFNode) null)) {
log.info("Clearing obsolete data from inference scratchpad model");
scratchModel.removeAll();
}
Model inferenceModel = dataset.getNamedModel(ABOX_INFERENCES); Model inferenceModel = dataset.getNamedModel(ABOX_INFERENCES);
// the simple reasoner will register itself as a listener to the ABox assertions // the simple reasoner will register itself as a listener to the ABox assertions