Merge branch 'develop' into feature/searchIndexerTry

commit 99c03ce49a
Author: Jim Blake
Date: 2015-01-09 16:41:36 -05:00
5 changed files with 505 additions and 529 deletions

View file: RDFServiceModel.java

@@ -27,6 +27,7 @@ public class RDFServiceModel extends RDFServiceJena implements RDFService {
     private final static Log log = LogFactory.getLog(RDFServiceModel.class);

     private Model model;
+    private Dataset dataset;
     private String modelName;

     /**
@@ -37,13 +38,26 @@ public class RDFServiceModel extends RDFServiceJena implements RDFService {
         this.model = model;
     }

+    /**
+     * Create an RDFService to access a Jena Dataset
+     * @param dataset
+     */
+    public RDFServiceModel(Dataset dataset) {
+        this.dataset = dataset;
+    }
+
     @Override
     protected DatasetWrapper getDatasetWrapper() {
-        Dataset d = DatasetFactory.createMem();
-        if (modelName == null) {
-            d.setDefaultModel(this.model);
+        Dataset d = null;
+        if (dataset != null) {
+            d = dataset;
         } else {
-            d.addNamedModel(this.modelName, model);
+            d = DatasetFactory.createMem();
+            if (modelName == null) {
+                d.setDefaultModel(this.model);
+            } else {
+                d.addNamedModel(this.modelName, model);
+            }
         }
         DatasetWrapper datasetWrapper = new DatasetWrapper(d);
         return datasetWrapper;
@@ -75,7 +89,16 @@ public class RDFServiceModel extends RDFServiceJena implements RDFService {
             modelChange.setSerializedModel(new ByteArrayInputStream(bytes));
         }
         modelChange.getSerializedModel().mark(Integer.MAX_VALUE);
-        operateOnModel(model, modelChange, null);
+        Model m = this.model;
+        if (m == null && dataset != null) {
+            String changeGraphURI = modelChange.getGraphURI();
+            if (changeGraphURI != null) {
+                m = dataset.getNamedModel(changeGraphURI);
+            } else {
+                m = dataset.getDefaultModel();
+            }
+        }
+        operateOnModel(m, modelChange, null);
     }

     // notify listeners of triple changes
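Editor's note: this file teaches RDFServiceModel to serve an entire Jena Dataset, not just a single Model. getDatasetWrapper() now hands back a caller-supplied dataset unchanged, and performChange() resolves the target graph from the change's graph URI. A minimal standalone sketch of the two wrapping behaviors (the graph URI is illustrative):

```java
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.DatasetFactory;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;

public class DatasetWrappingSketch {
    public static void main(String[] args) {
        Model model = ModelFactory.createDefaultModel();

        // Old behavior: the single backing model is copied into a fresh
        // in-memory dataset, as either the default or one named graph.
        Dataset fresh = DatasetFactory.createMem();
        fresh.setDefaultModel(model);

        // New behavior: a caller-supplied dataset is used as-is, so SPARQL
        // GRAPH clauses can see every named graph it already contains.
        Dataset supplied = DatasetFactory.createMem();
        supplied.addNamedModel("http://example.org/graph1", model);
        System.out.println(supplied.containsNamedModel("http://example.org/graph1"));
    }
}
```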

View file: RDFServiceSparql.java

@@ -595,13 +595,13 @@ public class RDFServiceSparql extends RDFServiceImpl implements RDFService {
     private void performChange(ModelChange modelChange) throws RDFServiceException {
         Model model = parseModel(modelChange);
+        Model[] separatedModel = separateStatementsWithBlankNodes(model);
         if (modelChange.getOperation() == ModelChange.Operation.ADD) {
-            Model[] separatedModel = separateStatementsWithBlankNodes(model);
             addModel(separatedModel[1], modelChange.getGraphURI());
             addBlankNodesWithSparqlUpdate(separatedModel[0], modelChange.getGraphURI());
         } else if (modelChange.getOperation() == ModelChange.Operation.REMOVE) {
-            deleteModel(model, modelChange.getGraphURI());
-            removeBlankNodesWithSparqlUpdate(model, modelChange.getGraphURI());
+            deleteModel(separatedModel[1], modelChange.getGraphURI());
+            removeBlankNodesWithSparqlUpdate(separatedModel[0], modelChange.getGraphURI());
         } else {
             log.error("unrecognized operation type");
         }
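Editor's note: judging from how the ADD branch uses the two halves, separateStatementsWithBlankNodes returns blank-node statements at index 0 and ground statements at index 1; the change applies the same split to REMOVE, so each deletion strategy only sees the statements it can handle. A hypothetical re-implementation of the split (the real helper may differ):

```java
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;

public class BlankNodeSplitSketch {
    // Index 0: statements mentioning a blank node; index 1: everything else.
    static Model[] separateStatementsWithBlankNodes(Model input) {
        Model withBlanks = ModelFactory.createDefaultModel();
        Model ground = ModelFactory.createDefaultModel();
        StmtIterator it = input.listStatements();
        while (it.hasNext()) {
            Statement s = it.nextStatement();
            if (s.getSubject().isAnon() || s.getObject().isAnon()) {
                withBlanks.add(s);
            } else {
                ground.add(s);
            }
        }
        return new Model[] { withBlanks, ground };
    }
}
```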

View file: ABoxRecomputer.java

@@ -2,6 +2,8 @@
 package edu.cornell.mannlib.vitro.webapp.reasoner;

 import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Collection;
@@ -14,7 +16,10 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;

 import com.hp.hpl.jena.ontology.OntModel;
-import com.hp.hpl.jena.ontology.OntProperty;
+import com.hp.hpl.jena.query.Query;
+import com.hp.hpl.jena.query.QueryExecution;
+import com.hp.hpl.jena.query.QueryExecutionFactory;
+import com.hp.hpl.jena.query.QueryFactory;
 import com.hp.hpl.jena.query.QuerySolution;
 import com.hp.hpl.jena.query.ResultSet;
 import com.hp.hpl.jena.query.ResultSetFactory;
@@ -25,530 +30,458 @@ import com.hp.hpl.jena.rdf.model.Resource;
 import com.hp.hpl.jena.rdf.model.ResourceFactory;
 import com.hp.hpl.jena.rdf.model.Statement;
 import com.hp.hpl.jena.rdf.model.StmtIterator;
-import com.hp.hpl.jena.shared.JenaException;
 import com.hp.hpl.jena.shared.Lock;
 import com.hp.hpl.jena.vocabulary.OWL;
 import com.hp.hpl.jena.vocabulary.RDF;
+import com.hp.hpl.jena.vocabulary.RDFS;

 import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
+import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph;
+import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames;
+import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeSet;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
-import edu.cornell.mannlib.vitro.webapp.servlet.setup.JenaDataSourceSetupBase;
-import edu.cornell.mannlib.vitro.webapp.servlet.setup.SimpleReasonerSetup;
 public class ABoxRecomputer {

-    private static final Log log = LogFactory.getLog(ABoxRecomputer.class);
+    private static final Log log = LogFactory.getLog(ABoxRecomputer.class);

-    private OntModel tboxModel; // asserted and inferred TBox axioms
-    private OntModel aboxModel; // ABox assertions
-    private Model inferenceModel; // ABox inferences
-    private Model inferenceRebuildModel; // work area for recomputing all ABox inferences
-    private Model scratchpadModel; // work area for recomputing all ABox inferences
-    private RDFService rdfService;
-    private SimpleReasoner simpleReasoner;
-    private Object lock1 = new Object();
+    private OntModel tboxModel; // asserted and inferred TBox axioms
+    private OntModel aboxModel;
+    private RDFService rdfService;
+    private SimpleReasoner simpleReasoner;
+    private Object lock1 = new Object();
+    private volatile boolean recomputing = false;
+    private boolean stopRequested = false;
+    private boolean handleSameAs = false;
-    private volatile boolean recomputing = false;
-    private boolean stopRequested = false;
+    private final int BATCH_SIZE = 100;
+    private final int REPORTING_INTERVAL = 1000;

-    /**
-     * @param tboxModel - input. This model contains both asserted and inferred TBox axioms
-     * @param aboxModel - input. This model contains asserted ABox statements
-     * @param inferenceModel - output. This is the model in which inferred (materialized) ABox statements are maintained (added or retracted).
-     * @param inferenceRebuildModel - output. This the model is temporarily used when the whole ABox inference model is rebuilt
-     * @param inferenceScratchpadModel - output. This the model is temporarily used when the whole ABox inference model is rebuilt
-     */
-    public ABoxRecomputer(OntModel tboxModel,
-                          OntModel aboxModel,
-                          Model inferenceModel,
-                          Model inferenceRebuildModel,
-                          Model scratchpadModel,
-                          RDFService rdfService,
-                          SimpleReasoner simpleReasoner) {
-        this.tboxModel = tboxModel;
+    /**
+     * @param tboxModel - input. This model contains both asserted and inferred TBox axioms
+     * @param aboxModel - input. This model contains asserted ABox statements
+     * @param inferenceModel - output. This is the model in which inferred (materialized) ABox statements are maintained (added or retracted).
+     */
+    public ABoxRecomputer(OntModel tboxModel,
+                          OntModel aboxModel,
+                          RDFService rdfService,
+                          SimpleReasoner simpleReasoner) {
+        this.tboxModel = tboxModel;
         this.aboxModel = aboxModel;
-        this.inferenceModel = inferenceModel;
-        this.inferenceRebuildModel = inferenceRebuildModel;
-        this.scratchpadModel = scratchpadModel;
-        this.rdfService = rdfService;
-        this.simpleReasoner = simpleReasoner;
-        recomputing = false;
-        stopRequested = false;
-    }
+        this.rdfService = rdfService;
+        this.simpleReasoner = simpleReasoner;
+        recomputing = false;
+        stopRequested = false;
+        handleSameAs = simpleReasoner.getSameAsEnabled();
+    }

-    /**
-     * Returns true if the recomputer is in the process of recomputing
-     * all inferences.
-     */
-    public boolean isRecomputing() {
-        return recomputing;
-    }
+    /**
+     * Returns true if the recomputer is in the process of recomputing
+     * all inferences.
+     */
+    public boolean isRecomputing() {
+        return recomputing;
+    }

-    /**
-     * Recompute all inferences.
-     */
-    public void recompute() {
-        synchronized (lock1) {
-            if (recomputing) {
-                return;
-            } else {
-                recomputing = true;
-            }
-        }
-        try {
-            recomputeABox();
-        } finally {
-            synchronized (lock1) {
-                recomputing = false;
-            }
-        }
-    }
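Editor's note: recompute() is unchanged in substance by this rewrite (it reappears below with new whitespace). It uses a standard concurrency idiom: test-and-set the recomputing flag inside a synchronized block, run the long recomputation outside the lock, and clear the flag in a finally block so a failure can never leave the recomputer wedged. The idiom in isolation:

```java
public class RecomputeGuard {
    private final Object lock = new Object();
    private boolean running = false;

    public void runExclusively(Runnable work) {
        synchronized (lock) {
            if (running) {
                return; // another thread is already recomputing
            }
            running = true;
        }
        try {
            work.run(); // long-running, executed outside the lock
        } finally {
            synchronized (lock) {
                running = false; // always cleared, even on exception
            }
        }
    }
}
```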
-    // don't check for existing inferences in the rebuild model
-    private boolean DO_CHECK = true;

-    /*
-     * Recompute the entire ABox inference graph. The new
-     * inference graph is built in a separate model and
-     * then reconciled with the inference graph in active
-     * use. The model reconciliation must be done
-     * without reading the whole inference models into
-     * memory in order to support very large ABox
-     * inference models.
-     */
-    protected void recomputeABox() {
-        // recompute class subsumption inferences
-        inferenceRebuildModel.enterCriticalSection(Lock.WRITE);
-        try {
-            log.info("Clearing inference rebuild model.");
-            HashSet<String> unknownTypes = new HashSet<String>();
-            inferenceRebuildModel.removeAll();
-            log.info("Computing class subsumption ABox inferences.");
-            int numStmts = 0;
-            Collection<String> individuals = this.getAllIndividualURIs();
-            log.info("Recomputing inferences for " + individuals.size() + " individuals");
-            long start = System.currentTimeMillis();
-            for (String individualURI : individuals) {
-                Resource individual = ResourceFactory.createResource(individualURI);
-                try {
-                    addedABoxTypeAssertion(individual, inferenceRebuildModel, unknownTypes, DO_CHECK);
-                    simpleReasoner.setMostSpecificTypes(individual, inferenceRebuildModel, unknownTypes);
-                    List<ReasonerPlugin> pluginList = simpleReasoner.getPluginList();
-                    if (pluginList.size() > 0) {
-                        StmtIterator sit = aboxModel.listStatements(individual, null, (RDFNode) null);
-                        while (sit.hasNext()) {
-                            Statement s = sit.nextStatement();
-                            for (ReasonerPlugin plugin : pluginList) {
-                                plugin.addedABoxStatement(s, aboxModel, inferenceRebuildModel, tboxModel);
-                            }
-                        }
-                    }
-                } catch (NullPointerException npe) {
-                    log.error("a NullPointerException was received while recomputing the ABox inferences. Halting inference computation.");
-                    npe.printStackTrace();
-                    return;
-                } catch (JenaException je) {
-                    if (je.getMessage().equals("Statement models must no be null")) {
-                        log.error("Exception while recomputing ABox inference model. Halting inference computation.", je);
-                        return;
-                    }
-                    log.error("Exception while recomputing ABox inference model: ", je);
-                } catch (Exception e) {
-                    log.error("Exception while recomputing ABox inference model: ", e);
-                } catch (OutOfMemoryError e) {
-                    log.error(individualURI + " out of memory", e);
-                }
-                numStmts++;
-                if ((numStmts % 1000) == 0) {
-                    log.info("Still computing class subsumption ABox inferences ("
-                            + numStmts + "/" + individuals.size() + " individuals)");
-                    log.info((System.currentTimeMillis() - start) / 1000 + " ms per individual");
-                    start = System.currentTimeMillis();
-                }
-                if (stopRequested) {
-                    log.info("a stopRequested signal was received during recomputeABox. Halting Processing.");
-                    return;
-                }
-            }
-            log.info("Finished computing class subsumption ABox inferences");
-            log.info("Computing inverse property ABox inferences");
-            Iterator<Statement> invStatements = null;
-            tboxModel.enterCriticalSection(Lock.READ);
-            try {
-                invStatements = tboxModel.listStatements((Resource) null, OWL.inverseOf, (Resource) null);
-            } finally {
-                tboxModel.leaveCriticalSection();
-            }
-            numStmts = 0;
-            while (invStatements.hasNext()) {
-                Statement stmt = invStatements.next();
-                try {
-                    OntProperty prop1 = tboxModel.getOntProperty((stmt.getSubject()).getURI());
-                    if (prop1 == null) {
-                        //TODO make sure not to print out a million of these for the same property
-                        log.debug("didn't find subject property in the tbox: " + (stmt.getSubject()).getURI());
-                        continue;
-                    }
-                    OntProperty prop2 = tboxModel.getOntProperty(((Resource)stmt.getObject()).getURI());
-                    if (prop2 == null) {
-                        //TODO make sure not to print out a million of these for the same property
-                        log.debug("didn't find object property in the tbox: " + ((Resource)stmt.getObject()).getURI());
-                        continue;
-                    }
-                    simpleReasoner.addedInverseProperty(prop1, prop2, inferenceRebuildModel);
-                } catch (NullPointerException npe) {
-                    log.error("a NullPointerException was received while recomputing the ABox inferences. Halting inference computation.");
-                    npe.printStackTrace();
-                    return;
-                } catch (JenaException je) {
-                    if (je.getMessage().equals("Statement models must no be null")) {
-                        log.error("Exception while recomputing ABox inference model. Halting inference computation.", je);
-                        return;
-                    }
-                    log.error("Exception while recomputing ABox inference model: ", je);
-                } catch (Exception e) {
-                    log.error("Exception while recomputing ABox inference model: ", e);
-                }
-                numStmts++;
-                if ((numStmts % 10000) == 0) {
-                    log.info("Still computing inverse property ABox inferences...");
-                }
-                if (stopRequested) {
-                    log.info("a stopRequested signal was received during recomputeABox. Halting Processing.");
-                    return;
-                }
-            }
-            log.info("Finished computing inverse property ABox inferences");
-            log.info("Computing sameAs ABox inferences");
-            Iterator<Statement> sameAsStatements = null;
-            aboxModel.enterCriticalSection(Lock.READ);
-            try {
-                sameAsStatements = aboxModel.listStatements((Resource) null, OWL.sameAs, (Resource) null);
-            } finally {
-                aboxModel.leaveCriticalSection();
-            }
-            numStmts = 0;
-            while (sameAsStatements.hasNext()) {
-                Statement stmt = sameAsStatements.next();
-                try {
-                    simpleReasoner.addedABoxSameAsAssertion(stmt, inferenceRebuildModel);
-                } catch (NullPointerException npe) {
-                    log.error("a NullPointerException was received while recomputing the ABox inferences. Halting inference computation.");
-                    npe.printStackTrace();
-                    return;
-                } catch (JenaException je) {
-                    if (je.getMessage().equals("Statement models must no be null")) {
-                        log.error("Exception while recomputing ABox inference model. Halting inference computation.", je);
-                        return;
-                    }
-                    log.error("Exception while recomputing ABox inference model: ", je);
-                } catch (Exception e) {
-                    log.error("Exception while recomputing ABox inference model: ", e);
-                }
-                numStmts++;
-                if ((numStmts % 10000) == 0) {
-                    log.info("Still computing sameAs ABox inferences...");
-                }
-                if (stopRequested) {
-                    log.info("a stopRequested signal was received during recomputeABox. Halting Processing.");
-                    return;
-                }
-            }
-            log.info("Finished computing sameAs ABox inferences");
-            try {
-                if (updateInferenceModel(inferenceRebuildModel, individuals)) {
-                    log.info("a stopRequested signal was received during updateInferenceModel. Halting Processing.");
-                    return;
-                }
-            } catch (Exception e) {
-                log.error("Exception while reconciling the current and recomputed ABox inference model for class subsumption inferences. Halting processing." , e);
-            }
-        } catch (Exception e) {
-            e.printStackTrace();
-            log.error("Exception while recomputing ABox inferences. Halting processing.", e);
-        } finally {
-            inferenceRebuildModel.removeAll();
-            inferenceRebuildModel.leaveCriticalSection();
-        }
-    }

-    /*
-     * Get the URIs for all individuals in the system
-     */
-    protected Collection<String> getAllIndividualURIs() {
-        HashSet<String> individualURIs = new HashSet<String>();
-        List<String> classList = new ArrayList<String>();
-        tboxModel.enterCriticalSection(Lock.READ);
-        try {
-            StmtIterator classIt = tboxModel.listStatements(
-                    (Resource) null, RDF.type, OWL.Class);
-            while(classIt.hasNext()) {
-                Statement stmt = classIt.nextStatement();
-                if(stmt.getSubject().isURIResource()
-                        && stmt.getSubject().getURI() != null
-                        && !stmt.getSubject().getURI().isEmpty()) {
-                    classList.add(stmt.getSubject().getURI());
-                }
-            }
-        } finally {
-            tboxModel.leaveCriticalSection();
-        }
-        for (String classURI : classList) {
-            String queryString = "SELECT ?s WHERE { ?s a <" + classURI + "> } ";
-            getIndividualURIs(queryString, individualURIs);
-        }
-        return individualURIs;
-    }

-    protected void getIndividualURIs(String queryString, Set<String> individuals) {
-        int batchSize = 50000;
-        int offset = 0;
-        boolean done = false;
-        while (!done) {
-            String queryStr = queryString + " LIMIT " + batchSize + " OFFSET " + offset;
-            if(log.isDebugEnabled()) {
-                log.debug(queryStr);
-            }
-            ResultSet results = null;
-            try {
-                InputStream in = rdfService.sparqlSelectQuery(queryStr, RDFService.ResultFormat.JSON);
-                results = ResultSetFactory.fromJSON(in);
-            } catch (RDFServiceException e) {
-                throw new RuntimeException(e);
-            }
-            if (!results.hasNext()) {
-                done = true;
-            }
-            while (results.hasNext()) {
-                QuerySolution solution = results.next();
-                Resource resource = solution.getResource("s");
-                if ((resource != null) && !resource.isAnon()) {
-                    individuals.add(resource.getURI());
-                }
-            }
-            if(log.isDebugEnabled()) {
-                log.info(individuals.size() + " in set");
-            }
-            offset += batchSize;
-        }
-    }
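Editor's note: getIndividualURIs, kept in both versions of the class (the new copy appears further down), pages through SELECT results with LIMIT/OFFSET until a page comes back empty. A self-contained sketch of the same loop against a local model; note that without an ORDER BY the paging assumes the store enumerates results in a stable order:

```java
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF;

public class PagedSelectSketch {
    public static void main(String[] args) {
        Model m = ModelFactory.createDefaultModel();
        for (int i = 0; i < 7; i++) {
            m.add(m.createResource("http://example.org/ind" + i), RDF.type, OWL.Thing);
        }
        int batchSize = 3; // the recomputer uses 50000
        int offset = 0;
        boolean done = false;
        while (!done) {
            String queryStr = "SELECT ?s WHERE { ?s a <" + OWL.Thing.getURI() + "> }"
                    + " LIMIT " + batchSize + " OFFSET " + offset;
            QueryExecution qe = QueryExecutionFactory.create(queryStr, m);
            try {
                ResultSet results = qe.execSelect();
                if (!results.hasNext()) {
                    done = true; // an empty page means we are past the end
                }
                while (results.hasNext()) {
                    System.out.println(results.next().getResource("s"));
                }
            } finally {
                qe.close();
            }
            offset += batchSize;
        }
    }
}
```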
-    protected void addedABoxTypeAssertion(Resource individual, Model inferenceModel,
-            HashSet<String> unknownTypes) {
-        addedABoxTypeAssertion(individual, inferenceModel, unknownTypes, true);
-    }

-    protected void addedABoxTypeAssertion(Resource individual, Model inferenceModel,
-            HashSet<String> unknownTypes, boolean checkRedundancy) {
-        StmtIterator iter = null;
-        aboxModel.enterCriticalSection(Lock.READ);
-        try {
-            iter = aboxModel.listStatements(individual, RDF.type, (RDFNode) null);
-            while (iter.hasNext()) {
-                Statement stmt = iter.next();
-                simpleReasoner.addedABoxTypeAssertion(
-                        stmt, inferenceModel, unknownTypes, checkRedundancy);
-            }
-        } finally {
-            if (iter != null) {
-                iter.close();
-            }
-            aboxModel.leaveCriticalSection();
-        }
-    }

-    /*
-     * reconcile a set of inferences into the application inference model
-     */
-    protected boolean updateInferenceModel(Model inferenceRebuildModel,
-            Collection<String> individuals) {
-        log.info("Updating ABox inference model");
-        // Remove everything from the current inference model that is not
-        // in the recomputed inference model
-        int num = 0;
-        scratchpadModel.enterCriticalSection(Lock.WRITE);
-        scratchpadModel.removeAll();
-        Model rebuild = ModelFactory.createDefaultModel();
-        Model existing = ModelFactory.createDefaultModel();
-        long start = System.currentTimeMillis();
-        for (String individualURI : individuals) {
-            rebuild.removeAll();
-            existing.removeAll();
-            Resource subjInd = ResourceFactory.createResource(individualURI);
-            inferenceModel.enterCriticalSection(Lock.READ);
-            try {
-                existing.add(inferenceModel.listStatements(subjInd, null, (RDFNode) null));
-            } finally {
-                inferenceModel.leaveCriticalSection();
-            }
-            inferenceRebuildModel.enterCriticalSection(Lock.READ);
-            try {
-                rebuild.add(inferenceRebuildModel.listStatements(subjInd, null, (RDFNode) null));
-            } finally {
-                inferenceRebuildModel.leaveCriticalSection();
+    /**
+     * Recompute all inferences.
+     */
+    public void recompute() {
+        synchronized (lock1) {
+            if (recomputing) {
+                return;
+            } else {
+                recomputing = true;
+            }
-            Model retractions = existing.difference(rebuild);
-            Model additions = rebuild.difference(existing);
-            inferenceModel.enterCriticalSection(Lock.WRITE);
-            try {
-                inferenceModel.remove(retractions);
-                inferenceModel.add(additions);
-            } finally {
-                inferenceModel.leaveCriticalSection();
             }
-            inferenceRebuildModel.enterCriticalSection(Lock.WRITE);
-            try {
-                inferenceRebuildModel.remove(rebuild);
-            } finally {
-                inferenceRebuildModel.leaveCriticalSection();
-            }
+        try {
+            recomputeABox();
+        } finally {
+            synchronized (lock1) {
+                recomputing = false;
+            }
+        }
+    }
-            num++;
-            if ((num % 1000) == 0) {
-                log.info("Still updating ABox inference model (" +
-                        + num + "/" + individuals.size() + " individuals)");
-                log.info((System.currentTimeMillis() - start) / 1000 + " ms per individual");
-                start = System.currentTimeMillis();
-            }
-            if (stopRequested) {
-                return true;
-            }
-        }
-        log.info("ABox inference model updated");
-        return false;
-    }
-    private Iterator<Statement> listModelStatements(Model model, String graphURI) {
-        // the RDFServices supplied by the unit tests won't have the right
-        // named graphs. So if the graphURI-based chunked iterator is empty,
-        // we'll try listStatements() on the model instead.
-        Iterator<Statement> it = new ChunkedStatementIterator(graphURI);
-        if (it.hasNext()) {
-            return it;
-        } else {
-            return model.listStatements();
-        }
-    }

-    // avoids OutOfMemory errors by retrieving triples in batches
-    private class ChunkedStatementIterator implements Iterator<Statement> {

-        final int CHUNK_SIZE = 50000;
-        private int offset = 0;

-        private String queryString;
-        private Model temp = ModelFactory.createDefaultModel();
-        private StmtIterator tempIt;

-        public ChunkedStatementIterator(String graphURI) {
-            this.queryString = "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <" +
-                    graphURI + "> { ?s ?p ?o } }";
-        }

-        public Statement next() {
-            if (tempIt.hasNext()) {
-                return tempIt.nextStatement();
-            } else {
-                return null;
-            }
-        }

-        public void remove() {
-            throw new UnsupportedOperationException(this.getClass().getName() +
-                    " does not support .remove()");
-        }

-        public boolean hasNext() {
-            if (tempIt != null && tempIt.hasNext()) {
-                return true;
-            } else {
-                getNextChunk();
-                if (temp.size() > 0) {
-                    tempIt = temp.listStatements();
-                    return true;
-                } else {
-                    return false;
-                }
-            }
-        }

-        private void getNextChunk() {
-            String chunkQueryString = queryString + " LIMIT " + CHUNK_SIZE + " OFFSET " + offset;
-            offset += CHUNK_SIZE;
+    /*
+     * Recompute the entire ABox inference graph.
+     */
+    protected void recomputeABox() {
+        log.info("Recomputing ABox inferences.");
+        log.info("Finding individuals in ABox.");
+        Collection<String> individuals = this.getAllIndividualURIs();
+        log.info("Recomputing inferences for " + individuals.size() + " individuals");
+        long start = System.currentTimeMillis();
+        int numInds = 0;
+        Model rebuildModel = ModelFactory.createDefaultModel();
+        Model additionalInferences = ModelFactory.createDefaultModel();
+        List<String> individualsInBatch = new ArrayList<String>();
+        Iterator<String> individualIt = individuals.iterator();
+        while (individualIt.hasNext()) {
+            String individualURI = individualIt.next();
             try {
-                InputStream in = rdfService.sparqlConstructQuery(
-                        chunkQueryString, RDFService.ModelSerializationFormat.NTRIPLE);
-                temp.removeAll();
-                temp.add(RDFServiceUtils.parseModel(
-                        in, RDFService.ModelSerializationFormat.NTRIPLE));
+                additionalInferences.add(recomputeIndividual(
+                        individualURI, rebuildModel));
+                numInds++;
+                individualsInBatch.add(individualURI);
+                boolean batchFilled = (numInds % BATCH_SIZE) == 0;
+                boolean reportingInterval = (numInds % REPORTING_INTERVAL) == 0;
+                if (batchFilled || !individualIt.hasNext()) {
+                    log.debug(rebuildModel.size() + " total inferences");
+                    updateInferenceModel(rebuildModel, individualsInBatch);
+                    rebuildModel.removeAll();
+                    individualsInBatch.clear();
+                }
+                if (reportingInterval) {
+                    log.info("Still recomputing inferences ("
+                            + numInds + "/" + individuals.size() + " individuals)");
+                    log.info((System.currentTimeMillis() - start) / numInds + " ms per individual");
+                }
+                if (stopRequested) {
+                    log.info("a stopRequested signal was received during recomputeABox. Halting Processing.");
+                    return;
+                }
+            } catch (Exception e) {
+                log.error("Error recomputing inferences for individual <" + individualURI + ">", e);
+            }
+        }
+        if(additionalInferences.size() > 0) {
+            log.info("Writing additional inferences generated by reasoner plugins.");
+            ChangeSet change = rdfService.manufactureChangeSet();
+            change.addAddition(makeN3InputStream(additionalInferences),
+                    RDFService.ModelSerializationFormat.N3, ModelNames.ABOX_INFERENCES);
+            try {
+                rdfService.changeSetUpdate(change);
+            } catch (RDFServiceException e) {
+                log.error("Unable to write additional inferences from reasoner plugins", e);
+            }
+        }
+        log.info("Finished recomputing inferences");
+    }
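Editor's note: the new recomputeABox() replaces the old all-at-once rebuild with per-individual batches. Inferences accumulate in an in-memory rebuildModel, and every BATCH_SIZE individuals (or at the end of the iteration) the batch is reconciled and the model cleared, which bounds memory use. The control flow reduced to its skeleton:

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class BatchFlushSketch {
    private static final int BATCH_SIZE = 100; // same constant as above

    public static void main(String[] args) {
        List<String> individuals = Arrays.asList("a", "b", "c", "d", "e");
        List<String> batch = new ArrayList<String>();
        int numInds = 0;
        Iterator<String> it = individuals.iterator();
        while (it.hasNext()) {
            batch.add(it.next());
            numInds++;
            boolean batchFilled = (numInds % BATCH_SIZE) == 0;
            if (batchFilled || !it.hasNext()) { // flush on full batch or last item
                flush(batch);
                batch.clear(); // bound the working set, as rebuildModel.removeAll() does
            }
        }
    }

    private static void flush(List<String> batch) {
        System.out.println("reconciling " + batch.size() + " individuals");
    }
}
```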
+    private static final boolean RUN_PLUGINS = true;
+    private static final boolean SKIP_PLUGINS = !RUN_PLUGINS;

+    private Model recomputeIndividual(String individualURI,
+            Model rebuildModel) throws RDFServiceException {
+        long start = System.currentTimeMillis();
+        Model assertions = getAssertions(individualURI);
+        log.trace((System.currentTimeMillis() - start) + " ms to get assertions.");
+        Model additionalInferences = recomputeIndividual(
+                individualURI, null, assertions, rebuildModel, RUN_PLUGINS);
+        if (handleSameAs) {
+            Set<String> sameAsInds = getSameAsIndividuals(individualURI);
+            for (String sameAsInd : sameAsInds) {
+                // sameAs for plugins is handled by the SimpleReasoner
+                Model sameAsIndAssertions = getAssertions(sameAsInd);
+                recomputeIndividual(
+                        sameAsInd, individualURI, sameAsIndAssertions, rebuildModel, SKIP_PLUGINS);
+                rebuildModel.add(
+                        rewriteInferences(getAssertions(sameAsInd), individualURI));
+                Resource indRes = ResourceFactory.createResource(individualURI);
+                Resource sameAsIndRes = ResourceFactory.createResource(sameAsInd);
+                if(!assertions.contains(indRes, OWL.sameAs, sameAsIndRes)) {
+                    rebuildModel.add(indRes, OWL.sameAs, sameAsIndRes);
+                }
+            }
+        }
+        return additionalInferences;
+    }

+    /**
+     * Adds inferences to temporary rebuildmodel
+     * @param individualURI
+     * @param rebuildModel
+     * @return any additional inferences produced by plugins that affect other
+     *         individuals
+     */
+    private Model recomputeIndividual(String individualURI, String aliasURI,
+            Model assertions, Model rebuildModel, boolean runPlugins)
+                    throws RDFServiceException {
+        Model additionalInferences = ModelFactory.createDefaultModel();
+        Resource individual = ResourceFactory.createResource(individualURI);
+        long start = System.currentTimeMillis();
+        Model types = ModelFactory.createDefaultModel();
+        types.add(assertions.listStatements(null, RDF.type, (RDFNode) null));
+        Model inferredTypes = rewriteInferences(getInferredTypes(individual, types), aliasURI);
+        rebuildModel.add(inferredTypes);
+        log.trace((System.currentTimeMillis() - start) + " to infer " + inferredTypes.size() + " types");
+        start = System.currentTimeMillis();
+        types.add(inferredTypes);
+        Model mst = getMostSpecificTypes(individual, types);
+        rebuildModel.add(rewriteInferences(mst, aliasURI));
+        log.trace((System.currentTimeMillis() - start) + " to infer " + mst.size() + " mostSpecificTypes");
+        start = System.currentTimeMillis();
+        Model inferredInvs = getInferredInverseStatements(individualURI);
+        inferredInvs.remove(assertions);
+        rebuildModel.add(rewriteInferences(inferredInvs, aliasURI));
+        log.trace((System.currentTimeMillis() - start) + " to infer " + inferredInvs.size() + " inverses");
+        List<ReasonerPlugin> pluginList = simpleReasoner.getPluginList();
+        if (runPlugins && pluginList.size() > 0) {
+            Model tmpModel = ModelFactory.createDefaultModel();
+            StmtIterator sit = assertions.listStatements();
+            while (sit.hasNext()) {
+                Statement s = sit.nextStatement();
+                for (ReasonerPlugin plugin : pluginList) {
+                    plugin.addedABoxStatement(s, aboxModel, tmpModel, tboxModel);
+                }
+            }
+            StmtIterator tmpIt = tmpModel.listStatements();
+            while(tmpIt.hasNext()) {
+                Statement tmpStmt = tmpIt.nextStatement();
+                if(individual.equals(tmpStmt.getSubject())) {
+                    rebuildModel.add(tmpStmt);
+                } else {
+                    additionalInferences.add(tmpStmt);
+                }
+            }
+        }
+        return additionalInferences;
+    }

+    private Model getAssertions(String individualURI) throws RDFServiceException {
+        String queryStr = "CONSTRUCT { \n" +
+                "    <" + individualURI + "> ?p ?value \n" +
+                "} WHERE { \n" +
+                "    GRAPH ?g { \n" +
+                "        <" + individualURI + "> ?p ?value \n" +
+                "    } \n" +
+                "    FILTER (?g != <" + ModelNames.ABOX_INFERENCES + ">)\n" +
+                "} \n";
+        return RDFServiceUtils.parseModel(
+                rdfService.sparqlConstructQuery(
+                        queryStr, RDFService.ModelSerializationFormat.N3)
+                , RDFService.ModelSerializationFormat.N3);
+    }

+    private Model getInferredTypes(Resource individual, Model assertedTypes) {
+        String queryStr = "CONSTRUCT { \n" +
+                "    <" + individual.getURI() + "> a ?type \n" +
+                "} WHERE { \n" +
+                "    <" + individual.getURI() + "> a ?assertedType .\n" +
+                "    { ?assertedType <" + RDFS.subClassOf.getURI() + "> ?type } \n" +
+                "    UNION \n" +
+                "    { ?assertedType <" + OWL.equivalentClass.getURI() + "> ?type } \n" +
+                "    FILTER (isURI(?type)) \n" +
+                "    FILTER NOT EXISTS { \n" +
+                "        <" + individual.getURI() + "> a ?type \n" +
+                "    } \n" +
+                "} \n";
+        Model union = ModelFactory.createUnion(assertedTypes, tboxModel);
+        tboxModel.enterCriticalSection(Lock.READ);
+        try {
+            Query q = QueryFactory.create(queryStr);
+            QueryExecution qe = QueryExecutionFactory.create(q, union);
+            return qe.execConstruct();
+        } finally {
+            tboxModel.leaveCriticalSection();
+        }
+    }
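Editor's note: getInferredTypes does the type reasoning purely in SPARQL. It queries the union of the individual's asserted types and the TBox, and CONSTRUCTs only the rdf:type triples that are not already asserted. A runnable miniature with a two-class TBox, using the same query shape minus the equivalentClass arm (all URIs are illustrative):

```java
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;

public class InferredTypesSketch {
    public static void main(String[] args) {
        Model tbox = ModelFactory.createDefaultModel();
        Resource student = tbox.createResource("http://example.org/Student");
        Resource person = tbox.createResource("http://example.org/Person");
        tbox.add(student, RDFS.subClassOf, person);

        Model types = ModelFactory.createDefaultModel();
        Resource alice = types.createResource("http://example.org/alice");
        types.add(alice, RDF.type, student);

        String queryStr = "CONSTRUCT { <" + alice.getURI() + "> a ?type } WHERE { \n"
                + "  <" + alice.getURI() + "> a ?assertedType . \n"
                + "  ?assertedType <" + RDFS.subClassOf.getURI() + "> ?type . \n"
                + "  FILTER (isURI(?type)) \n"
                + "  FILTER NOT EXISTS { <" + alice.getURI() + "> a ?type } \n"
                + "}";
        // Query the union of asserted types and the TBox, as the method above does.
        Model union = ModelFactory.createUnion(types, tbox);
        QueryExecution qe = QueryExecutionFactory.create(queryStr, union);
        try {
            qe.execConstruct().write(System.out, "N3"); // alice a Person
        } finally {
            qe.close();
        }
    }
}
```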
+    private Model getMostSpecificTypes(Resource individual, Model assertedTypes) {
+        String queryStr = "CONSTRUCT { \n" +
+                "    <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" +
+                "} WHERE { \n" +
+                "    <" + individual.getURI() + "> a ?type .\n" +
+                "    FILTER (isURI(?type)) \n" +
+                "    FILTER NOT EXISTS { \n" +
+                "        <" + individual.getURI() + "> a ?type2 . \n" +
+                "        ?type2 <" + RDFS.subClassOf.getURI() + "> ?type. \n" +
+                "    } \n" +
+                "    FILTER NOT EXISTS { \n" +
+                "        <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" +
+                "    } \n" +
+                "} \n";
+        Model union = ModelFactory.createUnion(assertedTypes, tboxModel);
+        tboxModel.enterCriticalSection(Lock.READ);
+        try {
+            Query q = QueryFactory.create(queryStr);
+            QueryExecution qe = QueryExecutionFactory.create(q, union);
+            return qe.execConstruct();
+        } finally {
+            tboxModel.leaveCriticalSection();
+        }
+    }

+    private Model getInferredInverseStatements(String individualURI) throws RDFServiceException {
+        String queryStr = "CONSTRUCT { \n" +
+                "    <" + individualURI + "> ?inv ?value \n" +
+                "} WHERE { \n" +
+                "    GRAPH ?gr { \n" +
+                "        ?value ?prop <" + individualURI + "> \n" +
+                "    } \n" +
+                "    FILTER (isURI(?value)) \n" +
+                "    FILTER (?gr != <" + ModelNames.ABOX_INFERENCES + ">) \n" +
+                "    { ?prop <" + OWL.inverseOf.getURI() + "> ?inv } \n" +
+                "    UNION \n" +
+                "    { ?inv <" + OWL.inverseOf.getURI() + "> ?prop } \n" +
+                "} \n";
+        return RDFServiceUtils.parseModel(
+                rdfService.sparqlConstructQuery(
+                        queryStr, RDFService.ModelSerializationFormat.N3)
+                , RDFService.ModelSerializationFormat.N3);
+    }

+    private Model rewriteInferences(Model inferences, String aliasURI) {
+        if (aliasURI == null) {
+            return inferences;
+        }
+        Model rewrite = ModelFactory.createDefaultModel();
+        Resource alias = ResourceFactory.createResource(aliasURI);
+        StmtIterator sit = inferences.listStatements();
+        while(sit.hasNext()) {
+            Statement stmt = sit.nextStatement();
+            rewrite.add(alias, stmt.getPredicate(), stmt.getObject());
+        }
+        return rewrite;
+    }

+    /*
+     * Get the URIs for all individuals in the system
+     */
+    protected Collection<String> getAllIndividualURIs() {
+        HashSet<String> individualURIs = new HashSet<String>();
+        List<String> classList = new ArrayList<String>();
+        tboxModel.enterCriticalSection(Lock.READ);
+        try {
+            StmtIterator classIt = tboxModel.listStatements(
+                    (Resource) null, RDF.type, OWL.Class);
+            while(classIt.hasNext()) {
+                Statement stmt = classIt.nextStatement();
+                if(stmt.getSubject().isURIResource()
+                        && stmt.getSubject().getURI() != null
+                        && !stmt.getSubject().getURI().isEmpty()) {
+                    classList.add(stmt.getSubject().getURI());
+                }
+            }
+        } finally {
+            tboxModel.leaveCriticalSection();
+        }
+        for (String classURI : classList) {
+            String queryString = "SELECT ?s WHERE { ?s a <" + classURI + "> } ";
+            getIndividualURIs(queryString, individualURIs);
+        }
+        return individualURIs;
+    }
+    protected void getIndividualURIs(String queryString, Set<String> individuals) {
+        int batchSize = 50000;
+        int offset = 0;
+        boolean done = false;
+        while (!done) {
+            String queryStr = queryString + " LIMIT " + batchSize + " OFFSET " + offset;
+            if(log.isDebugEnabled()) {
+                log.debug(queryStr);
+            }
+            ResultSet results = null;
+            try {
+                InputStream in = rdfService.sparqlSelectQuery(queryStr, RDFService.ResultFormat.JSON);
+                results = ResultSetFactory.fromJSON(in);
+            } catch (RDFServiceException e) {
+                throw new RuntimeException(e);
+            }
-            }
+            if (!results.hasNext()) {
+                done = true;
+            }
+            while (results.hasNext()) {
+                QuerySolution solution = results.next();
+                Resource resource = solution.getResource("s");
-            }
+                if ((resource != null) && !resource.isAnon()) {
+                    individuals.add(resource.getURI());
+                }
+            }
+            if(log.isDebugEnabled()) {
+                log.debug(individuals.size() + " in set");
+            }
+            offset += batchSize;
+        }
+    }

-    /**
-     * This is called when the application shuts down.
-     */
-    public void setStopRequested() {
-        this.stopRequested = true;
-    }

+    /*
+     * reconcile a set of inferences into the application inference model
+     */
+    protected void updateInferenceModel(Model rebuildModel,
+            Collection<String> individuals) throws RDFServiceException {
+        Model inferenceModel = RDFServiceGraph.createRDFServiceModel(
+                new RDFServiceGraph(rdfService, ModelNames.ABOX_INFERENCES));
+        Model existing = ModelFactory.createDefaultModel();
+        for (String individualURI : individuals) {
+            Resource subjInd = ResourceFactory.createResource(individualURI);
+            existing.add(inferenceModel.listStatements(subjInd, null, (RDFNode) null));
+        }
+        Model retractions = existing.difference(rebuildModel);
+        Model additions = rebuildModel.difference(existing);
+        inferenceModel.enterCriticalSection(Lock.WRITE);
+        try {
+            long start = System.currentTimeMillis();
+            ChangeSet change = rdfService.manufactureChangeSet();
+            change.addRemoval(makeN3InputStream(retractions),
+                    RDFService.ModelSerializationFormat.N3, ModelNames.ABOX_INFERENCES);
+            change.addAddition(makeN3InputStream(additions),
+                    RDFService.ModelSerializationFormat.N3, ModelNames.ABOX_INFERENCES);
+            rdfService.changeSetUpdate(change);
+            log.debug((System.currentTimeMillis() - start) +
+                    " ms to retract " + retractions.size() +
+                    " statements and add " + additions.size() + " statements");
+        } finally {
+            inferenceModel.leaveCriticalSection();
+        }
+    }

+    private InputStream makeN3InputStream(Model m) {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        m.write(out, "N3");
+        return new ByteArrayInputStream(out.toByteArray());
+    }
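Editor's note: makeN3InputStream bridges Jena's Model API and the RDFService ChangeSet API, which consumes serialized models as streams. The same buffer trick in isolation, with a round-trip to show it is lossless:

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;

import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.vocabulary.RDFS;

public class N3StreamSketch {
    public static void main(String[] args) {
        Model m = ModelFactory.createDefaultModel();
        m.add(m.createResource("http://example.org/a"), RDFS.label, "a label");

        // Serialize to an in-memory buffer, then re-expose it as a stream.
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        m.write(out, "N3");
        InputStream in = new ByteArrayInputStream(out.toByteArray());

        Model copy = ModelFactory.createDefaultModel().read(in, null, "N3");
        System.out.println(copy.isIsomorphicWith(m)); // true
    }
}
```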
+    private Set<String> getSameAsIndividuals(String individualURI) {
+        HashSet<String> sameAsInds = new HashSet<String>();
+        sameAsInds.add(individualURI);
+        getSameAsIndividuals(individualURI, sameAsInds);
+        sameAsInds.remove(individualURI);
+        return sameAsInds;
+    }

+    private void getSameAsIndividuals(String individualURI, Set<String> sameAsInds) {
+        Model m = RDFServiceGraph.createRDFServiceModel(new RDFServiceGraph(rdfService));
+        Resource individual = ResourceFactory.createResource(individualURI);
+        StmtIterator sit = m.listStatements(individual, OWL.sameAs, (RDFNode) null);
+        while(sit.hasNext()) {
+            Statement stmt = sit.nextStatement();
+            if (stmt.getObject().isURIResource()) {
+                String sameAsURI = stmt.getObject().asResource().getURI();
+                if (!sameAsInds.contains(sameAsURI)) {
+                    sameAsInds.add(sameAsURI);
+                    getSameAsIndividuals(sameAsURI, sameAsInds);
+                }
+            }
+        }
+        sit = m.listStatements(null, OWL.sameAs, individual);
+        while(sit.hasNext()) {
+            Statement stmt = sit.nextStatement();
+            if (stmt.getSubject().isURIResource()) {
+                String sameAsURI = stmt.getSubject().asResource().getURI();
+                if (!sameAsInds.contains(sameAsURI)) {
+                    sameAsInds.add(sameAsURI);
+                    getSameAsIndividuals(sameAsURI, sameAsInds);
+                }
+            }
+        }
+    }
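Editor's note: getSameAsIndividuals walks owl:sameAs links in both directions, and the contains check before each recursive call is what terminates the walk on cyclic data (a sameAs b, b sameAs a). The traversal reduced to a plain adjacency map:

```java
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class SameAsClosureSketch {
    public static void main(String[] args) {
        Map<String, Set<String>> links = new HashMap<String, Set<String>>();
        link(links, "a", "b"); // a sameAs b (recorded in both directions)
        link(links, "b", "c");
        link(links, "c", "a"); // cycle back to the start

        Set<String> seen = new HashSet<String>();
        seen.add("a");
        collect("a", links, seen);
        seen.remove("a"); // the individual itself is not its own alias
        System.out.println(seen); // contains b and c
    }

    private static void collect(String uri, Map<String, Set<String>> links, Set<String> seen) {
        Set<String> neighbors = links.get(uri);
        if (neighbors == null) {
            return;
        }
        for (String n : neighbors) {
            if (seen.add(n)) { // false for already-seen URIs: stops the cycle
                collect(n, links, seen);
            }
        }
    }

    private static void link(Map<String, Set<String>> links, String s, String o) {
        if (!links.containsKey(s)) links.put(s, new HashSet<String>());
        if (!links.containsKey(o)) links.put(o, new HashSet<String>());
        links.get(s).add(o);
        links.get(o).add(s);
    }
}
```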
+    /**
+     * This is called when the application shuts down.
+     */
+    public void setStopRequested() {
+        this.stopRequested = true;
+    }
 }

View file: SimpleReasoner.java

@@ -18,6 +18,8 @@ import com.hp.hpl.jena.ontology.AnnotationProperty;
 import com.hp.hpl.jena.ontology.OntClass;
 import com.hp.hpl.jena.ontology.OntModel;
 import com.hp.hpl.jena.ontology.OntProperty;
+import com.hp.hpl.jena.query.Dataset;
+import com.hp.hpl.jena.query.DatasetFactory;
 import com.hp.hpl.jena.rdf.listeners.StatementListener;
 import com.hp.hpl.jena.rdf.model.Literal;
 import com.hp.hpl.jena.rdf.model.Model;
@@ -40,6 +42,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.CumulativeDeltaModeler;
 import edu.cornell.mannlib.vitro.webapp.dao.jena.DifferenceGraph;
 import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph;
 import edu.cornell.mannlib.vitro.webapp.dao.jena.event.BulkUpdateEvent;
+import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.adapters.VitroModelFactory;
@@ -114,7 +117,7 @@ public class SimpleReasoner extends StatementListener {
         this.batchMode = 0;
         aBoxDeltaModeler1 = new CumulativeDeltaModeler();
         aBoxDeltaModeler2 = new CumulativeDeltaModeler();
-        recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,inferenceRebuildModel,scratchpadModel,rdfService,this);
+        recomputer = new ABoxRecomputer(tboxModel, aboxModel, rdfService, this);
         stopRequested = false;

         if (rdfService == null) {
@@ -146,7 +149,13 @@ public class SimpleReasoner extends StatementListener {
         aBoxDeltaModeler2 = new CumulativeDeltaModeler();
         this.batchMode = 0;
         stopRequested = false;
-        recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,ModelFactory.createDefaultModel(), ModelFactory.createDefaultModel(), new RDFServiceModel(aboxModel), this);
+        Dataset ds = DatasetFactory.createMem();
+        ds.addNamedModel(ModelNames.ABOX_ASSERTIONS, aboxModel);
+        ds.addNamedModel(ModelNames.ABOX_INFERENCES, inferenceModel);
+        ds.addNamedModel(ModelNames.TBOX_ASSERTIONS, tboxModel);
+        ds.setDefaultModel(ModelFactory.createUnion(fullModel, tboxModel));
+        recomputer = new ABoxRecomputer(tboxModel, aboxModel, new RDFServiceModel(ds), this);
     }

     public void setPluginList(List<ReasonerPlugin> pluginList) {
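Editor's note: the test-oriented constructor now assembles an in-memory Dataset whose named graphs mirror the webapp's graph layout, so the recomputer's GRAPH-scoped SPARQL runs the same way in unit tests as in production. The assembly pattern, with urn:example graph URIs standing in for the ModelNames constants:

```java
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.DatasetFactory;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;

public class TestDatasetSketch {
    public static void main(String[] args) {
        Model aboxModel = ModelFactory.createDefaultModel();
        Model inferenceModel = ModelFactory.createDefaultModel();
        Model tboxModel = ModelFactory.createDefaultModel();

        // Each model becomes a named graph; the default graph is a union so
        // unqualified queries see assertions and TBox axioms together.
        Dataset ds = DatasetFactory.createMem();
        ds.addNamedModel("urn:example:abox-assertions", aboxModel);
        ds.addNamedModel("urn:example:abox-inferences", inferenceModel);
        ds.addNamedModel("urn:example:tbox-assertions", tboxModel);
        ds.setDefaultModel(ModelFactory.createUnion(aboxModel, tboxModel));
        System.out.println(ds.containsNamedModel("urn:example:abox-assertions"));
    }
}
```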
@@ -805,16 +814,7 @@ public class SimpleReasoner extends StatementListener {
             Resource subject = sameIter.next();
             Statement sameStmt =
                     ResourceFactory.createStatement(subject,stmt.getPredicate(),stmt.getObject());
-            addInference(sameStmt,inferenceModel,false);
-        }
-        inferenceModel.enterCriticalSection(Lock.WRITE);
-        try {
-            if (inferenceModel.contains(stmt)) {
-                inferenceModel.remove(stmt);
-            }
-        } finally {
-            inferenceModel.leaveCriticalSection();
-        }
+            addInference(sameStmt,inferenceModel, doSameAs);
+        }
     }
@@ -838,11 +838,22 @@ public class SimpleReasoner extends StatementListener {
                 Property inverseProp = inverseIter.next();
                 Statement infStmt = ResourceFactory.createStatement(
                         stmt.getObject().asResource(), inverseProp, stmt.getSubject());
-                addInference(infStmt,inferenceModel,true);
+                addInference(infStmt, inferenceModel, true);
             }
         }

-        doSameAsForAddedABoxAssertion( stmt, inferenceModel);
+        inferenceModel.enterCriticalSection(Lock.WRITE);
+        try {
+            if (inferenceModel.contains(stmt)) {
+                inferenceModel.remove(stmt);
+            }
+        } finally {
+            inferenceModel.leaveCriticalSection();
+        }
+        if(doSameAs) {
+            doSameAsForAddedABoxAssertion( stmt, inferenceModel);
+        }
     }

     void doSameAsForRemovedABoxAssertion(Statement stmt, Model inferenceModel){
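Editor's note: the relocated block above enforces the invariant that a triple lives either in the assertions graph or in the inference graph, never both: once a statement is asserted, any copy of it is retracted from the inference model. In miniature:

```java
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.vocabulary.RDFS;

public class AssertedBeatsInferredSketch {
    public static void main(String[] args) {
        Model assertions = ModelFactory.createDefaultModel();
        Model inferences = ModelFactory.createDefaultModel();

        Statement stmt = assertions.createStatement(
                assertions.createResource("http://example.org/a"),
                RDFS.label, "x");
        inferences.add(stmt); // previously derived by the reasoner
        assertions.add(stmt); // now asserted directly by an edit

        // Same check-then-remove the reasoner performs under its write lock.
        if (inferences.contains(stmt)) {
            inferences.remove(stmt);
        }
        System.out.println(inferences.isEmpty()); // true
    }
}
```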

View file: SimpleReasonerSetup.java

@@ -21,6 +21,7 @@ import org.apache.commons.logging.LogFactory;
 import com.hp.hpl.jena.ontology.OntModel;
 import com.hp.hpl.jena.query.Dataset;
 import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.RDFNode;

 import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess;
 import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames;
@@ -56,7 +57,15 @@ public class SimpleReasonerSetup implements ServletContextListener {
         Dataset dataset = ModelAccess.on(ctx).getDataset();

         Model rebuildModel = dataset.getNamedModel(JENA_INF_MODEL_REBUILD);
+        if(rebuildModel.contains(null, null, (RDFNode) null)) {
+            log.info("Clearing obsolete data from inference rebuild model");
+            rebuildModel.removeAll();
+        }
         Model scratchModel = dataset.getNamedModel(JENA_INF_MODEL_SCRATCHPAD);
+        if(scratchModel.contains(null, null, (RDFNode) null)) {
+            log.info("Clearing obsolete data from inference scratchpad model");
+            scratchModel.removeAll();
+        }
         Model inferenceModel = dataset.getNamedModel(ABOX_INFERENCES);

         // the simple reasoner will register itself as a listener to the ABox assertions
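Editor's note: since the rebuild and scratchpad graphs are no longer used by the recomputer, setup now empties any stale triples they may still hold from earlier releases. contains(null, null, (RDFNode) null) is Jena's cheap any-statement test; removeAll() then clears the graph in place. The same pattern in isolation (graph URI is illustrative):

```java
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.DatasetFactory;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.vocabulary.RDFS;

public class ClearObsoleteGraphSketch {
    public static void main(String[] args) {
        Dataset dataset = DatasetFactory.createMem();
        Model rebuildModel = dataset.getNamedModel("urn:example:rebuild");
        rebuildModel.add(rebuildModel.createResource(), RDFS.label, "leftover");

        // Any-statement test: true if the graph holds at least one triple.
        if (rebuildModel.contains(null, null, (RDFNode) null)) {
            rebuildModel.removeAll();
        }
        System.out.println(rebuildModel.isEmpty()); // true
    }
}
```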