diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/model/RDFServiceModel.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/model/RDFServiceModel.java index 8526c5972..95a0942cb 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/model/RDFServiceModel.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/model/RDFServiceModel.java @@ -27,6 +27,7 @@ public class RDFServiceModel extends RDFServiceJena implements RDFService { private final static Log log = LogFactory.getLog(RDFServiceModel.class); private Model model; + private Dataset dataset; private String modelName; /** @@ -36,14 +37,27 @@ public class RDFServiceModel extends RDFServiceJena implements RDFService { public RDFServiceModel(Model model) { this.model = model; } - + + /** + * Create an RDFService to access a Jena Dataset + * @param dataset + */ + public RDFServiceModel(Dataset dataset) { + this.dataset = dataset; + } + @Override protected DatasetWrapper getDatasetWrapper() { - Dataset d = DatasetFactory.createMem(); - if (modelName == null) { - d.setDefaultModel(this.model); + Dataset d = null; + if (dataset != null) { + d = dataset; } else { - d.addNamedModel(this.modelName, model); + d = DatasetFactory.createMem(); + if (modelName == null) { + d.setDefaultModel(this.model); + } else { + d.addNamedModel(this.modelName, model); + } } DatasetWrapper datasetWrapper = new DatasetWrapper(d); return datasetWrapper; @@ -75,7 +89,16 @@ public class RDFServiceModel extends RDFServiceJena implements RDFService { modelChange.setSerializedModel(new ByteArrayInputStream(bytes)); } modelChange.getSerializedModel().mark(Integer.MAX_VALUE); - operateOnModel(model, modelChange, null); + Model m = this.model; + if (m == null && dataset != null) { + String changeGraphURI = modelChange.getGraphURI(); + if (changeGraphURI != null) { + m = dataset.getNamedModel(changeGraphURI); + } else { + m = dataset.getDefaultModel(); + } + } + operateOnModel(m, modelChange, null); } // notify listeners of triple changes diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java index 1f0f14a80..1ee3c309d 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java @@ -595,13 +595,13 @@ public class RDFServiceSparql extends RDFServiceImpl implements RDFService { private void performChange(ModelChange modelChange) throws RDFServiceException { Model model = parseModel(modelChange); + Model[] separatedModel = separateStatementsWithBlankNodes(model); if (modelChange.getOperation() == ModelChange.Operation.ADD) { - Model[] separatedModel = separateStatementsWithBlankNodes(model); addModel(separatedModel[1], modelChange.getGraphURI()); addBlankNodesWithSparqlUpdate(separatedModel[0], modelChange.getGraphURI()); } else if (modelChange.getOperation() == ModelChange.Operation.REMOVE) { - deleteModel(model, modelChange.getGraphURI()); - removeBlankNodesWithSparqlUpdate(model, modelChange.getGraphURI()); + deleteModel(separatedModel[1], modelChange.getGraphURI()); + removeBlankNodesWithSparqlUpdate(separatedModel[0], modelChange.getGraphURI()); } else { log.error("unrecognized operation type"); } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/ABoxRecomputer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/ABoxRecomputer.java index 0c7bca4f1..5a687d379 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/ABoxRecomputer.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/ABoxRecomputer.java @@ -2,6 +2,8 @@ package edu.cornell.mannlib.vitro.webapp.reasoner; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; @@ -14,7 +16,10 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.hp.hpl.jena.ontology.OntModel; -import com.hp.hpl.jena.ontology.OntProperty; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; import com.hp.hpl.jena.query.QuerySolution; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.ResultSetFactory; @@ -25,530 +30,458 @@ import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.ResourceFactory; import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; -import com.hp.hpl.jena.shared.JenaException; import com.hp.hpl.jena.shared.Lock; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; +import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; +import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph; +import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames; +import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeSet; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; -import edu.cornell.mannlib.vitro.webapp.servlet.setup.JenaDataSourceSetupBase; -import edu.cornell.mannlib.vitro.webapp.servlet.setup.SimpleReasonerSetup; public class ABoxRecomputer { - private static final Log log = LogFactory.getLog(ABoxRecomputer.class); - - private OntModel tboxModel; // asserted and inferred TBox axioms - private OntModel aboxModel; // ABox assertions - private Model inferenceModel; // ABox inferences - private Model inferenceRebuildModel; // work area for recomputing all ABox inferences - private Model scratchpadModel; // work area for recomputing all ABox inferences - private RDFService rdfService; - private SimpleReasoner simpleReasoner; - private Object lock1 = new Object(); - - private volatile boolean recomputing = false; - private boolean stopRequested = false; - - /** - * @param tboxModel - input. This model contains both asserted and inferred TBox axioms - * @param aboxModel - input. This model contains asserted ABox statements - * @param inferenceModel - output. This is the model in which inferred (materialized) ABox statements are maintained (added or retracted). - * @param inferenceRebuildModel - output. This the model is temporarily used when the whole ABox inference model is rebuilt - * @param inferenceScratchpadModel - output. This the model is temporarily used when the whole ABox inference model is rebuilt - */ - public ABoxRecomputer(OntModel tboxModel, - OntModel aboxModel, - Model inferenceModel, - Model inferenceRebuildModel, - Model scratchpadModel, - RDFService rdfService, - SimpleReasoner simpleReasoner) { - this.tboxModel = tboxModel; - this.aboxModel = aboxModel; - this.inferenceModel = inferenceModel; - this.inferenceRebuildModel = inferenceRebuildModel; - this.scratchpadModel = scratchpadModel; - this.rdfService = rdfService; - this.simpleReasoner = simpleReasoner; - recomputing = false; - stopRequested = false; - } - - /** - * Returns true if the recomputer is in the process of recomputing - * all inferences. - */ - public boolean isRecomputing() { - return recomputing; - } - - /** - * Recompute all inferences. - */ - public void recompute() { - - synchronized (lock1) { - if (recomputing) { - return; - } else { - recomputing = true; - } - } - - try { - recomputeABox(); - } finally { - synchronized (lock1) { - recomputing = false; - } - } - } + private static final Log log = LogFactory.getLog(ABoxRecomputer.class); - // don't check for existing inferences in the rebuild model - private boolean DO_CHECK = true; - - /* - * Recompute the entire ABox inference graph. The new - * inference graph is built in a separate model and - * then reconciled with the inference graph in active - * use. The model reconciliation must be done - * without reading the whole inference models into - * memory in order to support very large ABox - * inference models. - */ - protected void recomputeABox() { - - // recompute class subsumption inferences - inferenceRebuildModel.enterCriticalSection(Lock.WRITE); - try { - - log.info("Clearing inference rebuild model."); - HashSet unknownTypes = new HashSet(); - inferenceRebuildModel.removeAll(); - - log.info("Computing class subsumption ABox inferences."); - int numStmts = 0; - Collection individuals = this.getAllIndividualURIs(); - - log.info("Recomputing inferences for " + individuals.size() + " individuals"); - - long start = System.currentTimeMillis(); - - for (String individualURI : individuals) { - Resource individual = ResourceFactory.createResource(individualURI); - - try { - addedABoxTypeAssertion(individual, inferenceRebuildModel, unknownTypes, DO_CHECK); - simpleReasoner.setMostSpecificTypes(individual, inferenceRebuildModel, unknownTypes); - List pluginList = simpleReasoner.getPluginList(); - if (pluginList.size() > 0) { - StmtIterator sit = aboxModel.listStatements(individual, null, (RDFNode) null); - while (sit.hasNext()) { - Statement s = sit.nextStatement(); - for (ReasonerPlugin plugin : pluginList) { - plugin.addedABoxStatement(s, aboxModel, inferenceRebuildModel, tboxModel); - } - } - } - } catch (NullPointerException npe) { - log.error("a NullPointerException was received while recomputing the ABox inferences. Halting inference computation."); - npe.printStackTrace(); - return; - } catch (JenaException je) { - if (je.getMessage().equals("Statement models must no be null")) { - log.error("Exception while recomputing ABox inference model. Halting inference computation.", je); - return; - } - log.error("Exception while recomputing ABox inference model: ", je); - } catch (Exception e) { - log.error("Exception while recomputing ABox inference model: ", e); - } catch (OutOfMemoryError e) { - log.error(individualURI + " out of memory", e); - } - - numStmts++; - if ((numStmts % 1000) == 0) { - log.info("Still computing class subsumption ABox inferences (" - + numStmts + "/" + individuals.size() + " individuals)"); - log.info((System.currentTimeMillis() - start) / 1000 + " ms per individual"); - start = System.currentTimeMillis(); - } - - if (stopRequested) { - log.info("a stopRequested signal was received during recomputeABox. Halting Processing."); - return; - } - } - - log.info("Finished computing class subsumption ABox inferences"); - log.info("Computing inverse property ABox inferences"); - - Iterator invStatements = null; - tboxModel.enterCriticalSection(Lock.READ); - try { - invStatements = tboxModel.listStatements((Resource) null, OWL.inverseOf, (Resource) null); - } finally { - tboxModel.leaveCriticalSection(); - } - - numStmts = 0; - while (invStatements.hasNext()) { - Statement stmt = invStatements.next(); - - try { - OntProperty prop1 = tboxModel.getOntProperty((stmt.getSubject()).getURI()); - if (prop1 == null) { - //TODO make sure not to print out a million of these for the same property - log.debug("didn't find subject property in the tbox: " + (stmt.getSubject()).getURI()); - continue; - } - - OntProperty prop2 = tboxModel.getOntProperty(((Resource)stmt.getObject()).getURI()); - if (prop2 == null) { - //TODO make sure not to print out a million of these for the same property - log.debug("didn't find object property in the tbox: " + ((Resource)stmt.getObject()).getURI()); - continue; - } - - simpleReasoner.addedInverseProperty(prop1, prop2, inferenceRebuildModel); - } catch (NullPointerException npe) { - log.error("a NullPointerException was received while recomputing the ABox inferences. Halting inference computation."); - npe.printStackTrace(); - return; - } catch (JenaException je) { - if (je.getMessage().equals("Statement models must no be null")) { - log.error("Exception while recomputing ABox inference model. Halting inference computation.", je); - return; - } - log.error("Exception while recomputing ABox inference model: ", je); - } catch (Exception e) { - log.error("Exception while recomputing ABox inference model: ", e); - } - - numStmts++; - if ((numStmts % 10000) == 0) { - log.info("Still computing inverse property ABox inferences..."); - } - - if (stopRequested) { - log.info("a stopRequested signal was received during recomputeABox. Halting Processing."); - return; - } - } - - log.info("Finished computing inverse property ABox inferences"); - log.info("Computing sameAs ABox inferences"); - - Iterator sameAsStatements = null; - aboxModel.enterCriticalSection(Lock.READ); - try { - sameAsStatements = aboxModel.listStatements((Resource) null, OWL.sameAs, (Resource) null); - } finally { - aboxModel.leaveCriticalSection(); - } - - numStmts = 0; - while (sameAsStatements.hasNext()) { - Statement stmt = sameAsStatements.next(); - - try { - simpleReasoner.addedABoxSameAsAssertion(stmt, inferenceRebuildModel); - } catch (NullPointerException npe) { - log.error("a NullPointerException was received while recomputing the ABox inferences. Halting inference computation."); - npe.printStackTrace(); - return; - } catch (JenaException je) { - if (je.getMessage().equals("Statement models must no be null")) { - log.error("Exception while recomputing ABox inference model. Halting inference computation.", je); - return; - } - log.error("Exception while recomputing ABox inference model: ", je); - } catch (Exception e) { - log.error("Exception while recomputing ABox inference model: ", e); - } - - numStmts++; - if ((numStmts % 10000) == 0) { - log.info("Still computing sameAs ABox inferences..."); - } - - if (stopRequested) { - log.info("a stopRequested signal was received during recomputeABox. Halting Processing."); - return; - } - } - log.info("Finished computing sameAs ABox inferences"); - - try { - if (updateInferenceModel(inferenceRebuildModel, individuals)) { - log.info("a stopRequested signal was received during updateInferenceModel. Halting Processing."); - return; - } - } catch (Exception e) { - log.error("Exception while reconciling the current and recomputed ABox inference model for class subsumption inferences. Halting processing." , e); - } - } catch (Exception e) { - e.printStackTrace(); - log.error("Exception while recomputing ABox inferences. Halting processing.", e); - } finally { - inferenceRebuildModel.removeAll(); - inferenceRebuildModel.leaveCriticalSection(); - } - } - - /* - * Get the URIs for all individuals in the system - */ - protected Collection getAllIndividualURIs() { - - HashSet individualURIs = new HashSet(); - - List classList = new ArrayList(); - - tboxModel.enterCriticalSection(Lock.READ); - try { - StmtIterator classIt = tboxModel.listStatements( - (Resource) null, RDF.type, OWL.Class); - while(classIt.hasNext()) { - Statement stmt = classIt.nextStatement(); - if(stmt.getSubject().isURIResource() - && stmt.getSubject().getURI() != null - && !stmt.getSubject().getURI().isEmpty()) { - classList.add(stmt.getSubject().getURI()); - } - } - } finally { - tboxModel.leaveCriticalSection(); - } - - for (String classURI : classList) { - String queryString = "SELECT ?s WHERE { ?s a <" + classURI + "> } "; - getIndividualURIs(queryString, individualURIs); - } - - return individualURIs; - } + private OntModel tboxModel; // asserted and inferred TBox axioms + private OntModel aboxModel; + private RDFService rdfService; + private SimpleReasoner simpleReasoner; + private Object lock1 = new Object(); + private volatile boolean recomputing = false; + private boolean stopRequested = false; + private boolean handleSameAs = false; - protected void getIndividualURIs(String queryString, Set individuals) { + private final int BATCH_SIZE = 100; + private final int REPORTING_INTERVAL = 1000; - int batchSize = 50000; - int offset = 0; - boolean done = false; - - while (!done) { - String queryStr = queryString + " LIMIT " + batchSize + " OFFSET " + offset; - if(log.isDebugEnabled()) { - log.debug(queryStr); - } - - ResultSet results = null; - - try { - InputStream in = rdfService.sparqlSelectQuery(queryStr, RDFService.ResultFormat.JSON); - results = ResultSetFactory.fromJSON(in); - } catch (RDFServiceException e) { - throw new RuntimeException(e); - } - - if (!results.hasNext()) { - done = true; - } - - while (results.hasNext()) { - QuerySolution solution = results.next(); - Resource resource = solution.getResource("s"); - - if ((resource != null) && !resource.isAnon()) { - individuals.add(resource.getURI()); - } - } - - if(log.isDebugEnabled()) { - log.info(individuals.size() + " in set"); - } - offset += batchSize; - } - - } - - protected void addedABoxTypeAssertion(Resource individual, Model inferenceModel, - HashSet unknownTypes) { - addedABoxTypeAssertion(individual, inferenceModel, unknownTypes, true); - } - - protected void addedABoxTypeAssertion(Resource individual, Model inferenceModel, - HashSet unknownTypes, boolean checkRedundancy) { + /** + * @param tboxModel - input. This model contains both asserted and inferred TBox axioms + * @param aboxModel - input. This model contains asserted ABox statements + * @param inferenceModel - output. This is the model in which inferred (materialized) ABox statements are maintained (added or retracted). + */ + public ABoxRecomputer(OntModel tboxModel, + OntModel aboxModel, + RDFService rdfService, + SimpleReasoner simpleReasoner) { + this.tboxModel = tboxModel; + this.aboxModel = aboxModel; + this.rdfService = rdfService; + this.simpleReasoner = simpleReasoner; + recomputing = false; + stopRequested = false; + handleSameAs = simpleReasoner.getSameAsEnabled(); + } - StmtIterator iter = null; - - aboxModel.enterCriticalSection(Lock.READ); - try { - iter = aboxModel.listStatements(individual, RDF.type, (RDFNode) null); - - while (iter.hasNext()) { - Statement stmt = iter.next(); - simpleReasoner.addedABoxTypeAssertion( - stmt, inferenceModel, unknownTypes, checkRedundancy); - } - } finally { - if (iter != null) { - iter.close(); - } - aboxModel.leaveCriticalSection(); - } - } - /* - * reconcile a set of inferences into the application inference model - */ - protected boolean updateInferenceModel(Model inferenceRebuildModel, - Collection individuals) { + /** + * Returns true if the recomputer is in the process of recomputing + * all inferences. + */ + public boolean isRecomputing() { + return recomputing; + } - log.info("Updating ABox inference model"); - - // Remove everything from the current inference model that is not - // in the recomputed inference model - int num = 0; - scratchpadModel.enterCriticalSection(Lock.WRITE); - scratchpadModel.removeAll(); - Model rebuild = ModelFactory.createDefaultModel(); - Model existing = ModelFactory.createDefaultModel(); - - long start = System.currentTimeMillis(); - - for (String individualURI : individuals) { - rebuild.removeAll(); - existing.removeAll(); - Resource subjInd = ResourceFactory.createResource(individualURI); - inferenceModel.enterCriticalSection(Lock.READ); - try { - existing.add(inferenceModel.listStatements(subjInd, null, (RDFNode) null)); - } finally { - inferenceModel.leaveCriticalSection(); - } - inferenceRebuildModel.enterCriticalSection(Lock.READ); - try { - rebuild.add(inferenceRebuildModel.listStatements(subjInd, null, (RDFNode) null)); - } finally { - inferenceRebuildModel.leaveCriticalSection(); + /** + * Recompute all inferences. + */ + public void recompute() { + synchronized (lock1) { + if (recomputing) { + return; + } else { + recomputing = true; } - - Model retractions = existing.difference(rebuild); - Model additions = rebuild.difference(existing); - - inferenceModel.enterCriticalSection(Lock.WRITE); - try { - inferenceModel.remove(retractions); - inferenceModel.add(additions); - } finally { - inferenceModel.leaveCriticalSection(); - } - - inferenceRebuildModel.enterCriticalSection(Lock.WRITE); - try { - inferenceRebuildModel.remove(rebuild); - } finally { - inferenceRebuildModel.leaveCriticalSection(); + } + try { + recomputeABox(); + } finally { + synchronized (lock1) { + recomputing = false; } + } + } - num++; - if ((num % 1000) == 0) { - log.info("Still updating ABox inference model (" + - + num + "/" + individuals.size() + " individuals)"); - log.info((System.currentTimeMillis() - start) / 1000 + " ms per individual"); - start = System.currentTimeMillis(); - } - - if (stopRequested) { - return true; - } - - } - - log.info("ABox inference model updated"); - return false; - } - - private Iterator listModelStatements(Model model, String graphURI) { - // the RDFServices supplied by the unit tests won't have the right - // named graphs. So if the graphURI-based chunked iterator is empty, - // we'll try listStatements() on the model instead. - Iterator it = new ChunkedStatementIterator(graphURI); - if (it.hasNext()) { - return it; - } else { - return model.listStatements(); - } - } - - // avoids OutOfMemory errors by retrieving triples in batches - private class ChunkedStatementIterator implements Iterator { - - final int CHUNK_SIZE = 50000; - private int offset = 0; - - private String queryString; - - private Model temp = ModelFactory.createDefaultModel(); - private StmtIterator tempIt; - - public ChunkedStatementIterator(String graphURI) { - this.queryString = "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <" + - graphURI + "> { ?s ?p ?o } }"; - } - - public Statement next() { - if (tempIt.hasNext()) { - return tempIt.nextStatement(); - } else { - return null; - } - } - - public void remove() { - throw new UnsupportedOperationException(this.getClass().getName() + - " does not support .remove()"); - } - - public boolean hasNext() { - if (tempIt != null && tempIt.hasNext()) { - return true; - } else { - getNextChunk(); - if (temp.size() > 0) { - tempIt = temp.listStatements(); - return true; - } else { - return false; - } - } - } - - private void getNextChunk() { - - String chunkQueryString = queryString + " LIMIT " + CHUNK_SIZE + " OFFSET " + offset; - offset += CHUNK_SIZE; - + /* + * Recompute the entire ABox inference graph. + */ + protected void recomputeABox() { + log.info("Recomputing ABox inferences."); + log.info("Finding individuals in ABox."); + Collection individuals = this.getAllIndividualURIs(); + log.info("Recomputing inferences for " + individuals.size() + " individuals"); + long start = System.currentTimeMillis(); + int numInds = 0; + Model rebuildModel = ModelFactory.createDefaultModel(); + Model additionalInferences = ModelFactory.createDefaultModel(); + List individualsInBatch = new ArrayList(); + Iterator individualIt = individuals.iterator(); + while (individualIt.hasNext()) { + String individualURI = individualIt.next(); try { - InputStream in = rdfService.sparqlConstructQuery( - chunkQueryString, RDFService.ModelSerializationFormat.NTRIPLE); - temp.removeAll(); - temp.add(RDFServiceUtils.parseModel( - in, RDFService.ModelSerializationFormat.NTRIPLE)); + additionalInferences.add(recomputeIndividual( + individualURI, rebuildModel)); + numInds++; + individualsInBatch.add(individualURI); + boolean batchFilled = (numInds % BATCH_SIZE) == 0; + boolean reportingInterval = (numInds % REPORTING_INTERVAL) == 0; + if (batchFilled || !individualIt.hasNext()) { + log.debug(rebuildModel.size() + " total inferences"); + updateInferenceModel(rebuildModel, individualsInBatch); + rebuildModel.removeAll(); + individualsInBatch.clear(); + } + if (reportingInterval) { + log.info("Still recomputing inferences (" + + numInds + "/" + individuals.size() + " individuals)"); + log.info((System.currentTimeMillis() - start) / numInds + " ms per individual"); + } + if (stopRequested) { + log.info("a stopRequested signal was received during recomputeABox. Halting Processing."); + return; + } + } catch (Exception e) { + log.error("Error recomputing inferences for individual <" + individualURI + ">", e); + } + } + if(additionalInferences.size() > 0) { + log.info("Writing additional inferences generated by reasoner plugins."); + ChangeSet change = rdfService.manufactureChangeSet(); + change.addAddition(makeN3InputStream(additionalInferences), + RDFService.ModelSerializationFormat.N3, ModelNames.ABOX_INFERENCES); + try { + rdfService.changeSetUpdate(change); + } catch (RDFServiceException e) { + log.error("Unable to write additional inferences from reasoner plugins", e); + } + } + log.info("Finished recomputing inferences"); + } + + private static final boolean RUN_PLUGINS = true; + private static final boolean SKIP_PLUGINS = !RUN_PLUGINS; + + private Model recomputeIndividual(String individualURI, + Model rebuildModel) throws RDFServiceException { + long start = System.currentTimeMillis(); + Model assertions = getAssertions(individualURI); + log.trace((System.currentTimeMillis() - start) + " ms to get assertions."); + Model additionalInferences = recomputeIndividual( + individualURI, null, assertions, rebuildModel, RUN_PLUGINS); + if (handleSameAs) { + Set sameAsInds = getSameAsIndividuals(individualURI); + for (String sameAsInd : sameAsInds) { + // sameAs for plugins is handled by the SimpleReasoner + Model sameAsIndAssertions = getAssertions(sameAsInd); + recomputeIndividual( + sameAsInd, individualURI, sameAsIndAssertions, rebuildModel, SKIP_PLUGINS); + rebuildModel.add( + rewriteInferences(getAssertions(sameAsInd), individualURI)); + Resource indRes = ResourceFactory.createResource(individualURI); + Resource sameAsIndRes = ResourceFactory.createResource(sameAsInd); + if(!assertions.contains(indRes, OWL.sameAs, sameAsIndRes)) { + rebuildModel.add(indRes, OWL.sameAs, sameAsIndRes); + } + } + } + return additionalInferences; + } + + /** + * Adds inferences to temporary rebuildmodel + * @param individualURI + * @param rebuildModel + * @return any additional inferences produced by plugins that affect other + * individuals + */ + private Model recomputeIndividual(String individualURI, String aliasURI, + Model assertions, Model rebuildModel, boolean runPlugins) + throws RDFServiceException { + + Model additionalInferences = ModelFactory.createDefaultModel(); + Resource individual = ResourceFactory.createResource(individualURI); + + long start = System.currentTimeMillis(); + Model types = ModelFactory.createDefaultModel(); + types.add(assertions.listStatements(null, RDF.type, (RDFNode) null)); + Model inferredTypes = rewriteInferences(getInferredTypes(individual, types), aliasURI); + rebuildModel.add(inferredTypes); + log.trace((System.currentTimeMillis() - start) + " to infer " + inferredTypes.size() + " types"); + + start = System.currentTimeMillis(); + types.add(inferredTypes); + Model mst = getMostSpecificTypes(individual, types); + rebuildModel.add(rewriteInferences(mst, aliasURI)); + log.trace((System.currentTimeMillis() - start) + " to infer " + mst.size() + " mostSpecificTypes"); + + start = System.currentTimeMillis(); + Model inferredInvs = getInferredInverseStatements(individualURI); + inferredInvs.remove(assertions); + rebuildModel.add(rewriteInferences(inferredInvs, aliasURI)); + log.trace((System.currentTimeMillis() - start) + " to infer " + inferredInvs.size() + " inverses"); + + List pluginList = simpleReasoner.getPluginList(); + if (runPlugins && pluginList.size() > 0) { + Model tmpModel = ModelFactory.createDefaultModel(); + StmtIterator sit = assertions.listStatements(); + while (sit.hasNext()) { + Statement s = sit.nextStatement(); + for (ReasonerPlugin plugin : pluginList) { + plugin.addedABoxStatement(s, aboxModel, tmpModel, tboxModel); + } + } + StmtIterator tmpIt = tmpModel.listStatements(); + while(tmpIt.hasNext()) { + Statement tmpStmt = tmpIt.nextStatement(); + if(individual.equals(tmpStmt.getSubject())) { + rebuildModel.add(tmpStmt); + } else { + additionalInferences.add(tmpStmt); + } + } + } + return additionalInferences; + } + + private Model getAssertions(String individualURI) throws RDFServiceException { + String queryStr = "CONSTRUCT { \n" + + " <" + individualURI + "> ?p ?value \n" + + "} WHERE { \n" + + " GRAPH ?g { \n" + + " <" + individualURI + "> ?p ?value \n" + + " } \n" + + " FILTER (?g != <" + ModelNames.ABOX_INFERENCES + ">)\n" + + "} \n"; + return RDFServiceUtils.parseModel( + rdfService.sparqlConstructQuery( + queryStr, RDFService.ModelSerializationFormat.N3) + , RDFService.ModelSerializationFormat.N3); + } + + private Model getInferredTypes(Resource individual, Model assertedTypes) { + String queryStr = "CONSTRUCT { \n" + + " <" + individual.getURI() + "> a ?type \n" + + "} WHERE { \n" + + " <" + individual.getURI() + "> a ?assertedType .\n" + + " { ?assertedType <" + RDFS.subClassOf.getURI() + "> ?type } \n" + + " UNION \n" + + " { ?assertedType <" + OWL.equivalentClass.getURI() + "> ?type } \n" + + " FILTER (isURI(?type)) \n" + + " FILTER NOT EXISTS { \n" + + " <" + individual.getURI() + "> a ?type \n" + + " } \n" + + "} \n"; + Model union = ModelFactory.createUnion(assertedTypes, tboxModel); + tboxModel.enterCriticalSection(Lock.READ); + try { + Query q = QueryFactory.create(queryStr); + QueryExecution qe = QueryExecutionFactory.create(q, union); + return qe.execConstruct(); + } finally { + tboxModel.leaveCriticalSection(); + } + } + + private Model getMostSpecificTypes(Resource individual, Model assertedTypes) { + String queryStr = "CONSTRUCT { \n" + + " <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" + + "} WHERE { \n" + + " <" + individual.getURI() + "> a ?type .\n" + + " FILTER (isURI(?type)) \n" + + " FILTER NOT EXISTS { \n" + + " <" + individual.getURI() + "> a ?type2 . \n" + + " ?type2 <" + RDFS.subClassOf.getURI() + "> ?type. \n" + + " } \n" + + " FILTER NOT EXISTS { \n" + + " <" + individual.getURI() + "> <" + VitroVocabulary.MOST_SPECIFIC_TYPE + "> ?type \n" + + " } \n" + + "} \n"; + Model union = ModelFactory.createUnion(assertedTypes, tboxModel); + tboxModel.enterCriticalSection(Lock.READ); + try { + Query q = QueryFactory.create(queryStr); + QueryExecution qe = QueryExecutionFactory.create(q, union); + return qe.execConstruct(); + } finally { + tboxModel.leaveCriticalSection(); + } + } + + private Model getInferredInverseStatements(String individualURI) throws RDFServiceException { + String queryStr = "CONSTRUCT { \n" + + " <" + individualURI + "> ?inv ?value \n" + + "} WHERE { \n" + + " GRAPH ?gr { \n" + + " ?value ?prop <" + individualURI + "> \n" + + " } \n" + + " FILTER (isURI(?value)) \n" + + " FILTER (?gr != <" + ModelNames.ABOX_INFERENCES + ">) \n" + + " { ?prop <" + OWL.inverseOf.getURI() + "> ?inv } \n" + + " UNION \n" + + " { ?inv <" + OWL.inverseOf.getURI() + "> ?prop } \n" + + "} \n"; + return RDFServiceUtils.parseModel( + rdfService.sparqlConstructQuery( + queryStr, RDFService.ModelSerializationFormat.N3) + , RDFService.ModelSerializationFormat.N3); + } + + private Model rewriteInferences(Model inferences, String aliasURI) { + if (aliasURI == null) { + return inferences; + } + Model rewrite = ModelFactory.createDefaultModel(); + Resource alias = ResourceFactory.createResource(aliasURI); + StmtIterator sit = inferences.listStatements(); + while(sit.hasNext()) { + Statement stmt = sit.nextStatement(); + rewrite.add(alias, stmt.getPredicate(), stmt.getObject()); + } + return rewrite; + } + + /* + * Get the URIs for all individuals in the system + */ + protected Collection getAllIndividualURIs() { + HashSet individualURIs = new HashSet(); + List classList = new ArrayList(); + tboxModel.enterCriticalSection(Lock.READ); + try { + StmtIterator classIt = tboxModel.listStatements( + (Resource) null, RDF.type, OWL.Class); + while(classIt.hasNext()) { + Statement stmt = classIt.nextStatement(); + if(stmt.getSubject().isURIResource() + && stmt.getSubject().getURI() != null + && !stmt.getSubject().getURI().isEmpty()) { + classList.add(stmt.getSubject().getURI()); + } + } + } finally { + tboxModel.leaveCriticalSection(); + } + for (String classURI : classList) { + String queryString = "SELECT ?s WHERE { ?s a <" + classURI + "> } "; + getIndividualURIs(queryString, individualURIs); + } + return individualURIs; + } + + protected void getIndividualURIs(String queryString, Set individuals) { + int batchSize = 50000; + int offset = 0; + boolean done = false; + while (!done) { + String queryStr = queryString + " LIMIT " + batchSize + " OFFSET " + offset; + if(log.isDebugEnabled()) { + log.debug(queryStr); + } + ResultSet results = null; + try { + InputStream in = rdfService.sparqlSelectQuery(queryStr, RDFService.ResultFormat.JSON); + results = ResultSetFactory.fromJSON(in); } catch (RDFServiceException e) { throw new RuntimeException(e); } - } - - } - - - /** - * This is called when the application shuts down. - */ - public void setStopRequested() { - this.stopRequested = true; - } + if (!results.hasNext()) { + done = true; + } + while (results.hasNext()) { + QuerySolution solution = results.next(); + Resource resource = solution.getResource("s"); + + if ((resource != null) && !resource.isAnon()) { + individuals.add(resource.getURI()); + } + } + if(log.isDebugEnabled()) { + log.debug(individuals.size() + " in set"); + } + offset += batchSize; + } + + } + + /* + * reconcile a set of inferences into the application inference model + */ + protected void updateInferenceModel(Model rebuildModel, + Collection individuals) throws RDFServiceException { + Model inferenceModel = RDFServiceGraph.createRDFServiceModel( + new RDFServiceGraph(rdfService, ModelNames.ABOX_INFERENCES)); + Model existing = ModelFactory.createDefaultModel(); + for (String individualURI : individuals) { + Resource subjInd = ResourceFactory.createResource(individualURI); + existing.add(inferenceModel.listStatements(subjInd, null, (RDFNode) null)); + } + Model retractions = existing.difference(rebuildModel); + Model additions = rebuildModel.difference(existing); + inferenceModel.enterCriticalSection(Lock.WRITE); + try { + long start = System.currentTimeMillis(); + ChangeSet change = rdfService.manufactureChangeSet(); + change.addRemoval(makeN3InputStream(retractions), + RDFService.ModelSerializationFormat.N3, ModelNames.ABOX_INFERENCES); + change.addAddition(makeN3InputStream(additions), + RDFService.ModelSerializationFormat.N3, ModelNames.ABOX_INFERENCES); + rdfService.changeSetUpdate(change); + log.debug((System.currentTimeMillis() - start) + + " ms to retract " + retractions.size() + + " statements and add " + additions.size() + " statements"); + } finally { + inferenceModel.leaveCriticalSection(); + } + } + + private InputStream makeN3InputStream(Model m) { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + m.write(out, "N3"); + return new ByteArrayInputStream(out.toByteArray()); + } + + private Set getSameAsIndividuals(String individualURI) { + HashSet sameAsInds = new HashSet(); + sameAsInds.add(individualURI); + getSameAsIndividuals(individualURI, sameAsInds); + sameAsInds.remove(individualURI); + return sameAsInds; + } + + private void getSameAsIndividuals(String individualURI, Set sameAsInds) { + Model m = RDFServiceGraph.createRDFServiceModel(new RDFServiceGraph(rdfService)); + Resource individual = ResourceFactory.createResource(individualURI); + StmtIterator sit = m.listStatements(individual, OWL.sameAs, (RDFNode) null); + while(sit.hasNext()) { + Statement stmt = sit.nextStatement(); + if (stmt.getObject().isURIResource()) { + String sameAsURI = stmt.getObject().asResource().getURI(); + if (!sameAsInds.contains(sameAsURI)) { + sameAsInds.add(sameAsURI); + getSameAsIndividuals(sameAsURI, sameAsInds); + } + } + } + sit = m.listStatements(null, OWL.sameAs, individual); + while(sit.hasNext()) { + Statement stmt = sit.nextStatement(); + if (stmt.getSubject().isURIResource()) { + String sameAsURI = stmt.getSubject().asResource().getURI(); + if (!sameAsInds.contains(sameAsURI)) { + sameAsInds.add(sameAsURI); + getSameAsIndividuals(sameAsURI, sameAsInds); + } + } + } + } + + /** + * This is called when the application shuts down. + */ + public void setStopRequested() { + this.stopRequested = true; + } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/SimpleReasoner.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/SimpleReasoner.java index bce72e978..6dc912724 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/SimpleReasoner.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/reasoner/SimpleReasoner.java @@ -18,6 +18,8 @@ import com.hp.hpl.jena.ontology.AnnotationProperty; import com.hp.hpl.jena.ontology.OntClass; import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.ontology.OntProperty; +import com.hp.hpl.jena.query.Dataset; +import com.hp.hpl.jena.query.DatasetFactory; import com.hp.hpl.jena.rdf.listeners.StatementListener; import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Model; @@ -40,6 +42,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.CumulativeDeltaModeler; import edu.cornell.mannlib.vitro.webapp.dao.jena.DifferenceGraph; import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceGraph; import edu.cornell.mannlib.vitro.webapp.dao.jena.event.BulkUpdateEvent; +import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.adapters.VitroModelFactory; @@ -114,7 +117,7 @@ public class SimpleReasoner extends StatementListener { this.batchMode = 0; aBoxDeltaModeler1 = new CumulativeDeltaModeler(); aBoxDeltaModeler2 = new CumulativeDeltaModeler(); - recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,inferenceRebuildModel,scratchpadModel,rdfService,this); + recomputer = new ABoxRecomputer(tboxModel, aboxModel, rdfService, this); stopRequested = false; if (rdfService == null) { @@ -146,7 +149,13 @@ public class SimpleReasoner extends StatementListener { aBoxDeltaModeler2 = new CumulativeDeltaModeler(); this.batchMode = 0; stopRequested = false; - recomputer = new ABoxRecomputer(tboxModel,this.aboxModel,inferenceModel,ModelFactory.createDefaultModel(), ModelFactory.createDefaultModel(), new RDFServiceModel(aboxModel), this); + Dataset ds = DatasetFactory.createMem(); + ds.addNamedModel(ModelNames.ABOX_ASSERTIONS, aboxModel); + ds.addNamedModel(ModelNames.ABOX_INFERENCES, inferenceModel); + ds.addNamedModel(ModelNames.TBOX_ASSERTIONS, tboxModel); + + ds.setDefaultModel(ModelFactory.createUnion(fullModel, tboxModel)); + recomputer = new ABoxRecomputer(tboxModel, aboxModel, new RDFServiceModel(ds), this); } public void setPluginList(List pluginList) { @@ -805,17 +814,8 @@ public class SimpleReasoner extends StatementListener { Resource subject = sameIter.next(); Statement sameStmt = ResourceFactory.createStatement(subject,stmt.getPredicate(),stmt.getObject()); - addInference(sameStmt,inferenceModel,false); + addInference(sameStmt,inferenceModel, doSameAs); } - - inferenceModel.enterCriticalSection(Lock.WRITE); - try { - if (inferenceModel.contains(stmt)) { - inferenceModel.remove(stmt); - } - } finally { - inferenceModel.leaveCriticalSection(); - } } @@ -838,11 +838,22 @@ public class SimpleReasoner extends StatementListener { Property inverseProp = inverseIter.next(); Statement infStmt = ResourceFactory.createStatement( stmt.getObject().asResource(), inverseProp, stmt.getSubject()); - addInference(infStmt,inferenceModel,true); + addInference(infStmt, inferenceModel, true); } } - doSameAsForAddedABoxAssertion( stmt, inferenceModel); + inferenceModel.enterCriticalSection(Lock.WRITE); + try { + if (inferenceModel.contains(stmt)) { + inferenceModel.remove(stmt); + } + } finally { + inferenceModel.leaveCriticalSection(); + } + + if(doSameAs) { + doSameAsForAddedABoxAssertion( stmt, inferenceModel); + } } void doSameAsForRemovedABoxAssertion(Statement stmt, Model inferenceModel){ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/SimpleReasonerSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/SimpleReasonerSetup.java index 3422eccff..8ad1730ac 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/SimpleReasonerSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/SimpleReasonerSetup.java @@ -21,6 +21,7 @@ import org.apache.commons.logging.LogFactory; import com.hp.hpl.jena.ontology.OntModel; import com.hp.hpl.jena.query.Dataset; import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.RDFNode; import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess; import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelNames; @@ -55,8 +56,16 @@ public class SimpleReasonerSetup implements ServletContextListener { RDFService rdfService = ModelAccess.on(ctx).getRDFService(); Dataset dataset = ModelAccess.on(ctx).getDataset(); - Model rebuildModel = dataset.getNamedModel(JENA_INF_MODEL_REBUILD); + Model rebuildModel = dataset.getNamedModel(JENA_INF_MODEL_REBUILD); + if(rebuildModel.contains(null, null, (RDFNode) null)) { + log.info("Clearing obsolete data from inference rebuild model"); + rebuildModel.removeAll(); + } Model scratchModel = dataset.getNamedModel(JENA_INF_MODEL_SCRATCHPAD); + if(scratchModel.contains(null, null, (RDFNode) null)) { + log.info("Clearing obsolete data from inference scratchpad model"); + scratchModel.removeAll(); + } Model inferenceModel = dataset.getNamedModel(ABOX_INFERENCES); // the simple reasoner will register itself as a listener to the ABox assertions