From 809ff432b6bc2cbc9bae2775005e333328975d93 Mon Sep 17 00:00:00 2001
From: stellamit
Date: Thu, 5 Jul 2012 14:37:08 +0000
Subject: [PATCH] NIHVIVO-3871 blank node handling from ingest tools

---
 .../dao/jena/BlankNodeFilteringGraph.java     | 168 ++++++++++++++++++
 .../dao/jena/BlankNodeStatementListener.java  |  41 +++++
 .../webapp/dao/jena/RDFServiceGraph.java      |   2 +-
 .../webapp/dao/jena/RDFServiceModelMaker.java |  86 +++++++--
 4 files changed, 284 insertions(+), 13 deletions(-)
 create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/BlankNodeFilteringGraph.java
 create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/BlankNodeStatementListener.java

diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/BlankNodeFilteringGraph.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/BlankNodeFilteringGraph.java
new file mode 100644
index 000000000..811423b44
--- /dev/null
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/BlankNodeFilteringGraph.java
@@ -0,0 +1,168 @@
+/* $This file is distributed under the terms of the license in /doc/license.txt$ */
+
+package edu.cornell.mannlib.vitro.webapp.dao.jena;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import com.hp.hpl.jena.graph.BulkUpdateHandler;
+import com.hp.hpl.jena.graph.Capabilities;
+import com.hp.hpl.jena.graph.Graph;
+import com.hp.hpl.jena.graph.GraphEventManager;
+import com.hp.hpl.jena.graph.GraphStatisticsHandler;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.Reifier;
+import com.hp.hpl.jena.graph.TransactionHandler;
+import com.hp.hpl.jena.graph.Triple;
+import com.hp.hpl.jena.graph.TripleMatch;
+import com.hp.hpl.jena.graph.query.QueryHandler;
+import com.hp.hpl.jena.shared.AddDeniedException;
+import com.hp.hpl.jena.shared.DeleteDeniedException;
+import com.hp.hpl.jena.shared.PrefixMapping;
+import com.hp.hpl.jena.util.iterator.ExtendedIterator;
+import com.hp.hpl.jena.util.iterator.WrappedIterator;
+
+/**
+ * A Graph decorator that hides triples having a blank node as subject or
+ * object: add() and delete() silently ignore such triples and find() omits
+ * them from its results. Every other operation is passed straight through
+ * to the wrapped graph.
+ *
+ * NOTE(review): contains(), isEmpty() and size() are not filtered, so they
+ * can still reflect blank node triples that find() will not return.
+ */
+public class BlankNodeFilteringGraph implements Graph {
+
+    private final Graph graph;
+
+    public BlankNodeFilteringGraph(Graph graph) {
+        this.graph = graph;
+    }
+
+    /** True if the triple has a blank node as its subject or its object. */
+    private static boolean hasBlankNode(Triple t) {
+        return t.getSubject().isBlank() || t.getObject().isBlank();
+    }
+
+    @Override
+    public void add(Triple t) throws AddDeniedException {
+        if (!hasBlankNode(t)) {
+            graph.add(t);
+        }
+    }
+
+    @Override
+    public void close() {
+        graph.close();
+    }
+
+    @Override
+    public boolean contains(Node arg0, Node arg1, Node arg2) {
+        return graph.contains(arg0, arg1, arg2);
+    }
+
+    @Override
+    public boolean contains(Triple arg0) {
+        return graph.contains(arg0);
+    }
+
+    @Override
+    public void delete(Triple t) throws DeleteDeniedException {
+        if (!hasBlankNode(t)) {
+            graph.delete(t);
+        }
+    }
+
+    @Override
+    public boolean dependsOn(Graph arg0) {
+        return graph.dependsOn(arg0);
+    }
+
+    /**
+     * Returns the matching triples of the wrapped graph, minus any that
+     * involve a blank node. The matches are copied into a list first, so
+     * the returned iterator is a snapshot and not a live view of the graph.
+     */
+    @Override
+    public ExtendedIterator<Triple> find(Node subject, Node predicate, Node object) {
+
+        List<Triple> nbTripList = new ArrayList<Triple>();
+        ExtendedIterator<Triple> triples = graph.find(subject, predicate, object);
+
+        while (triples.hasNext()) {
+            Triple triple = triples.next();
+
+            if (!hasBlankNode(triple)) {
+                nbTripList.add(triple);
+            }
+        }
+
+        return WrappedIterator.create(nbTripList.iterator());
+    }
+
+    @Override
+    public ExtendedIterator<Triple> find(TripleMatch tripleMatch) {
+        Triple t = tripleMatch.asTriple();
+        return find(t.getSubject(), t.getPredicate(), t.getObject());
+    }
+
+    @Override
+    public BulkUpdateHandler getBulkUpdateHandler() {
+        return graph.getBulkUpdateHandler();
+    }
+
+    @Override
+    public Capabilities getCapabilities() {
+        return graph.getCapabilities();
+    }
+
+    @Override
+    public GraphEventManager getEventManager() {
+        return graph.getEventManager();
+    }
+
+    @Override
+    public PrefixMapping getPrefixMapping() {
+        return graph.getPrefixMapping();
+    }
+
+    @Override
+    public Reifier getReifier() {
+        return graph.getReifier();
+    }
+
+    @Override
+    public GraphStatisticsHandler getStatisticsHandler() {
+        return graph.getStatisticsHandler();
+    }
+
+    @Override
+    public TransactionHandler getTransactionHandler() {
+        return graph.getTransactionHandler();
+    }
+
+    @Override
+    public boolean isClosed() {
+        return graph.isClosed();
+    }
+
+    @Override
+    public boolean isEmpty() {
+        return graph.isEmpty();
+    }
+
+    @Override
+    public boolean isIsomorphicWith(Graph arg0) {
+        return graph.isIsomorphicWith(arg0);
+    }
+
+    @Override
+    public QueryHandler queryHandler() {
+        return graph.queryHandler();
+    }
+
+    @Override
+    public int size() {
+        return graph.size();
+    }
+}
diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/BlankNodeStatementListener.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/BlankNodeStatementListener.java
new file mode 100644
index 000000000..b37a5bc9e
--- /dev/null
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/BlankNodeStatementListener.java
@@ -0,0 +1,41 @@
+/* $This file is distributed under the terms of the license in /doc/license.txt$ */
+
+package edu.cornell.mannlib.vitro.webapp.dao.jena;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+import com.hp.hpl.jena.rdf.listeners.StatementListener;
+import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.Statement;
+
+/**
+ * Statement listener that mirrors blank node statements into a second model:
+ * whenever it is notified that a statement with an anonymous subject or
+ * object was added or removed, it adds or removes that statement in bnodeModel.
+ */
+public class BlankNodeStatementListener extends StatementListener {
+
+    private static final Log log = LogFactory.getLog(BlankNodeStatementListener.class);
+    private final Model bnodeModel;
+
+    public BlankNodeStatementListener(Model bnodeModel) {
+        this.bnodeModel = bnodeModel;
+    }
+
+    @Override
+    public void addedStatement(Statement stmt) {
+
+        if (stmt.getSubject().isAnon() || stmt.getObject().isAnon()) {
+            bnodeModel.add(stmt);
+        }
+    }
+
+    @Override
+    public void removedStatement(Statement stmt) {
+
+        if (stmt.getSubject().isAnon() || stmt.getObject().isAnon()) {
+            bnodeModel.remove(stmt);
+        }
+    }
+}
diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraph.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraph.java
index 766b1bbf4..c7fb46680 100644
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraph.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraph.java
@@ -47,7 +47,7 @@ public class RDFServiceGraph implements GraphWithPerform {
 
     private RDFService rdfService;
     private String graphURI;
-    private static final Log log = LogFactory.getLog(SparqlGraph.class);
+    private static final Log log = LogFactory.getLog(RDFServiceGraph.class);
     private BulkUpdateHandler bulkUpdateHandler;
     private PrefixMapping prefixMapping = new PrefixMappingImpl();
 
diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceModelMaker.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceModelMaker.java
index b29ba75da..de822941c 100644
--- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceModelMaker.java
+++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceModelMaker.java
@@ -13,20 +13,25 @@ import java.util.Set;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 
+import com.hp.hpl.jena.graph.BulkUpdateHandler;
+import com.hp.hpl.jena.graph.Graph;
 import com.hp.hpl.jena.graph.GraphMaker;
 import com.hp.hpl.jena.query.Dataset;
 import com.hp.hpl.jena.rdf.model.Literal;
 import com.hp.hpl.jena.rdf.model.Model;
+import com.hp.hpl.jena.rdf.model.ModelFactory;
 import com.hp.hpl.jena.rdf.model.ModelMaker;
 import com.hp.hpl.jena.rdf.model.ModelReader;
 import com.hp.hpl.jena.rdf.model.RDFNode;
 import com.hp.hpl.jena.rdf.model.Resource;
+import com.hp.hpl.jena.rdf.model.ResourceFactory;
 import com.hp.hpl.jena.rdf.model.Statement;
 import com.hp.hpl.jena.rdf.model.StmtIterator;
 import com.hp.hpl.jena.util.iterator.ExtendedIterator;
 import com.hp.hpl.jena.util.iterator.WrappedIterator;
 
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
+import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ModelSerializationFormat;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
 import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
 
@@ -72,12 +77,20 @@ public class RDFServiceModelMaker implements ModelMaker {
     }
 
     public Model createModel(String modelName) {
+
         Model model = getModel(modelName);
         Model metadataModel = getMetadataModel();
+
+        Literal modelNameLiteral = ResourceFactory.createPlainLiteral(modelName);
+        Statement metadataStatement = ResourceFactory.createStatement(dbResource,metadataModel.getProperty(
+                HAS_NAMED_MODEL_URI), modelNameLiteral);
+
+        // to get around blank node filtering on BlankNodeFilteringGraph
+        List<Statement> stmtList = new ArrayList<Statement>();
+        stmtList.add(metadataStatement);
+
         try {
-            metadataModel.add(
-                    dbResource,metadataModel.getProperty(
-                            HAS_NAMED_MODEL_URI), modelName);
+            metadataModel.add(stmtList);
         } finally {
             metadataModel.close();
         }
@@ -150,16 +163,65 @@ public class RDFServiceModelMaker implements ModelMaker {
         return WrappedIterator.create(modelNameList.iterator());
     }
 
-    public Model openModel(String arg0, boolean arg1) {
-        RDFService service = getRDFService();
-        try {
-            Dataset dataset = new RDFServiceDataset(service);
-            return dataset.getNamedModel(arg0);
-        } finally {
-            service.close();
-        }
-    }
 
+    /**
+     * Opens the named graph as the union of the RDFService-backed graph,
+     * with blank node triples filtered out, and an in-memory model holding
+     * the blank node statements, loaded up front with a CONSTRUCT query.
+     *
+     * @param graph URI of the graph to open; when null the blank node query
+     *              is not wrapped in a graph clause
+     * @param arg1  not used
+     * @return the combined model, or null if the blank node statements could
+     *         not be loaded
+     */
+    public Model openModel(String graph, boolean arg1) {
+
+        RDFService rdfService = getRDFService();
+
+        String bnodeQuery = "construct { ?s ?p ?o } where { ";
+        bnodeQuery += (graph != null) ? "graph <" + graph + "> {" : "";
+        bnodeQuery += "?s ?p ?o filter (isBlank(?s) || isBlank(?o)) }";
+        bnodeQuery += (graph != null) ? "}" : "";
+
+        Model bnodeModel = ModelFactory.createDefaultModel();
+        Model model = null;
+        // A single finally covers every exit from here on, so the service is
+        // also closed when the blank node query fails and null is returned.
+        try {
+            long start = System.currentTimeMillis();
+            try {
+                bnodeModel.read(rdfService.sparqlConstructQuery(bnodeQuery, ModelSerializationFormat.N3), null, "N3");
+                log.debug("constructed a model of blank nodes of size: " + bnodeModel.size() + " for graph " + graph);
+            } catch (RDFServiceException se) {
+                log.error("Error trying to create blank node model.", se);
+                return null;
+            } catch (Exception e) {
+                // pass the exception itself so the stack trace is not lost
+                log.error("error trying to create a blank node model: " + e.getMessage(), e);
+                return null;
+            }
+            long timeElapsedMillis = System.currentTimeMillis() - start;
+            log.debug("msecs to find blank nodes for graph " + graph + " " + timeElapsedMillis);
+
+            Dataset dataset = new RDFServiceDataset(rdfService);
+            model = dataset.getNamedModel(graph);
+        } finally {
+            rdfService.close();
+        }
+
+        Graph bnodeFilteringGraph = new BlankNodeFilteringGraph(model.getGraph());
+        Model bnodeFilteringModel = ModelFactory.createModelForGraph(bnodeFilteringGraph);
+
+        BulkUpdateHandler bulkUpdateHandler = model.getGraph().getBulkUpdateHandler();
+        Model unionModel = ModelFactory.createUnion(bnodeFilteringModel, bnodeModel);
+        Graph specialGraph = new SpecialBulkUpdateHandlerGraph(unionModel.getGraph(), bulkUpdateHandler);
+        Model specialUnionModel = ModelFactory.createModelForGraph(specialGraph);
+        bnodeFilteringModel.register(new BlankNodeStatementListener(bnodeModel));
+
+        return specialUnionModel;
+    }
+
     public void removeModel(String arg0) {
         Model m = getModel(arg0);
         m.removeAll(null,null,null);