From 1bd242a0e185453a78744bddebe317153a756ae2 Mon Sep 17 00:00:00 2001 From: brianjlowe Date: Tue, 5 Jun 2012 20:57:45 +0000 Subject: [PATCH] NIHVIVO-3643 Jena graph implementation using RDFService --- .../webapp/dao/jena/RDFServiceDataset.java | 62 +++ .../dao/jena/RDFServiceDatasetGraph.java | 221 ++++++++ .../webapp/dao/jena/RDFServiceGraph.java | 471 ++++++++++++++++++ .../dao/jena/RDFServiceGraphBulkUpdater.java | 211 ++++++++ .../vitro/webapp/rdfservice/RDFService.java | 2 +- .../rdfservice/impl/sdb/RDFServiceSDB.java | 59 +++ 6 files changed, 1025 insertions(+), 1 deletion(-) create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceDataset.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceDatasetGraph.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraph.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraphBulkUpdater.java diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceDataset.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceDataset.java new file mode 100644 index 000000000..bc0b573dc --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceDataset.java @@ -0,0 +1,62 @@ +package edu.cornell.mannlib.vitro.webapp.dao.jena; + +import java.util.ArrayList; +import java.util.Iterator; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.query.Dataset; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.shared.Lock; +import com.hp.hpl.jena.sparql.core.DatasetGraph; + +public class RDFServiceDataset implements Dataset { + + private RDFServiceDatasetGraph g; + + public RDFServiceDataset(RDFServiceDatasetGraph g) { + this.g = g; + } + + @Override + public DatasetGraph asDatasetGraph() { + return g; + } + + @Override + public void close() { + g.close(); + } + + @Override + public boolean containsNamedModel(String arg0) { + return g.containsGraph(Node.createURI(arg0)); + } + + @Override + public Model getDefaultModel() { + return ModelFactory.createModelForGraph(g.getDefaultGraph()); + } + + @Override + public Lock getLock() { + return g.getLock(); + } + + @Override + public Model getNamedModel(String arg0) { + return ModelFactory.createModelForGraph(g.getGraph(Node.createURI(arg0))); + } + + @Override + public Iterator listNames() { + ArrayList nameList = new ArrayList(); + Iterator nodeIt = g.listGraphNodes(); + while (nodeIt.hasNext()) { + Node n = nodeIt.next(); + nameList.add(n.getURI()); + } + return nameList.iterator(); + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceDatasetGraph.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceDatasetGraph.java new file mode 100644 index 000000000..9a7965777 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceDatasetGraph.java @@ -0,0 +1,221 @@ +package edu.cornell.mannlib.vitro.webapp.dao.jena; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import com.hp.hpl.jena.graph.Graph; +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.shared.Lock; +import com.hp.hpl.jena.shared.LockMRSW; +import com.hp.hpl.jena.sparql.core.DatasetGraph; +import com.hp.hpl.jena.sparql.core.Quad; +import com.hp.hpl.jena.sparql.resultset.JSONInput; +import com.hp.hpl.jena.sparql.resultset.ResultSetMem; +import com.hp.hpl.jena.sparql.util.Context; +import com.hp.hpl.jena.util.iterator.SingletonIterator; +import com.hp.hpl.jena.util.iterator.WrappedIterator; + +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; + +public class RDFServiceDatasetGraph implements DatasetGraph { + + private RDFService rdfService; + private Lock lock = new LockMRSW(); + + public RDFServiceDatasetGraph(RDFService rdfService) { + this.rdfService = rdfService; + } + + private Graph getGraphFor(Quad q) { + return getGraphFor(q.getGraph()); + } + + private Graph getGraphFor(Node g) { + return (g == Node.ANY) + ? new RDFServiceGraph(rdfService) + : new RDFServiceGraph(rdfService, g.getURI()); + } + + @Override + public void add(Quad arg0) { + getGraphFor(arg0).add(new Triple(arg0.getSubject(), arg0.getPredicate(), arg0.getObject())); + } + + @Override + public void addGraph(Node arg0, Graph arg1) { + // TODO Auto-generated method stub + } + + @Override + public void close() { + // TODO Auto-generated method stub + } + + @Override + public boolean contains(Quad arg0) { + return getGraphFor(arg0).contains(new Triple(arg0.getSubject(), arg0.getPredicate(), arg0.getObject())); + } + + @Override + public boolean contains(Node arg0, Node arg1, Node arg2, Node arg3) { + return getGraphFor(arg0).contains(arg1, arg2, arg3); + } + + @Override + public boolean containsGraph(Node arg0) { + // TODO Auto-generated method stub + return true; + } + + @Override + public void delete(Quad arg0) { + getGraphFor(arg0).delete(new Triple(arg0.getSubject(), arg0.getPredicate(), arg0.getObject())); + } + + @Override + public void deleteAny(Node arg0, Node arg1, Node arg2, Node arg3) { + // TODO check this + getGraphFor(arg0).delete(new Triple(arg1, arg2, arg3)); + } + + @Override + public Iterator find() { + return find(Node.ANY, Node.ANY, Node.ANY, Node.ANY); + } + + @Override + public Iterator find(Quad arg0) { + return find(arg0.getSubject(), arg0.getPredicate(), arg0.getObject(), arg0.getGraph()); + } + + @Override + public Iterator find(Node graph, Node subject, Node predicate, Node object) { + if (!isVar(subject) && !isVar(predicate) && !isVar(object) &&!isVar(graph)) { + if (contains(subject, predicate, object, graph)) { + return new SingletonIterator(new Triple(subject, predicate, object)); + } else { + return WrappedIterator.create(Collections.EMPTY_LIST.iterator()); + } + } + StringBuffer findQuery = new StringBuffer("SELECT * WHERE { \n"); + String graphURI = !isVar(graph) ? graph.getURI() : null; + findQuery.append(" GRAPH "); + if (graphURI != null) { + findQuery.append(" <" + graphURI + ">"); + } else { + findQuery.append("?g"); + } + findQuery.append(" { "); + findQuery.append(SparqlGraph.sparqlNode(subject, "?s")) + .append(" ") + .append(SparqlGraph.sparqlNode(predicate, "?p")) + .append(" ") + .append(SparqlGraph.sparqlNode(object, "?o")); + findQuery.append(" } "); + findQuery.append("\n}"); + + //log.info(findQuery.toString()); + + ResultSet rs = null; + + try { + rs = JSONInput.fromJSON(rdfService.sparqlSelectQuery( + findQuery.toString(), RDFService.ResultFormat.JSON)); + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + + List quadlist = new ArrayList(); + while (rs.hasNext()) { + QuerySolution soln = rs.nextSolution(); + Quad q = new Quad(isVar(graph) ? soln.get("?g").asNode() : graph, + isVar(subject) ? soln.get("?s").asNode() : subject, + isVar(predicate) ? soln.get("?p").asNode() : predicate, + isVar(object) ? soln.get("?o").asNode() : object); + //log.info(t); + quadlist.add(q); + } + //log.info(triplist.size() + " results"); + return WrappedIterator.create(quadlist.iterator()); } + + @Override + public Iterator findNG(Node arg0, Node arg1, Node arg2, Node arg3) { + // TODO check this + return find(arg0, arg1, arg2, arg3); + } + + @Override + public Context getContext() { + // TODO Auto-generated method stub + return null; + } + + @Override + public Graph getDefaultGraph() { + return new RDFServiceGraph(rdfService); + } + + @Override + public Graph getGraph(Node arg0) { + return new RDFServiceGraph(rdfService, arg0.getURI()); + } + + @Override + public Lock getLock() { + return lock; + } + + @Override + public boolean isEmpty() { + // TODO Auto-generated method stub + return false; + } + + @Override + public Iterator listGraphNodes() { + List graphNodeList = new ArrayList(); + try { + for (String graphURI : rdfService.getGraphURIs()) { + graphNodeList.add(Node.createURI(graphURI)); + } + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + return graphNodeList.iterator(); + } + + @Override + public void removeGraph(Node arg0) { + // TODO Auto-generated method stub + + } + + @Override + public void setDefaultGraph(Graph arg0) { + // TODO Auto-generated method stub + + } + + @Override + public long size() { + // TODO Auto-generated method stub + return 0; + } + + private boolean isVar(Node node) { + return (node == null || node.isVariable() || node == Node.ANY); + } + + + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraph.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraph.java new file mode 100644 index 000000000..269737b9d --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraph.java @@ -0,0 +1,471 @@ +package edu.cornell.mannlib.vitro.webapp.dao.jena; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.hp.hpl.jena.graph.BulkUpdateHandler; +import com.hp.hpl.jena.graph.Capabilities; +import com.hp.hpl.jena.graph.Graph; +import com.hp.hpl.jena.graph.GraphEventManager; +import com.hp.hpl.jena.graph.GraphStatisticsHandler; +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Reifier; +import com.hp.hpl.jena.graph.TransactionHandler; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.graph.TripleMatch; +import com.hp.hpl.jena.graph.impl.GraphWithPerform; +import com.hp.hpl.jena.graph.impl.SimpleEventManager; +import com.hp.hpl.jena.graph.query.QueryHandler; +import com.hp.hpl.jena.graph.query.SimpleQueryHandler; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.shared.AddDeniedException; +import com.hp.hpl.jena.shared.DeleteDeniedException; +import com.hp.hpl.jena.shared.PrefixMapping; +import com.hp.hpl.jena.shared.impl.PrefixMappingImpl; +import com.hp.hpl.jena.sparql.resultset.JSONInput; +import com.hp.hpl.jena.util.iterator.ExtendedIterator; +import com.hp.hpl.jena.util.iterator.SingletonIterator; +import com.hp.hpl.jena.util.iterator.WrappedIterator; + +import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeSet; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; + +public class RDFServiceGraph implements GraphWithPerform { + + private RDFService rdfService; + private String graphURI; + private static final Log log = LogFactory.getLog(SparqlGraph.class); + + private BulkUpdateHandler bulkUpdateHandler; + private PrefixMapping prefixMapping = new PrefixMappingImpl(); + private GraphEventManager eventManager; + private Reifier reifier = new EmptyReifier(this); + private GraphStatisticsHandler graphStatisticsHandler; + private TransactionHandler transactionHandler; + private QueryHandler queryHandler; + + /** + * Returns a SparqlGraph for the union of named graphs in a remote repository + * @param endpointURI + */ + public RDFServiceGraph(RDFService rdfService) { + this(rdfService, null); + } + + /** + * Returns a SparqlGraph for a particular named graph in a remote repository + * @param endpointURI + * @param graphURI + */ + public RDFServiceGraph(RDFService rdfService, String graphURI) { + this.rdfService = rdfService; + this.graphURI = graphURI; + } + + public RDFService getRDFService() { + return this.rdfService; + } + + public String getGraphURI() { + return graphURI; + } + + @Override + public void add(Triple arg0) throws AddDeniedException { + performAdd(arg0); + } + +// public void executeUpdate(String updateString) { +// try { +// RepositoryConnection conn = getConnection(); +// try { +// Update u = conn.prepareUpdate(QueryLanguage.SPARQL, updateString); +// u.execute(); +// } catch (MalformedQueryException e) { +// throw new RuntimeException(e); +// } catch (UpdateExecutionException e) { +// log.error(e,e); +// log.error("Update command: \n" + updateString); +// throw new RuntimeException(e); +// } finally { +// conn.close(); +// } +// } catch (RepositoryException re) { +// throw new RuntimeException(re); +// } +// } + + private String serialize(Triple t) { + StringBuffer sb = new StringBuffer(); + sb.append(sparqlNodeUpdate(t.getSubject(), "")).append(" ") + .append(sparqlNodeUpdate(t.getPredicate(), "")).append(" ") + .append(sparqlNodeUpdate(t.getObject(), "")).append(" ."); + return sb.toString(); + } + + @Override + public void performAdd(Triple t) { + + ChangeSet changeSet = rdfService.manufactureChangeSet(); + try { + changeSet.addAddition(new ByteArrayInputStream( + serialize(t).getBytes()), RDFService.ModelSerializationFormat.N3, graphURI); + rdfService.changeSetUpdate(changeSet); + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + + } + + @Override + public void performDelete(Triple t) { + ChangeSet changeSet = rdfService.manufactureChangeSet(); + try { + changeSet.addRemoval(new ByteArrayInputStream( + serialize(t).getBytes()), RDFService.ModelSerializationFormat.N3, graphURI); + rdfService.changeSetUpdate(changeSet); + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + } + + public void removeAll() { + // only to be used with a single graph + if (graphURI == null) { + return; + } + String constructStr = "CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <" + graphURI + "> { ?s ?p ?o } }"; + try { + InputStream model = rdfService.sparqlConstructQuery( + constructStr, RDFService.ModelSerializationFormat.N3); + ChangeSet changeSet = rdfService.manufactureChangeSet(); + changeSet.addRemoval(model, RDFService.ModelSerializationFormat.N3, graphURI); + rdfService.changeSetUpdate(changeSet); + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + } + + @Override + public void close() { + // can't close a remote endpoint + } + + @Override + public boolean contains(Triple arg0) { + return contains(arg0.getSubject(), arg0.getPredicate(), arg0.getObject()); + } + + @Override + public boolean contains(Node subject, Node predicate, Node object) { + if (subject.isBlank() || predicate.isBlank() || object.isBlank()) { + return false; + } + StringBuffer containsQuery = new StringBuffer("ASK { \n"); + if (graphURI != null) { + containsQuery.append(" GRAPH <" + graphURI + "> { "); + } + containsQuery.append(sparqlNode(subject, "?s")) + .append(" ") + .append(sparqlNode(predicate, "?p")) + .append(" ") + .append(sparqlNode(object, "?o")); + if (graphURI != null) { + containsQuery.append(" } \n"); + } + containsQuery.append("\n}"); + boolean result = execAsk(containsQuery.toString()); + return result; + } + + @Override + public void delete(Triple arg0) throws DeleteDeniedException { + performDelete(arg0); + } + + @Override + public boolean dependsOn(Graph arg0) { + return false; // who knows? + } + + @Override + public ExtendedIterator find(TripleMatch arg0) { + //log.info("find(TripleMatch) " + arg0); + Triple t = arg0.asTriple(); + return find(t.getSubject(), t.getPredicate(), t.getObject()); + } + + public static String sparqlNode(Node node, String varName) { + if (node == null || node.isVariable()) { + return varName; + } else if (node.isBlank()) { + return ""; // or throw exception? + } else if (node.isURI()) { + StringBuffer uriBuff = new StringBuffer(); + return uriBuff.append("<").append(node.getURI()).append(">").toString(); + } else if (node.isLiteral()) { + StringBuffer literalBuff = new StringBuffer(); + literalBuff.append("\""); + pyString(literalBuff, node.getLiteralLexicalForm()); + literalBuff.append("\""); + if (node.getLiteralDatatypeURI() != null) { + literalBuff.append("^^<").append(node.getLiteralDatatypeURI()).append(">"); + } else if (node.getLiteralLanguage() != null && node.getLiteralLanguage() != "") { + literalBuff.append("@").append(node.getLiteralLanguage()); + } + return literalBuff.toString(); + } else { + return varName; + } + } + + public static String sparqlNodeUpdate(Node node, String varName) { + if (node.isBlank()) { + return "_:" + node.getBlankNodeLabel().replaceAll("\\W", ""); + } else { + return sparqlNode(node, varName); + } + } + + public static String sparqlNodeDelete(Node node, String varName) { + if (node.isBlank()) { + return "?" + node.getBlankNodeLabel().replaceAll("\\W", ""); + } else { + return sparqlNode(node, varName); + } + } + + @Override + public ExtendedIterator find(Node subject, Node predicate, Node object) { + if (!isVar(subject) && !isVar(predicate) && !isVar(object)) { + if (contains(subject, predicate, object)) { + return new SingletonIterator(new Triple(subject, predicate, object)); + } else { + return WrappedIterator.create(Collections.EMPTY_LIST.iterator()); + } + } + StringBuffer findQuery = new StringBuffer("SELECT * WHERE { \n"); + if (graphURI != null) { + findQuery.append(" GRAPH <" + graphURI + "> { "); + } + findQuery.append(sparqlNode(subject, "?s")) + .append(" ") + .append(sparqlNode(predicate, "?p")) + .append(" ") + .append(sparqlNode(object, "?o")); + if (graphURI != null) { + findQuery.append(" } "); + } + findQuery.append("\n}"); + + String queryString = findQuery.toString(); + //log.info(queryString); + +// //TODO remove me +// if (queryString.contains("individual/AI") && queryString.contains("label")) { +// throw new RuntimeException("break!"); +// } + + ResultSet rs = execSelect(queryString); + //rs = execSelect(findQuery.toString()); + //rs = execSelect(findQuery.toString()); + + List triplist = new ArrayList(); + while (rs.hasNext()) { + QuerySolution soln = rs.nextSolution(); + Triple t = new Triple(isVar(subject) ? soln.get("?s").asNode() : subject, + isVar(predicate) ? soln.get("?p").asNode() : predicate, + isVar(object) ? soln.get("?o").asNode() : object); + //log.info(t); + triplist.add(t); + } + //log.info(triplist.size() + " results"); + return WrappedIterator.create(triplist.iterator()); + } + + private boolean isVar(Node node) { + return (node == null || node.isVariable() || node == Node.ANY); + } + + @Override + public BulkUpdateHandler getBulkUpdateHandler() { + if (this.bulkUpdateHandler == null) { + this.bulkUpdateHandler = new RDFServiceGraphBulkUpdater(this); + } + return this.bulkUpdateHandler; + } + + @Override + public Capabilities getCapabilities() { + return capabilities; + } + + @Override + public GraphEventManager getEventManager() { + if (eventManager == null) { + eventManager = new SimpleEventManager(this); + } + return eventManager; + } + + @Override + public PrefixMapping getPrefixMapping() { + return prefixMapping; + } + + @Override + public Reifier getReifier() { + //if (reifier == null) { + // reifier = new SimpleReifier(this, ReificationStyle.Standard); + //} + return reifier; + } + + @Override + public GraphStatisticsHandler getStatisticsHandler() { + return null; + } + + @Override + public TransactionHandler getTransactionHandler() { + // TODO Auto-generated method stub + return null; + } + + @Override + public boolean isClosed() { + // TODO Auto-generated method stub + return false; + } + + @Override + public boolean isEmpty() { + return (size() == 0); + } + + @Override + public boolean isIsomorphicWith(Graph arg0) { + log.info("Hey dummy!"); + throw new UnsupportedOperationException("isIsomorphicWith() not supported " + + "by SPARQL graphs"); + } + + @Override + public QueryHandler queryHandler() { + if (queryHandler == null) { + queryHandler = new SimpleQueryHandler(this); + } + return queryHandler; + } + + @Override + public int size() { + int size = find(null, null, null).toList().size(); + return size; + } + + private final static Capabilities capabilities = new Capabilities() { + + public boolean addAllowed() { + return false; + } + + public boolean addAllowed(boolean everyTriple) { + return false; + } + + public boolean canBeEmpty() { + return true; + } + + public boolean deleteAllowed() { + return false; + } + + public boolean deleteAllowed(boolean everyTriple) { + return false; + } + + public boolean findContractSafe() { + return true; + } + + public boolean handlesLiteralTyping() { + return true; + } + + public boolean iteratorRemoveAllowed() { + return false; + } + + public boolean sizeAccurate() { + return true; + } + }; + + private boolean execAsk(String queryStr) { + try { + return rdfService.sparqlAskQuery(queryStr); + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + } + + private ResultSet execSelect(String queryStr) { + try { + return JSONInput.fromJSON(rdfService.sparqlSelectQuery( + queryStr, RDFService.ResultFormat.JSON)); + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + } + + /* + * + * see http://www.python.org/doc/2.5.2/ref/strings.html + * or see jena's n3 grammar jena/src/com/hp/hpl/jena/n3/n3.g + */ + protected static void pyString(StringBuffer sbuff, String s) + { + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + + // Escape escapes and quotes + if (c == '\\' || c == '"' ) + { + sbuff.append('\\') ; + sbuff.append(c) ; + continue ; + } + + // Whitespace + if (c == '\n'){ sbuff.append("\\n");continue; } + if (c == '\t'){ sbuff.append("\\t");continue; } + if (c == '\r'){ sbuff.append("\\r");continue; } + if (c == '\f'){ sbuff.append("\\f");continue; } + if (c == '\b'){ sbuff.append("\\b");continue; } + if( c == 7 ) { sbuff.append("\\a");continue; } + + // Output as is (subject to UTF-8 encoding on output that is) + sbuff.append(c) ; + +// // Unicode escapes +// // c < 32, c >= 127, not whitespace or other specials +// String hexstr = Integer.toHexString(c).toUpperCase(); +// int pad = 4 - hexstr.length(); +// sbuff.append("\\u"); +// for (; pad > 0; pad--) +// sbuff.append("0"); +// sbuff.append(hexstr); + } + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraphBulkUpdater.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraphBulkUpdater.java new file mode 100644 index 000000000..3d7699121 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraphBulkUpdater.java @@ -0,0 +1,211 @@ +package edu.cornell.mannlib.vitro.webapp.dao.jena; + +import java.io.IOException; +import java.io.PipedInputStream; +import java.io.PipedOutputStream; +import java.util.Iterator; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.hp.hpl.jena.graph.Graph; +import com.hp.hpl.jena.graph.GraphEvents; +import com.hp.hpl.jena.graph.GraphUtil; +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.graph.impl.SimpleBulkUpdateHandler; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.sparql.util.graph.GraphFactory; +import com.hp.hpl.jena.util.iterator.ExtendedIterator; + +import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeSet; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; + +public class RDFServiceGraphBulkUpdater extends SimpleBulkUpdateHandler { + + private static final Log log = LogFactory.getLog(SparqlGraphBulkUpdater.class); + private RDFServiceGraph graph; + + public RDFServiceGraphBulkUpdater(RDFServiceGraph graph) { + super(graph); + this.graph = graph; + } + + @Override + public void add(Triple[] arg0) { + Graph g = GraphFactory.createPlainGraph(); + for (int i = 0 ; i < arg0.length ; i++) { + g.add(arg0[i]); + } + add(g); + } + + @Override + public void add(List arg0) { + Graph g = GraphFactory.createPlainGraph(); + for (Triple t : arg0) { + g.add(t); + } + add(g); + } + + @Override + public void add(Iterator arg0) { + Graph g = GraphFactory.createPlainGraph(); + while (arg0.hasNext()) { + Triple t = arg0.next(); + g.add(t); + } + add(g); + } + + @Override + public void add(Graph arg0) { + add(arg0, false); + } + + @Override + public void add(Graph g, boolean arg1) { + log.info("adding graph"); + Model[] model = separateStatementsWithBlankNodes(g); + addModel(model[1] /* nonBlankNodeModel */); + // replace following call with different method + addModel(model[0] /*blankNodeModel*/); + } + + /** + * Returns a pair of models. The first contains any statement containing at + * least one blank node. The second contains all remaining statements. + * @param g + * @return + */ + + private Model[] separateStatementsWithBlankNodes(Graph g) { + Model gm = ModelFactory.createModelForGraph(g); + Model blankNodeModel = ModelFactory.createDefaultModel(); + Model nonBlankNodeModel = ModelFactory.createDefaultModel(); + StmtIterator sit = gm.listStatements(); + while (sit.hasNext()) { + Statement stmt = sit.nextStatement(); + if (!stmt.getSubject().isAnon() && !stmt.getObject().isAnon()) { + nonBlankNodeModel.add(stmt); + } else { + blankNodeModel.add(stmt); + } + } + Model[] result = new Model[2]; + result[0] = blankNodeModel; + result[1] = nonBlankNodeModel; + return result; + } + + + @Override + public void delete(Graph g, boolean withReifications) { + delete(g); + } + + @Override + public void delete(Graph g) { + ChangeSet changeSet = graph.getRDFService().manufactureChangeSet(); + Model m = ModelFactory.createModelForGraph(g); + PipedOutputStream out = new PipedOutputStream(); + m.write(out, "N-TRIPLE"); + try { + changeSet.addRemoval(new PipedInputStream( + out), RDFService.ModelSerializationFormat.N3, graph.getGraphURI()); + graph.getRDFService().changeSetUpdate(changeSet); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + } + + public void addModel(Model model) { + ChangeSet changeSet = graph.getRDFService().manufactureChangeSet(); + PipedOutputStream out = new PipedOutputStream(); + model.write(out, "N-TRIPLE"); + try { + changeSet.addAddition(new PipedInputStream( + out), RDFService.ModelSerializationFormat.N3, graph.getGraphURI()); + graph.getRDFService().changeSetUpdate(changeSet); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + } + + public void deleteModel(Model model) { + ChangeSet changeSet = graph.getRDFService().manufactureChangeSet(); + PipedOutputStream out = new PipedOutputStream(); + model.write(out, "N-TRIPLE"); + try { + changeSet.addRemoval(new PipedInputStream( + out), RDFService.ModelSerializationFormat.N3, graph.getGraphURI()); + graph.getRDFService().changeSetUpdate(changeSet); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } catch (RDFServiceException rdfse) { + throw new RuntimeException(rdfse); + } + } + + @Override + public void removeAll() { + removeAll(graph); + notifyRemoveAll(); + } + + protected void notifyRemoveAll() { + manager.notifyEvent(graph, GraphEvents.removeAll); + } + + @Override + public void remove(Node s, Node p, Node o) { + removeAll(graph, s, p, o); + manager.notifyEvent(graph, GraphEvents.remove(s, p, o)); + } + + public static void removeAll(Graph g, Node s, Node p, Node o) + { + ExtendedIterator it = g.find( s, p, o ); + try { + while (it.hasNext()) { + Triple t = it.next(); + g.delete(t); + it.remove(); + } + } + finally { + it.close(); + } + } + + public static void removeAll( Graph g ) + { + ExtendedIterator it = GraphUtil.findAll(g); + try { + while (it.hasNext()) { + Triple t = it.next(); + g.delete(t); + it.remove(); + } + } finally { + it.close(); + } + + // get rid of remaining blank nodes using a SPARQL DELETE + if (g instanceof SparqlGraph) { + ((SparqlGraph) g).removeAll(); + } + + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/RDFService.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/RDFService.java index cb78f6b52..689db1ec1 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/RDFService.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/RDFService.java @@ -146,7 +146,7 @@ public interface RDFService { * @return ChangeSet an empty ChangeSet object */ public ChangeSet manufactureChangeSet(); - + /** * Free any resources held by this RDFService object */ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sdb/RDFServiceSDB.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sdb/RDFServiceSDB.java index e2476e3f5..18ebc9108 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sdb/RDFServiceSDB.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sdb/RDFServiceSDB.java @@ -12,6 +12,7 @@ import org.apache.commons.dbcp.BasicDataSource; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.Dataset; import com.hp.hpl.jena.query.Query; import com.hp.hpl.jena.query.QueryExecution; @@ -23,11 +24,18 @@ import com.hp.hpl.jena.rdf.listeners.StatementListener; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; import com.hp.hpl.jena.rdf.model.Statement; +import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.sdb.SDBFactory; import com.hp.hpl.jena.sdb.StoreDesc; import com.hp.hpl.jena.sdb.sql.SDBConnection; +import com.hp.hpl.jena.update.GraphStore; +import com.hp.hpl.jena.update.GraphStoreFactory; +import com.hp.hpl.jena.update.UpdateAction; +import com.hp.hpl.jena.update.UpdateFactory; +import com.hp.hpl.jena.update.UpdateRequest; import edu.cornell.mannlib.vitro.webapp.dao.jena.DatasetWrapper; +import edu.cornell.mannlib.vitro.webapp.dao.jena.SparqlGraph; import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeSet; import edu.cornell.mannlib.vitro.webapp.rdfservice.ModelChange; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; @@ -93,6 +101,7 @@ public class RDFServiceSDB extends RDFServiceImpl implements RDFService { model.add(parseModel(modelChange)); } else if (modelChange.getOperation() == ModelChange.Operation.REMOVE) { model.remove(parseModel(modelChange)); + removeBlankNodesWithSparqlUpdate(dataset, model, modelChange.getGraphURI()); } else { log.error("unrecognized operation type"); } @@ -112,6 +121,56 @@ public class RDFServiceSDB extends RDFServiceImpl implements RDFService { return true; } + private void removeBlankNodesWithSparqlUpdate(Dataset dataset, Model model, String graphURI) { + Model blankNodeModel = ModelFactory.createDefaultModel(); + StmtIterator stmtIt = model.listStatements(); + while (stmtIt.hasNext()) { + Statement stmt = stmtIt.nextStatement(); + if (stmt.getSubject().isAnon() || stmt.getObject().isAnon()) { + blankNodeModel.add(stmt); + } + } + removeUsingSparqlUpdate(dataset, blankNodeModel, graphURI); + } + + private void removeUsingSparqlUpdate(Dataset dataset, Model model, String graphURI) { + + StringBuffer patternBuff = new StringBuffer(); + StmtIterator stmtIt = model.listStatements(); + while(stmtIt.hasNext()) { + Triple t = stmtIt.next().asTriple(); + patternBuff.append(SparqlGraph.sparqlNodeDelete(t.getSubject(), null)); + patternBuff.append(" "); + patternBuff.append(SparqlGraph.sparqlNodeDelete(t.getPredicate(), null)); + patternBuff.append(" "); + patternBuff.append(SparqlGraph.sparqlNodeDelete(t.getObject(), null)); + patternBuff.append(" .\n"); + } + + StringBuffer queryBuff = new StringBuffer(); + queryBuff.append("DELETE { " + ((graphURI != null) ? "GRAPH <" + graphURI + "> { " : "" ) + " \n"); + queryBuff.append(patternBuff); + if (graphURI != null) { + queryBuff.append(" } \n"); + } + queryBuff.append("} WHERE { \n"); + if (graphURI != null) { + queryBuff.append(" GRAPH <" + graphURI + "> { \n"); + } + queryBuff.append(patternBuff); + if (graphURI != null) { + queryBuff.append(" } \n"); + } + queryBuff.append("} \n"); + + //log.debug(queryBuff.toString()); + + GraphStore graphStore = GraphStoreFactory.create(dataset); + UpdateRequest request = UpdateFactory.create(); + request.add(queryBuff.toString()); + UpdateAction.execute(request, graphStore); + } + private Model parseModel(ModelChange modelChange) { Model model = ModelFactory.createDefaultModel(); model.read(modelChange.getSerializedModel(),