From 6416e51bdbbc6c03eb858b3e26885b2cbe8d1023 Mon Sep 17 00:00:00 2001 From: brianjlowe Date: Wed, 25 Apr 2012 20:40:57 +0000 Subject: [PATCH] improvements to SparqlGraph getting closer to proper behavior of blank nodes --- .../vitro/webapp/dao/jena/SparqlGraph.java | 94 +++++++++------- .../dao/jena/SparqlGraphBulkUpdater.java | 101 +++++++++++++++++- .../webapp/servlet/setup/FileGraphSetup.java | 12 --- 3 files changed, 154 insertions(+), 53 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/SparqlGraph.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/SparqlGraph.java index 86f6ea4f1..95e0979f4 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/SparqlGraph.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/SparqlGraph.java @@ -79,7 +79,15 @@ public class SparqlGraph implements GraphWithPerform { this.repository = new HTTPRepository(endpointURI); } - private RepositoryConnection getConnection() { + public String getEndpointURI() { + return endpointURI; + } + + public String getGraphURI() { + return graphURI; + } + + public RepositoryConnection getConnection() { try { return this.repository.getConnection(); } catch (RepositoryException e) { @@ -92,63 +100,63 @@ public class SparqlGraph implements GraphWithPerform { performAdd(arg0); } + public void executeUpdate(String updateString) { + try { + RepositoryConnection conn = getConnection(); + try { + Update u = conn.prepareUpdate(QueryLanguage.SPARQL, updateString); + u.execute(); + } catch (MalformedQueryException e) { + throw new RuntimeException(e); + } catch (UpdateExecutionException e) { + throw new RuntimeException(e); + } finally { + conn.close(); + } + } catch (RepositoryException re) { + throw new RuntimeException(re); + } + } + @Override public void performAdd(Triple t) { //log.info("adding " + t); String updateString = "INSERT DATA { " + ((graphURI != null) ? 
"GRAPH <" + graphURI + "> { " : "" ) - + sparqlNode(t.getSubject(), "") + " " - + sparqlNode(t.getPredicate(), "") + " " - + sparqlNode(t.getObject(), "") + " } " + + sparqlNodeUpdate(t.getSubject(), "") + " " + + sparqlNodeUpdate(t.getPredicate(), "") + " " + + sparqlNodeUpdate(t.getObject(), "") + " } " + ((graphURI != null) ? " } " : ""); //log.info(updateString); - try { - RepositoryConnection conn = getConnection(); - try { - Update u = conn.prepareUpdate(QueryLanguage.SPARQL, updateString); - u.execute(); - } catch (MalformedQueryException e) { - throw new RuntimeException(e); - } catch (UpdateExecutionException e) { - throw new RuntimeException(e); - } finally { - conn.close(); - } - } catch (RepositoryException re) { - throw new RuntimeException(re); - } - + executeUpdate(updateString); + } @Override public void performDelete(Triple t) { String updateString = "DELETE DATA { " + ((graphURI != null) ? "GRAPH <" + graphURI + "> { " : "" ) - + sparqlNode(t.getSubject(), "") + " " - + sparqlNode(t.getPredicate(), "") + " " - + sparqlNode(t.getObject(), "") + " } " + + sparqlNodeUpdate(t.getSubject(), "") + " " + + sparqlNodeUpdate(t.getPredicate(), "") + " " + + sparqlNodeUpdate(t.getObject(), "") + " } " + ((graphURI != null) ? " } " : ""); //log.info(updateString); - try { - RepositoryConnection conn = getConnection(); - try { - Update u = conn.prepareUpdate(QueryLanguage.SPARQL, updateString); - u.execute(); - } catch (MalformedQueryException e) { - throw new RuntimeException(e); - } catch (UpdateExecutionException e) { - throw new RuntimeException(e); - } finally { - conn.close(); - } - } catch (RepositoryException re) { - throw new RuntimeException(re); - } + executeUpdate(updateString); + } + + public void removeAll() { + // now we flush out any remaining blank nodes + String updateString = "DELETE { ?s ?p ?o } WHERE { \n" + + ((getGraphURI() != null) ? ("GRAPH <" + getGraphURI() + "> { \n") : ("")) + + " ?s ?p ?o \n" + + ((getGraphURI() != null) ? 
"} \n" : "") + + "}"; + executeUpdate(updateString); } @Override @@ -205,7 +213,7 @@ public class SparqlGraph implements GraphWithPerform { if (node == null || node.isVariable()) { return varName; } else if (node.isBlank()) { - return "<" + "fake:blank" + ">"; // or throw exception? + return ""; // or throw exception? } else if (node.isURI()) { StringBuffer uriBuff = new StringBuffer(); return uriBuff.append("<").append(node.getURI()).append(">").toString(); @@ -225,6 +233,14 @@ public class SparqlGraph implements GraphWithPerform { } } + public static String sparqlNodeUpdate(Node node, String varName) { + if (node.isBlank()) { + return "_:" + node.getBlankNodeLabel().replaceAll("\\W", ""); + } else { + return sparqlNode(node, varName); + } + } + @Override public ExtendedIterator find(Node subject, Node predicate, Node object) { if (!isVar(subject) && !isVar(predicate) && !isVar(object)) { diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/SparqlGraphBulkUpdater.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/SparqlGraphBulkUpdater.java index 7021c0f5e..9561be18c 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/SparqlGraphBulkUpdater.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/SparqlGraphBulkUpdater.java @@ -1,18 +1,105 @@ package edu.cornell.mannlib.vitro.webapp.dao.jena; +import java.io.StringWriter; +import java.util.Iterator; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.openrdf.query.MalformedQueryException; +import org.openrdf.query.QueryLanguage; +import org.openrdf.query.Update; +import org.openrdf.query.UpdateExecutionException; +import org.openrdf.repository.RepositoryConnection; +import org.openrdf.repository.RepositoryException; + import com.hp.hpl.jena.graph.Graph; import com.hp.hpl.jena.graph.GraphEvents; import com.hp.hpl.jena.graph.GraphUtil; import com.hp.hpl.jena.graph.Node; import 
com.hp.hpl.jena.graph.Triple; -import com.hp.hpl.jena.graph.impl.GraphWithPerform; import com.hp.hpl.jena.graph.impl.SimpleBulkUpdateHandler; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; +import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.sparql.util.graph.GraphFactory; import com.hp.hpl.jena.util.iterator.ExtendedIterator; public class SparqlGraphBulkUpdater extends SimpleBulkUpdateHandler { - public SparqlGraphBulkUpdater(GraphWithPerform graph) { + private static final Log log = LogFactory.getLog(SparqlGraphBulkUpdater.class); + private SparqlGraph graph; + + public SparqlGraphBulkUpdater(SparqlGraph graph) { super(graph); + this.graph = graph; + } + + @Override + public void add(Triple[] arg0) { + Graph g = GraphFactory.createPlainGraph(); + for (int i = 0 ; i < arg0.length ; i++) { + g.add(arg0[i]); + } + add(g); + } + + @Override + public void add(List arg0) { + Graph g = GraphFactory.createPlainGraph(); + for (Triple t : arg0) { + g.add(t); + } + add(g); + } + + @Override + public void add(Iterator arg0) { + Graph g = GraphFactory.createPlainGraph(); + while (arg0.hasNext()) { + Triple t = arg0.next(); + g.add(t); + } + add(g); + } + + @Override + public void add(Graph arg0) { + add(arg0, false); + } + + @Override + public void add(Graph g, boolean arg1) { + Model gm = ModelFactory.createModelForGraph(g); + Model m = ModelFactory.createDefaultModel(); + int testLimit = 1000; + StmtIterator stmtIt = gm.listStatements(); + int count = 0; + try { + while (stmtIt.hasNext()) { + count++; + m.add(stmtIt.nextStatement()); + if (count % testLimit == 0 || !stmtIt.hasNext()) { + StringWriter sw = new StringWriter(); + m.write(sw, "N-TRIPLE"); + StringBuffer updateStringBuff = new StringBuffer(); + String graphURI = graph.getGraphURI(); + updateStringBuff.append("INSERT DATA { " + ((graphURI != null) ? 
"GRAPH <" + graphURI + "> { " : "" )); + updateStringBuff.append(sw); + updateStringBuff.append(((graphURI != null) ? " } " : "") + " }"); + + String updateString = updateStringBuff.toString(); + + //log.info(updateString); + + graph.executeUpdate(updateString); + + m.removeAll(); + } + } + } finally { + stmtIt.close(); + } } @Override @@ -33,6 +120,10 @@ public class SparqlGraphBulkUpdater extends SimpleBulkUpdateHandler { public static void removeAll(Graph g, Node s, Node p, Node o) { + // OK, so the strategy here should be to remove all triples without blank nodes first + // Then, feed the entire remaining part of the graph as a DELETE WHERE + // with the blank nodes as variables? + ExtendedIterator it = g.find( s, p, o ); try { while (it.hasNext()) { @@ -58,6 +149,12 @@ public class SparqlGraphBulkUpdater extends SimpleBulkUpdateHandler { } finally { it.close(); } + + // get rid of remaining blank nodes using a SPARQL DELETE + if (g instanceof SparqlGraph) { + ((SparqlGraph) g).removeAll(); + } + } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/FileGraphSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/FileGraphSetup.java index 1dd489931..49f895e01 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/FileGraphSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/servlet/setup/FileGraphSetup.java @@ -178,20 +178,8 @@ public class FileGraphSetup implements ServletContextListener { dbModel.add(fileModel); modelChanged = true; } else if (!isIsomorphic) { - System.out.println("=================================================="); - System.out.println("Remove the following print statement from FileGraphSetup.java"); - System.out.println("Updating " + path + " because graphs are not isomorphic"); log.info("Updating " + path + " because graphs are not isomorphic"); log.info("dbModel: " + dbModel.size() + " ; fileModel: " + fileModel.size()); - System.out.println("--------------------"); - 
System.out.println("fileModel - dbModel:"); - System.out.println("--------------------"); - fileModel.difference(dbModel).write(System.out); - System.out.println("--------------------"); - System.out.println("dbModel - fileModel:"); - System.out.println("--------------------"); - dbModel.difference(fileModel).write(System.out); - dbModel.removeAll(); dbModel.add(fileModel); modelChanged = true;