From 0b79dfdf0128a9b2f2996cc6cb5f4218ddbd280b Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Mon, 27 Apr 2015 10:36:50 -0400 Subject: [PATCH] VIVO-1025 Improvements to the Virtuoso driver. --- .../dao/jena/RDFServiceGraphBulkUpdater.java | 4 +- .../impl/sparql/RDFServiceSparql.java | 4 +- .../impl/virtuoso/RDFServiceVirtuoso.java | 113 ++++++++++++++++-- .../sparql/ContentTripleSourceSPARQL.java | 18 ++- .../virtuoso/ContentTripleSourceVirtuoso.java | 85 +++++++++---- 5 files changed, 179 insertions(+), 45 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraphBulkUpdater.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraphBulkUpdater.java index 641805c60..3530fba32 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraphBulkUpdater.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/RDFServiceGraphBulkUpdater.java @@ -27,6 +27,7 @@ import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeSet; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; +import edu.cornell.mannlib.vitro.webapp.utils.logging.ToString; public class RDFServiceGraphBulkUpdater implements BulkUpdateHandler { private static final Log log = LogFactory.getLog(RDFServiceGraphBulkUpdater.class); @@ -203,7 +204,8 @@ public class RDFServiceGraphBulkUpdater implements BulkUpdateHandler { private static void removeAll(Graph g, Node s, Node p, Node o) { - log.debug("removeAll: g=" + g + ", s=" + s + ", p=" + p + ", o=" + o); + log.debug("removeAll: g=" + ToString.graphToString(g) + ", s=" + s + + ", p=" + p + ", o=" + o); if (!(g instanceof RDFServiceGraph)) { removeAllTripleByTriple(g, s, p, o); return; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java 
b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java index 2c6743fed..e17657da6 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java @@ -70,7 +70,7 @@ public class RDFServiceSparql extends RDFServiceImpl implements RDFService { private static final Log log = LogFactory.getLog(RDFServiceImpl.class); protected String readEndpointURI; protected String updateEndpointURI; - private CloseableHttpClient httpClient; + protected CloseableHttpClient httpClient; // the number of triples to be private static final int CHUNK_SIZE = 1000; // added/removed in a single // SPARQL UPDATE @@ -467,7 +467,7 @@ public class RDFServiceSparql extends RDFServiceImpl implements RDFService { try { int statusCode = response.getStatusLine().getStatusCode(); if (statusCode > 399) { - log.error("response " + statusCode + " to update. \n"); + log.error("response " + response.getStatusLine() + " to update. 
\n"); //log.debug("update string: \n" + updateString); throw new RDFServiceException("Unable to perform SPARQL UPDATE"); } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/virtuoso/RDFServiceVirtuoso.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/virtuoso/RDFServiceVirtuoso.java index ddca7a6c4..dca6100da 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/virtuoso/RDFServiceVirtuoso.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/virtuoso/RDFServiceVirtuoso.java @@ -7,41 +7,132 @@ import java.io.IOException; import java.io.InputStream; import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.http.auth.AuthScope; +import org.apache.http.auth.UsernamePasswordCredentials; +import org.apache.http.client.CredentialsProvider; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.client.methods.HttpPost; +import org.apache.http.client.protocol.HttpClientContext; +import org.apache.http.entity.StringEntity; +import org.apache.http.impl.client.BasicCredentialsProvider; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.sparql.RDFServiceSparql; /** - * For now, at least, it is just like an RDFServiceSparql except for: + * For now, at least, it is just like an RDFServiceSparql except: * - * A small change in the syntax of an UPDATE request. + * A username and password are required. These should refer to a Virtuoso user + * that possesses the SPARQL_UPDATE role. + * + * The endpoint URI and the update endpoint URI are derived from the base URI. + * You provide: http://localhost:8890 + * endpoint is: http://localhost:8890/sparql/ + * update is: http://localhost:8890/DAV/home/username/rdf_sink/vitro_update + * + * A change in the syntax of an UPDATE request: "INSERT DATA" becomes "INSERT". 
+ * This fixes a problem with inserting blank nodes. + * + * The HTTP request is equipped with the username and password, to answer a + * challenge for basic authentication. * * Allow for the nonNegativeInteger bug when checking to see whether a graph has * changed. */ public class RDFServiceVirtuoso extends RDFServiceSparql { + private static final Log log = LogFactory.getLog(RDFServiceVirtuoso.class); - public RDFServiceVirtuoso(String readEndpointURI, String updateEndpointURI, - String defaultWriteGraphURI) { - super(readEndpointURI, updateEndpointURI, defaultWriteGraphURI); + private final String username; + private final String password; + + public RDFServiceVirtuoso(String baseURI, String username, String password) { + super(figureReadEndpointUri(baseURI), figureUpdateEndpointUri(baseURI, + username)); + this.username = username; + this.password = password; } - public RDFServiceVirtuoso(String readEndpointURI, String updateEndpointURI) { - super(readEndpointURI, updateEndpointURI); + private static String figureReadEndpointUri(String baseUri) { + return noTrailingSlash(baseUri) + "/sparql/"; } - public RDFServiceVirtuoso(String endpointURI) { - super(endpointURI); + private static String figureUpdateEndpointUri(String baseUri, + String username) { + return noTrailingSlash(baseUri) + "/DAV/home/" + username + + "/rdf_sink/vitro_update"; + } + + private static String noTrailingSlash(String uri) { + return uri.endsWith("/") ? 
uri.substring(0, uri.length() - 1) : uri; } @Override protected void executeUpdate(String updateString) throws RDFServiceException { - super.executeUpdate(updateString.replace("INSERT DATA", "INSERT")); + updateString = tweakUpdateStringSyntax(updateString); + log.debug("UPDATE STRING: " + updateString); + + try { + CloseableHttpResponse response = httpClient.execute( + createHttpRequest(updateString), createHttpContext()); + try { + int statusCode = response.getStatusLine().getStatusCode(); + if (statusCode > 399) { + log.error("response " + response.getStatusLine() + + " to update. \n"); + + InputStream content = response.getEntity().getContent(); + for (String line : IOUtils.readLines(content)) { + log.error("response-line >>" + line); + } + + throw new RDFServiceException( + "Unable to perform SPARQL UPDATE: status code = " + + statusCode); + } + } finally { + response.close(); + } + } catch (Exception e) { + log.error("Failed to update: " + updateString, e); + throw new RDFServiceException( + "Unable to perform change set update", e); + } + } + + private String tweakUpdateStringSyntax(String updateString) { + if (updateString.startsWith("INSERT DATA")) { + return updateString.replaceFirst("INSERT DATA", "INSERT"); + } + return updateString; + } + + // TODO entity.setContentType("application/sparql-query"); + private HttpPost createHttpRequest(String updateString) { + HttpPost meth = new HttpPost(updateEndpointURI); + meth.addHeader("Content-Type", "application/sparql-query"); + meth.setEntity(new StringEntity(updateString, "UTF-8")); + return meth; } /** - * Virtuoso has a bug, which it shares with TDB: if given a literal of type + * We need an HttpContext that will provide username and password in + * response to a basic authentication challenge. 
+ */ + private HttpClientContext createHttpContext() { + CredentialsProvider provider = new BasicCredentialsProvider(); + provider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials( + username, password)); + + HttpClientContext context = HttpClientContext.create(); + context.setCredentialsProvider(provider); + return context; + } + + /** + * Virtuoso has a bug which it shares with TDB: if given a literal of type * xsd:nonNegativeInteger, it stores a literal of type xsd:integer. * * To determine whether this serialized graph is equivalent to what is diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/triplesource/impl/sparql/ContentTripleSourceSPARQL.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/triplesource/impl/sparql/ContentTripleSourceSPARQL.java index 59c6a1e62..cc19fac1e 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/triplesource/impl/sparql/ContentTripleSourceSPARQL.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/triplesource/impl/sparql/ContentTripleSourceSPARQL.java @@ -73,22 +73,20 @@ public class ContentTripleSourceSPARQL extends ContentTripleSource { @Override public void startup(Application application, ComponentStartupStatus ss) { - this.rdfServiceFactory = createRDFServiceFactory(createRDFService(ss, - endpointURI, updateEndpointURI)); + this.rdfServiceFactory = createRDFServiceFactory(createRDFService(ss)); this.rdfService = this.rdfServiceFactory.getRDFService(); this.dataset = createDataset(); this.modelMaker = createModelMaker(); } - protected RDFService createRDFService(ComponentStartupStatus ss, - String endpoint, String updateEndpoint) { - if (updateEndpoint == null) { - ss.info("Using endpoint at " + endpoint); - return new RDFServiceSparql(endpoint); + protected RDFService createRDFService(ComponentStartupStatus ss) { + if (updateEndpointURI == null) { + ss.info("Using endpoint at " + endpointURI); + return new RDFServiceSparql(endpointURI); } else { - ss.info("Using read endpoint at " + endpoint - + " 
and update endpoint at " + updateEndpoint); - return new RDFServiceSparql(endpoint, updateEndpoint); + ss.info("Using read endpoint at " + endpointURI + + " and update endpoint at " + updateEndpointURI); + return new RDFServiceSparql(endpointURI, updateEndpointURI); } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/triplesource/impl/virtuoso/ContentTripleSourceVirtuoso.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/triplesource/impl/virtuoso/ContentTripleSourceVirtuoso.java index adbf6d4c0..441279fb3 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/triplesource/impl/virtuoso/ContentTripleSourceVirtuoso.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/triplesource/impl/virtuoso/ContentTripleSourceVirtuoso.java @@ -7,36 +7,79 @@ import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.virtuoso.RDFServiceVirtuoso; import edu.cornell.mannlib.vitro.webapp.triplesource.impl.sparql.ContentTripleSourceSPARQL; import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation; +import edu.cornell.mannlib.vitro.webapp.utils.logging.ToString; /** * So far, it's just like a ContentTripleSourceSPARQL but it uses an instance of * RDFServiceVirtuoso. 
*/ public class ContentTripleSourceVirtuoso extends ContentTripleSourceSPARQL { + private String baseUri; + private String username; + private String password; - @Override - @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasEndpointURI") - public void setEndpointURI(String eUri) { - super.setEndpointURI(eUri); - } - - @Override - @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasUpdateEndpointURI") - public void setUpdateEndpointURI(String ueUri) { - super.setUpdateEndpointURI(ueUri); - } - - @Override - protected RDFService createRDFService(ComponentStartupStatus ss, - String endpoint, String updateEndpoint) { - if (updateEndpoint == null) { - ss.info("Using endpoint at " + endpoint); - return new RDFServiceVirtuoso(endpoint); + @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBaseURI") + public void setBaseUri(String uri) { + if (baseUri == null) { + baseUri = uri; } else { - ss.info("Using read endpoint at " + endpoint - + " and update endpoint at " + updateEndpoint); - return new RDFServiceVirtuoso(endpoint, updateEndpoint); + throw new IllegalStateException( + "Configuration includes multiple instances of BaseURI: " + + baseUri + ", and " + uri); } } + @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasUsername") + public void setUsername(String user) { + if (username == null) { + username = user; + } else { + throw new IllegalStateException( + "Configuration includes multiple instances of Username: " + + username + ", and " + user); + } + } + + @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasPassword") + public void setPassword(String pass) { + if (password == null) { + password = pass; + } else { + throw new IllegalStateException( + "Configuration includes multiple instances of Password: " + + password + ", and " + pass); + } + } + + @Override + @Validation + public void validate() throws Exception { + if (baseUri 
== null) { + throw new IllegalStateException( + "Configuration did not include a BaseURI."); + } + if (username == null) { + throw new IllegalStateException( + "Configuration did not include a Username."); + } + if (password == null) { + throw new IllegalStateException( + "Configuration did not include a Password."); + } + } + + @Override + protected RDFService createRDFService(ComponentStartupStatus ss) { + ss.info("Using Virtuoso at " + baseUri + ", authenticating as " + + username); + return new RDFServiceVirtuoso(baseUri, username, password); + } + + @Override + public String toString() { + return "ContentTripleSourceVirtuoso[" + ToString.hashHex(this) + + ", baseUri=" + baseUri + ", username=" + username + "]"; + } + }