From cca747f86fc38f91122c2a738ae885b8c0637bdd Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Mon, 16 Feb 2015 12:21:18 -0500 Subject: [PATCH] VIVO-974 Replace streaming variant of sparqlSelectQuery() with serializeGraph() and serializeAll() --- .../dumprestore/DumpModelsAction.java | 5 +- .../datatools/dumprestore/JsonToNquads.java | 128 ------------------ .../vitro/webapp/rdfservice/RDFService.java | 38 ++++-- .../filter/LanguageFilteringRDFService.java | 28 ++-- .../SameAsFilteringRDFServiceFactory.java | 28 ++-- .../impl/RDFServiceFactorySingle.java | 18 ++- .../rdfservice/impl/jena/RDFServiceJena.java | 84 ++++++------ .../impl/jena/tdb/RDFServiceTDB.java | 33 +++-- .../impl/logging/LoggingRDFService.java | 23 +++- .../impl/sparql/RDFServiceSparql.java | 45 ++++-- .../utils/sparql/ResultSetIterators.java | 97 +++++++++++++ 11 files changed, 271 insertions(+), 256 deletions(-) delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/controller/datatools/dumprestore/JsonToNquads.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/utils/sparql/ResultSetIterators.java diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/datatools/dumprestore/DumpModelsAction.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/datatools/dumprestore/DumpModelsAction.java index 1787742f5..5e3dd58e7 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/datatools/dumprestore/DumpModelsAction.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/datatools/dumprestore/DumpModelsAction.java @@ -16,7 +16,6 @@ import org.apache.commons.logging.LogFactory; import edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.BadRequestException; import edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; /** @@ -70,9 +69,7 @@ class DumpModelsAction extends AbstractDumpRestoreAction { */ private void dumpNQuads(RDFService rdfService, String query) throws RDFServiceException, IOException { - JsonToNquads converter = new JsonToNquads(resp.getOutputStream()); - rdfService.sparqlSelectQuery(query, ResultFormat.JSON, converter); - converter.close(); + rdfService.serializeAll(resp.getOutputStream()); } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/datatools/dumprestore/JsonToNquads.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/datatools/dumprestore/JsonToNquads.java deleted file mode 100644 index acc0d7dd4..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/datatools/dumprestore/JsonToNquads.java +++ /dev/null @@ -1,128 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.io.OutputStreamWriter; -import java.io.StringReader; -import java.io.Writer; - -import javax.json.Json; -import javax.json.JsonObject; -import javax.json.JsonReader; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -/** - * An output stream decorator that converts a stream of - * application/sparql-results+json to a stream of application/n-quads - * - * This could be a lot more efficient. - */ -public class JsonToNquads extends OutputStream { - private static final Log log = LogFactory.getLog(JsonToNquads.class); - - private final Writer writer; - private final ByteArrayOutputStream buffer = new ByteArrayOutputStream(); - private final ByteArrayOutputStream header = new ByteArrayOutputStream(); - - private boolean headerIsComplete; - private long recordCount; - - public JsonToNquads(OutputStream out) throws IOException { - this.writer = new OutputStreamWriter(out, "UTF-8"); - log.info("Dump beginning."); - } - - @Override - public void flush() throws IOException { - writer.flush(); - } - - @Override - public void close() throws IOException { - writer.close(); - log.info("Dump is complete: " + recordCount + " records."); - log.debug("Left over in the buffer: '" + buffer + "'"); - } - - @Override - public void write(int b) throws IOException { - if (!headerIsComplete) { - writeToHeader(b); - } else { - buffer.write(b); - if (bufferHoldsARecord()) { - processRecord(); - buffer.reset(); - } - } - } - - private void writeToHeader(int b) { - header.write((byte) b); - String text = header.toString(); - int bindingsHere = text.indexOf("\"bindings\""); - int lastColonHere = text.lastIndexOf(":"); - int lastOpenBracket = text.lastIndexOf("["); - headerIsComplete = (bindingsHere >= 0) - && (lastColonHere > bindingsHere) - && (lastOpenBracket > lastColonHere); - log.debug("complete=" + headerIsComplete + ", header='" + text + "'"); - } - - private boolean bufferHoldsARecord() throws IOException { - String text = buffer.toString("UTF-8"); - boolean inQuotes = false; - int braceLevel = 0; - char previous = 0; - for (char c : text.toCharArray()) { - if (inQuotes) { - if ((c == '"') && (previous != '\\')) { - inQuotes = false; - } - } else { - if (c == '"') { - inQuotes = true; - } else if (c == '{') { - braceLevel++; - } else if (c == '}') { - braceLevel--; - } - } - previous = c; - } - return (braceLevel == 0) && (text.endsWith(",") || text.endsWith("]")); - } - - private void processRecord() throws IOException { - String text = buffer.toString("UTF-8"); - log.debug("Parsing record: '" + text + "'"); - try (JsonReader jsRead = Json.createReader(new StringReader(text))) { - JsonObject jsRecord = jsRead.readObject(); - DumpNode s = DumpNode.fromJson(jsRecord.getJsonObject("s")); - DumpNode p = DumpNode.fromJson(jsRecord.getJsonObject("p")); - DumpNode o = DumpNode.fromJson(jsRecord.getJsonObject("o")); - DumpNode g = DumpNode.fromJson(jsRecord.getJsonObject("g")); - - if (g == null) { - writer.write(String.format("%s %s %s .\n", s.toNquad(), - p.toNquad(), o.toNquad())); - } else { - writer.write(String.format("%s %s %s %s .\n", s.toNquad(), - p.toNquad(), o.toNquad(), g.toNquad())); - } - - recordCount++; - if (recordCount % 10000 == 0) { - log.info("dumped " + recordCount + " records."); - } - } catch (Exception e) { - log.error("Failed to parse record: '" + text + "'", e); - throw new RuntimeException(e); - } - } -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/RDFService.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/RDFService.java index 487fc93ab..2c7ea4425 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/RDFService.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/RDFService.java @@ -87,21 +87,6 @@ public interface RDFService { */ public InputStream sparqlDescribeQuery(String query, RDFService.ModelSerializationFormat resultFormat) throws RDFServiceException; - /** - * Performs a SPARQL select query against the knowledge base. The query may have - * an embedded graph identifier. If the query does not contain a graph identifier - * the query is executed against the union of all named and unnamed graphs in the - * store. - * - * Preferred for streaming because it avoids in-memory buffering. - * - * @param query - the SPARQL query to be executed against the RDF store - * @param resultFormat - format for the result of the Select query - * @param outputStream - receives the result of the query - * - */ - public void sparqlSelectQuery(String query, RDFService.ResultFormat resultFormat, OutputStream outputStream) throws RDFServiceException; - /** * Performs a SPARQL select query against the knowledge base. The query may have * an embedded graph identifier. If the query does not contain a graph identifier @@ -151,6 +136,29 @@ public interface RDFService { */ public String getDefaultWriteGraphURI() throws RDFServiceException; + /** + * Serializes the union of all named and unnamed graphs in the store to the + * supplied OutputStream, in N-Quads format. This method is designed for + * exporting data from VIVO, so any filters should be bypassed. If possible, + * this should be done without buffering in memory, so arbitrarily large + * graphs can be exported. + * + * @param outputStream - receives the serialized result. + */ + public void serializeAll(OutputStream outputStream) throws RDFServiceException; + + /** + * Serializes the contents of the named graph to the supplied OutputStream, + * in N-Triples format. This method is designed for exporting data from + * VIVO, so any filters should be bypassed. If possible, this should be + * done without buffering in memory, so arbitrarily large graphs can be + * exported. + * + * @param graphURI - the URI of the desired graph. May not be null. + * @param outputStream - receives the serialized result. + */ + public void serializeGraph(String graphURI, OutputStream outputStream) throws RDFServiceException; + /** * Registers a listener to listen to changes in any graph in * the RDF store. diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFService.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFService.java index b769d2e98..11bf8bcf1 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFService.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFService.java @@ -4,7 +4,6 @@ package edu.cornell.mannlib.vitro.webapp.rdfservice.filter; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; @@ -13,7 +12,6 @@ import java.util.Comparator; import java.util.Iterator; import java.util.List; -import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -157,20 +155,6 @@ public class LanguageFilteringRDFService implements RDFService { return langStrings.toString(); } - /** - * TODO rewrite the filtering to use this form - avoid one level of - * buffering. - */ - @Override - public void sparqlSelectQuery(String query, ResultFormat resultFormat, - OutputStream outputStream) throws RDFServiceException { - try (InputStream input = sparqlSelectQuery(query, resultFormat)){ - IOUtils.copy(input, outputStream); - } catch (IOException e) { - throw new RDFServiceException(e); - } - } - @Override public InputStream sparqlSelectQuery(String query, ResultFormat resultFormat) throws RDFServiceException { @@ -340,6 +324,18 @@ public class LanguageFilteringRDFService implements RDFService { } @Override + public void serializeAll(OutputStream outputStream) + throws RDFServiceException { + s.serializeAll(outputStream); + } + + @Override + public void serializeGraph(String graphURI, OutputStream outputStream) + throws RDFServiceException { + s.serializeGraph(graphURI, outputStream); + } + + @Override public void registerListener(ChangeListener changeListener) throws RDFServiceException { // TODO Auto-generated method stub diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/SameAsFilteringRDFServiceFactory.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/SameAsFilteringRDFServiceFactory.java index cf0f7c3fd..3af8ebb85 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/SameAsFilteringRDFServiceFactory.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/SameAsFilteringRDFServiceFactory.java @@ -4,14 +4,12 @@ package edu.cornell.mannlib.vitro.webapp.rdfservice.filter; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; -import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.Iterator; import java.util.List; -import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -107,20 +105,6 @@ public class SameAsFilteringRDFServiceFactory implements RDFServiceFactory { return new ByteArrayInputStream(out.toByteArray()); } - /** - * TODO rewrite the filtering to use this form instead - avoid one level of - * buffering. - */ - @Override - public void sparqlSelectQuery(String query, ResultFormat resultFormat, - OutputStream outputStream) throws RDFServiceException { - try (InputStream input = sparqlSelectQuery(query, resultFormat)) { - IOUtils.copy(input, outputStream); - } catch (IOException e) { - throw new RDFServiceException(e); - } - } - @Override public InputStream sparqlSelectQuery(String query, ResultFormat resultFormat) throws RDFServiceException { @@ -256,6 +240,18 @@ public class SameAsFilteringRDFServiceFactory implements RDFServiceFactory { s.getGraphMetadata(); } + @Override + public void serializeAll(OutputStream outputStream) + throws RDFServiceException { + s.serializeAll(outputStream); + } + + @Override + public void serializeGraph(String graphURI, OutputStream outputStream) + throws RDFServiceException { + s.serializeGraph(graphURI, outputStream); + } + @Override public void close() { s.close(); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/RDFServiceFactorySingle.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/RDFServiceFactorySingle.java index a1915a189..9565508fd 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/RDFServiceFactorySingle.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/RDFServiceFactorySingle.java @@ -87,12 +87,6 @@ public class RDFServiceFactorySingle implements RDFServiceFactory { return s.sparqlDescribeQuery(query, resultFormat); } - @Override - public void sparqlSelectQuery(String query, ResultFormat resultFormat, - OutputStream outputStream) throws RDFServiceException { - s.sparqlSelectQuery(query, resultFormat, outputStream); - } - @Override public InputStream sparqlSelectQuery(String query, ResultFormat resultFormat) throws RDFServiceException { @@ -120,6 +114,18 @@ public class RDFServiceFactorySingle implements RDFServiceFactory { } @Override + public void serializeAll(OutputStream outputStream) + throws RDFServiceException { + s.serializeAll(outputStream); + } + + @Override + public void serializeGraph(String graphURI, OutputStream outputStream) + throws RDFServiceException { + s.serializeGraph(graphURI, outputStream); + } + + @Override public void registerListener(ChangeListener changeListener) throws RDFServiceException { s.registerListener(changeListener); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/RDFServiceJena.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/RDFServiceJena.java index 76e057489..5c1e46d30 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/RDFServiceJena.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/RDFServiceJena.java @@ -15,6 +15,7 @@ import java.util.concurrent.ConcurrentLinkedQueue; import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.jena.riot.RDFDataMgr; import org.apache.log4j.lf5.util.StreamUtils; import com.hp.hpl.jena.graph.Triple; @@ -35,6 +36,7 @@ import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.sdb.SDB; import com.hp.hpl.jena.shared.Lock; +import com.hp.hpl.jena.sparql.core.Quad; import edu.cornell.mannlib.vitro.webapp.dao.jena.DatasetWrapper; import edu.cornell.mannlib.vitro.webapp.dao.jena.SparqlGraph; @@ -44,6 +46,8 @@ import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceImpl; import edu.cornell.mannlib.vitro.webapp.utils.logging.ToString; +import edu.cornell.mannlib.vitro.webapp.utils.sparql.ResultSetIterators.ResultSetQuadsIterator; +import edu.cornell.mannlib.vitro.webapp.utils.sparql.ResultSetIterators.ResultSetTriplesIterator; public abstract class RDFServiceJena extends RDFServiceImpl implements RDFService { @@ -430,45 +434,6 @@ public abstract class RDFServiceJena extends RDFServiceImpl implements RDFServic return getRDFResultStream(query, DESCRIBE, resultFormat); } - @Override - public void sparqlSelectQuery(String query, ResultFormat resultFormat, - OutputStream outputStream) throws RDFServiceException { - DatasetWrapper dw = getDatasetWrapper(); - try { - Dataset d = dw.getDataset(); - Query q = createQuery(query); - QueryExecution qe = createQueryExecution(query, q, d); - // These properties only help for SDB, but shouldn't hurt for TDB. - qe.getContext().set(SDB.jdbcFetchSize, Integer.MIN_VALUE); - qe.getContext().set(SDB.jdbcStream, true); - qe.getContext().set(SDB.streamGraphAPI, true); - try { - ResultSet resultSet = qe.execSelect(); - switch (resultFormat) { - case CSV: - ResultSetFormatter.outputAsCSV(outputStream, resultSet); - break; - case TEXT: - ResultSetFormatter.out(outputStream, resultSet); - break; - case JSON: - ResultSetFormatter.outputAsJSON(outputStream, resultSet); - break; - case XML: - ResultSetFormatter.outputAsXML(outputStream, resultSet); - break; - default: - throw new RDFServiceException("unrecognized result format"); - } - } finally { - qe.close(); - } - } finally { - dw.close(); - } - - } - /** * TODO Is there a way to accomplish this without buffering the entire result? */ @@ -548,6 +513,47 @@ public abstract class RDFServiceJena extends RDFServiceImpl implements RDFServic } @Override + public void serializeAll(OutputStream outputStream) + throws RDFServiceException { + String query = "SELECT * WHERE { GRAPH ?g {?s ?p ?o}}"; + serialize(outputStream, query); + } + + @Override + public void serializeGraph(String graphURI, OutputStream outputStream) + throws RDFServiceException { + String query = "SELECT * WHERE { GRAPH <" + graphURI + "> {?s ?p ?o}}"; + serialize(outputStream, query); + } + + private void serialize(OutputStream outputStream, String query) { + DatasetWrapper dw = getDatasetWrapper(); + try { + Dataset d = dw.getDataset(); + Query q = createQuery(query); + QueryExecution qe = createQueryExecution(query, q, d); + // These properties only help for SDB, but shouldn't hurt for TDB. + qe.getContext().set(SDB.jdbcFetchSize, Integer.MIN_VALUE); + qe.getContext().set(SDB.jdbcStream, true); + qe.getContext().set(SDB.streamGraphAPI, true); + try { + ResultSet resultSet = qe.execSelect(); + if (resultSet.getResultVars().contains("g")) { + Iterator quads = new ResultSetQuadsIterator(resultSet); + RDFDataMgr.writeQuads(outputStream, quads); + } else { + Iterator triples = new ResultSetTriplesIterator(resultSet); + RDFDataMgr.writeTriples(outputStream, triples); + } + } finally { + qe.close(); + } + } finally { + dw.close(); + } + } + + @Override public void close() { // nothing } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/tdb/RDFServiceTDB.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/tdb/RDFServiceTDB.java index 519bd9e02..307ff438c 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/tdb/RDFServiceTDB.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/jena/tdb/RDFServiceTDB.java @@ -124,17 +124,6 @@ public class RDFServiceTDB extends RDFServiceJena { } } - @Override - public void sparqlSelectQuery(String query, ResultFormat resultFormat, - OutputStream outputStream) throws RDFServiceException { - dataset.getLock().enterCriticalSection(Lock.READ); - try { - super.sparqlSelectQuery(query, resultFormat, outputStream); - } finally { - dataset.getLock().leaveCriticalSection(); - } - } - @Override public InputStream sparqlSelectQuery(String query, ResultFormat resultFormat) throws RDFServiceException { @@ -166,6 +155,28 @@ public class RDFServiceTDB extends RDFServiceJena { } } + @Override + public void serializeAll(OutputStream outputStream) + throws RDFServiceException { + dataset.getLock().enterCriticalSection(Lock.READ); + try { + super.serializeAll(outputStream); + } finally { + dataset.getLock().leaveCriticalSection(); + } + } + + @Override + public void serializeGraph(String graphURI, OutputStream outputStream) + throws RDFServiceException { + dataset.getLock().enterCriticalSection(Lock.READ); + try { + super.serializeGraph(graphURI, outputStream); + } finally { + dataset.getLock().leaveCriticalSection(); + } + } + @Override public String toString() { return "RDFServiceTDB[" + ToString.hashHex(this) + "]"; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/logging/LoggingRDFService.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/logging/LoggingRDFService.java index 99d50b095..38f2a2034 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/logging/LoggingRDFService.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/logging/LoggingRDFService.java @@ -52,13 +52,6 @@ public class LoggingRDFService implements RDFService { } } - @Override - public void sparqlSelectQuery(String query, ResultFormat resultFormat, - OutputStream outputStream) throws RDFServiceException { - innerService.sparqlSelectQuery(query, resultFormat, - outputStream); - } - @Override public InputStream sparqlSelectQuery(String query, ResultFormat resultFormat) throws RDFServiceException { @@ -74,6 +67,22 @@ public class LoggingRDFService implements RDFService { } } + @Override + public void serializeAll(OutputStream outputStream) + throws RDFServiceException { + try (RDFServiceLogger l = new RDFServiceLogger()) { + innerService.serializeAll(outputStream); + } + } + + @Override + public void serializeGraph(String graphURI, OutputStream outputStream) + throws RDFServiceException { + try (RDFServiceLogger l = new RDFServiceLogger(graphURI)) { + innerService.serializeGraph(graphURI, outputStream); + } + } + // ---------------------------------------------------------------------- // Untimed methods // ---------------------------------------------------------------------- diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java index 1ee3c309d..937438dfb 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/sparql/RDFServiceSparql.java @@ -27,6 +27,7 @@ import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.message.BasicNameValuePair; +import org.apache.jena.riot.RDFDataMgr; import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.query.Query; @@ -43,6 +44,7 @@ import com.hp.hpl.jena.rdf.model.RDFNode; import com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.Statement; import com.hp.hpl.jena.rdf.model.StmtIterator; +import com.hp.hpl.jena.sparql.core.Quad; import edu.cornell.mannlib.vitro.webapp.dao.jena.SparqlGraph; import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeListener; @@ -53,6 +55,8 @@ import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.ChangeSetImpl; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceImpl; import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.ListeningGraph; +import edu.cornell.mannlib.vitro.webapp.utils.sparql.ResultSetIterators.ResultSetQuadsIterator; +import edu.cornell.mannlib.vitro.webapp.utils.sparql.ResultSetIterators.ResultSetTriplesIterator; /* * API to write, read, and update Vitro's RDF store, with support @@ -263,20 +267,6 @@ public class RDFServiceSparql extends RDFServiceImpl implements RDFService { return result; } - /** - * TODO rewrite the query to use this form instead - avoid one level of - * buffering. - */ - @Override - public void sparqlSelectQuery(String query, ResultFormat resultFormat, - OutputStream outputStream) throws RDFServiceException { - try (InputStream input = sparqlSelectQuery(query, resultFormat)) { - IOUtils.copy(input, outputStream); - } catch (IOException e) { - throw new RDFServiceException(e); - } - } - /** * Performs a SPARQL select query against the knowledge base. The query may have * an embedded graph identifier. @@ -826,4 +816,31 @@ public class RDFServiceSparql extends RDFServiceImpl implements RDFService { getSerializationFormatString(modelChange.getSerializationFormat())); return model; } + + @Override + public void serializeAll(OutputStream outputStream) + throws RDFServiceException { + String query = "SELECT * WHERE { GRAPH ?g {?s ?p ?o}}"; + serialize(outputStream, query); + } + + @Override + public void serializeGraph(String graphURI, OutputStream outputStream) + throws RDFServiceException { + String query = "SELECT * WHERE { GRAPH <" + graphURI + "> {?s ?p ?o}}"; + serialize(outputStream, query); + } + + private void serialize(OutputStream outputStream, String query) throws RDFServiceException { + InputStream resultStream = sparqlSelectQuery(query, RDFService.ResultFormat.JSON); + ResultSet resultSet = ResultSetFactory.fromJSON(resultStream); + if (resultSet.getResultVars().contains("g")) { + Iterator quads = new ResultSetQuadsIterator(resultSet); + RDFDataMgr.writeQuads(outputStream, quads); + } else { + Iterator triples = new ResultSetTriplesIterator(resultSet); + RDFDataMgr.writeTriples(outputStream, triples); + } + } + } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/sparql/ResultSetIterators.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/sparql/ResultSetIterators.java new file mode 100644 index 000000000..1f935a83a --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/sparql/ResultSetIterators.java @@ -0,0 +1,97 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.utils.sparql; + +import java.util.Iterator; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.graph.NodeFactory; +import com.hp.hpl.jena.graph.Triple; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.sparql.core.Quad; + +/** + * Helper classes for producing N-Triples or N-Quads from ResultSets + */ +public class ResultSetIterators { + /** + * If the ResultSet contains appropriate values for g, s, p, and o, return a + * Quad for each row. + */ + public static class ResultSetQuadsIterator implements Iterator { + private final ResultSet resultSet; + + public ResultSetQuadsIterator(ResultSet resultSet) { + this.resultSet = resultSet; + } + + @Override + public boolean hasNext() { + return resultSet.hasNext(); + } + + @Override + public Quad next() { + QuerySolution s = resultSet.next(); + return new Quad(NodeConverter.toNode(s.get("g")), + NodeConverter.toNode(s.get("s")), NodeConverter.toNode(s + .get("p")), NodeConverter.toNode(s.get("o"))); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + } + + /** + * If the ResultSet contains appropriate values for s, p, and o, return a + * Triple for each row. + */ + public static class ResultSetTriplesIterator implements Iterator { + private final ResultSet resultSet; + + public ResultSetTriplesIterator(ResultSet resultSet) { + this.resultSet = resultSet; + } + + @Override + public boolean hasNext() { + return resultSet.hasNext(); + } + + @Override + public Triple next() { + QuerySolution s = resultSet.next(); + return new Triple(NodeConverter.toNode(s.get("s")), + NodeConverter.toNode(s.get("p")), NodeConverter.toNode(s + .get("o"))); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private static class NodeConverter { + public static Node toNode(RDFNode rdfNode) { + if (rdfNode.isAnon()) { + Resource a = rdfNode.asResource(); + return NodeFactory.createAnon(a.getId()); + } + if (rdfNode.isLiteral()) { + Literal l = rdfNode.asLiteral(); + return NodeFactory.createLiteral(l.getLexicalForm(), + l.getLanguage(), l.getDatatype()); + } + return NodeFactory.createURI(rdfNode.asResource().getURI()); + } + } + +}