VIVO-719 Create the dump/restore tool - no restore yet.

This commit is contained in:
Jim Blake 2014-06-05 15:58:06 -04:00
parent 2ea6a5d8cb
commit a7915785f4
10 changed files with 709 additions and 0 deletions

View file

@ -0,0 +1,46 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore;
import java.util.EnumSet;
import javax.servlet.http.HttpServletRequest;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.BadRequestException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils.WhichService;
/**
 * Base class for the dump/restore Action classes: it holds the current
 * request and the helper methods that the actions have in common.
 */
abstract class AbstractDumpRestoreAction {
    protected final HttpServletRequest req;

    public AbstractDumpRestoreAction(HttpServletRequest req) {
        this.req = req;
    }

    /**
     * Look up the requested RDFService, using a VitroRequest wrapped around
     * the current request.
     */
    protected RDFService getRdfService(WhichService which) {
        VitroRequest vreq = new VitroRequest(req);
        return RDFServiceUtils.getRDFService(vreq, which);
    }

    /**
     * Read a request parameter and convert it to a constant of the given enum
     * class. The parameter value must match the constant's name exactly.
     *
     * @param enumClass
     *            the enum type to convert to
     * @param key
     *            the name of the request parameter
     * @return the matching enum constant
     * @throws BadRequestException
     *             if the request has no such parameter, or if its value can
     *             not be converted; in the latter case the message lists the
     *             acceptable values.
     */
    protected <T extends Enum<T>> T getEnumFromParameter(Class<T> enumClass,
            String key) throws BadRequestException {
        String rawValue = req.getParameter(key);
        if (rawValue != null) {
            try {
                return Enum.valueOf(enumClass, rawValue);
            } catch (Exception e) {
                String message = "Request has invalid '" + key
                        + "' parameter: '" + rawValue
                        + "'; acceptable values are "
                        + EnumSet.allOf(enumClass);
                throw new BadRequestException(message);
            }
        }
        throw new BadRequestException("Request has no '" + key
                + "' parameter. ");
    }
}

View file

@ -0,0 +1,85 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore;
import static edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.ACTION_DUMP;
import static edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.PARAMETER_FORMAT;
import static edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.PARAMETER_WHICH;
import java.io.IOException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.BadRequestException;
import edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.DumpFormat;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils.WhichService;
/**
 * The user has requested a dump.
 *
 * First time through, figure out what filename we would like to put on the
 * dump, and send a redirect.
 *
 * Second time through, actually create the dump.
 */
class DumpModelsAction extends AbstractDumpRestoreAction {
    private static final Log log = LogFactory.getLog(DumpModelsAction.class);

    private final HttpServletResponse resp;
    private final DumpFormat format;
    private final WhichService which;
    private final String queryString;

    /**
     * @throws BadRequestException
     *             if the 'format' or 'which' parameter is missing or invalid.
     */
    DumpModelsAction(HttpServletRequest req, HttpServletResponse resp)
            throws BadRequestException {
        super(req);
        this.resp = resp;
        this.format = getEnumFromParameter(DumpFormat.class, PARAMETER_FORMAT);
        this.which = getEnumFromParameter(WhichService.class, PARAMETER_WHICH);
        this.queryString = req.getQueryString();
    }

    /**
     * Redirect to the dump URL, with a filename built from the service name
     * and the format's extension appended to the path, and with the original
     * query string carried along.
     */
    void redirectToFilename() throws IOException {
        String filename = which + "." + format.getExtension();
        String urlPath = req.getContextPath() + req.getServletPath()
                + ACTION_DUMP;
        resp.sendRedirect(urlPath + "/" + filename + "?" + queryString);
    }

    /**
     * Write all of the quads from the selected RDFService to the response, in
     * the selected format.
     *
     * A failure is logged. If nothing has been sent to the client yet, the
     * client is also told about it with a 500 status, so an empty response is
     * not mistaken for a successful dump.
     */
    void dumpModels() {
        try {
            RDFService rdfService = getRdfService(which);
            String query = "SELECT * WHERE { GRAPH ?g {?s ?p ?o}}";

            resp.setContentType(format.getMimeType());
            if (format == DumpFormat.NQUADS) {
                dumpNQuads(rdfService, query);
            } else {
                rdfService.sparqlSelectQuery(query,
                        format.getRdfServiceFormat(), resp.getOutputStream());
            }
        } catch (Throwable t) {
            log.error("Failed to dump " + which + " models as " + format + ".",
                    t);
            reportFailureToClient();
        }
    }

    /**
     * Send a 500 status if that is still possible. Once the response has been
     * committed the status line has already gone out, and all we can do is
     * rely on the log entry.
     */
    private void reportFailureToClient() {
        if (resp.isCommitted()) {
            return;
        }
        try {
            resp.sendError(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        } catch (IOException | IllegalStateException e) {
            log.warn("Could not send an error response for the failed dump.",
                    e);
        }
    }

    /**
     * The RDF service won't produce NQuads, so we get JSON and parse it.
     */
    private void dumpNQuads(RDFService rdfService, String query)
            throws RDFServiceException, IOException {
        JsonToNquads converter = new JsonToNquads(resp.getOutputStream());
        rdfService.sparqlSelectQuery(query, ResultFormat.JSON, converter);
        // Closed only on success: closing flushes the converted output, and
        // would commit a response that might still need to carry an error.
        converter.close();
    }
}

View file

@ -0,0 +1,116 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore;
import javax.json.JsonObject;
import javax.json.JsonString;
import org.apache.jena.riot.out.EscapeStr;
/**
 * A representation of an RDF Node, read from one format of a result set, and
 * able to write to a different format.
 */
public abstract class DumpNode {
    /**
     * Build a node from one binding value of a SPARQL JSON result set.
     *
     * @param json
     *            the JSON object for the value; may be null.
     * @return the node, or null if json is null.
     * @throws BadNodeException
     *             if the object has no "type", has an unrecognized "type", or
     *             lacks a field that its type requires.
     */
    public static DumpNode fromJson(JsonObject json) throws BadNodeException {
        if (json == null) {
            return null;
        }

        String type = getString(json, "type");
        if (type == null) {
            // A switch on a null String would throw NullPointerException.
            throw new BadNodeException("Node has no 'type': " + json);
        }

        switch (type) {
        case "uri":
            return new DumpUriNode(getString(json, "value"));
        case "literal":
        case "typed-literal": // this isn't part of the spec, but Jena uses it.
            return new DumpLiteralNode(getString(json, "value"), getString(
                    json, "xml:lang"), getString(json, "datatype"));
        case "bnode":
            return new DumpBlankNode(getString(json, "value"));
        default:
            throw new BadNodeException("Unrecognized type: '" + type + "'");
        }
    }

    /**
     * Get the named string field from the JSON object, or null if the object
     * has no such field.
     */
    private static String getString(JsonObject json, String name) {
        JsonString jsString = json.getJsonString(name);
        return (jsString == null) ? null : jsString.getString();
    }

    /**
     * Render this node as a term in an N-Quads statement.
     */
    public abstract String toNquad();

    /** A node that holds a URI; written as an escaped URI in angle brackets. */
    public static class DumpUriNode extends DumpNode {
        private final String uri;

        public DumpUriNode(String uri) throws BadNodeException {
            if (uri == null) {
                throw new BadNodeException("uri may not be null.");
            }
            this.uri = uri;
        }

        @Override
        public String toNquad() {
            return "<" + EscapeStr.stringEsc(uri) + ">";
        }
    }

    /**
     * A literal node. It may have a language or a datatype, but not both.
     */
    public static class DumpLiteralNode extends DumpNode {
        private final String value;
        private final String language;
        private final String datatype;

        public DumpLiteralNode(String value, String language, String datatype)
                throws BadNodeException {
            if (value == null) {
                throw new BadNodeException("value may not be null.");
            }
            if (language != null && datatype != null) {
                throw new BadNodeException("either language('" + language
                        + "') or datatype('" + datatype + "') must be null.");
            }
            this.value = value;
            this.language = language;
            this.datatype = datatype;
        }

        @Override
        public String toNquad() {
            String valueString = "\"" + EscapeStr.stringEsc(value) + "\"";
            if (language != null) {
                return valueString + "@" + language;
            } else if (datatype != null) {
                return valueString + "^^<" + EscapeStr.stringEsc(datatype)
                        + ">";
            } else {
                return valueString;
            }
        }
    }

    /** A blank node; written as "_:" followed by the label, unchanged. */
    public static class DumpBlankNode extends DumpNode {
        private final String label;

        public DumpBlankNode(String label) throws BadNodeException {
            if (label == null) {
                throw new BadNodeException("label may not be null.");
            }
            this.label = label;
        }

        @Override
        public String toNquad() {
            return "_:" + label;
        }
    }

    /** Indicates that a node could not be built from the data provided. */
    public static class BadNodeException extends Exception {
        public BadNodeException(String message) {
            super(message);
        }

        public BadNodeException(String message, Throwable cause) {
            super(message, cause);
        }
    }
}

View file

@ -0,0 +1,170 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.lang.StringUtils;
import edu.cornell.mannlib.vitro.webapp.auth.permissions.SimplePermission;
import edu.cornell.mannlib.vitro.webapp.auth.policy.PolicyHelper;
import edu.cornell.mannlib.vitro.webapp.auth.requestedAction.RequestedAction;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.FreemarkerHttpServlet;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.TemplateResponseValues;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat;
/**
 * Allow the user to dump the knowledge base from either RDFService, or restore
 * it.
 *
 * Show the user the selection page. If they select "dump" parameters, redirect
 * to an appropriate filename-based URL, so they will receive a nicely named
 * file. If they chose to "restore", just do it.
 *
 * The first request, the selection and the redirected dump should all be GET
 * requests. A restore should be a POST request.
 */
public class DumpRestoreController extends FreemarkerHttpServlet {
    private static final RequestedAction REQUIRED_ACTION = SimplePermission.USE_ADVANCED_DATA_TOOLS_PAGES.ACTION;

    // Path-info values that select what the request will do.
    static final String ACTION_DUMP = "/dump";
    static final String ACTION_RESTORE = "/restore";
    static final String ACTION_SELECT = "/select";

    static final String PARAMETER_WHICH = "which";
    static final String PARAMETER_FORMAT = "format";
    static final String PARAMETER_SOURCE_FILE = "sourceFile";

    static final String ATTRIBUTE_TRIPLE_COUNT = "tripleCount";

    private static final String TEMPLATE_NAME = "datatools-dumpRestore.ftl";

    /**
     * Override this to change the maximum size of uploaded files in multipart
     * requests.
     */
    @Override
    public long maximumMultipartFileSize() {
        return 100L * 1024L * 1024L * 1024L; // allow really big uploads.
    }

    /**
     * Handle the selection (redirect to a filename-based URL), the dump
     * itself, or - for any other path - the display of the page.
     *
     * A request with missing or invalid parameters is answered with a 400
     * status that carries the explanation.
     */
    @Override
    public void doGet(HttpServletRequest req, HttpServletResponse resp)
            throws IOException, ServletException {
        if (!isAuthorizedToDisplayPage(req, resp, REQUIRED_ACTION)) {
            return;
        }

        try {
            String action = req.getPathInfo();
            if (ACTION_SELECT.equals(action)) {
                new DumpModelsAction(req, resp).redirectToFilename();
            } else if (StringUtils.startsWith(action, ACTION_DUMP)) {
                new DumpModelsAction(req, resp).dumpModels();
            } else {
                super.doGet(req, resp);
            }
        } catch (BadRequestException e) {
            resp.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage());
        }
    }

    /**
     * Handle a restore request, then display the page with the resulting
     * count. Any other path is not allowed as a POST.
     *
     * An unauthorized request gets a 403 status and goes no further. A
     * request with missing or invalid parameters gets a 400 status.
     */
    @Override
    public void doPost(HttpServletRequest req, HttpServletResponse resp)
            throws IOException, ServletException {
        if (!PolicyHelper.isAuthorizedForActions(req, REQUIRED_ACTION)) {
            resp.sendError(HttpServletResponse.SC_FORBIDDEN);
            // Without this, an unauthorized user would still do the restore.
            return;
        }

        try {
            if (ACTION_RESTORE.equals(req.getPathInfo())) {
                long tripleCount = new RestoreModelsAction(req, resp)
                        .restoreModels();
                req.setAttribute(ATTRIBUTE_TRIPLE_COUNT, tripleCount);
                super.doGet(req, resp);
            } else {
                resp.sendError(HttpServletResponse.SC_METHOD_NOT_ALLOWED);
            }
        } catch (BadRequestException e) {
            resp.sendError(HttpServletResponse.SC_BAD_REQUEST, e.getMessage());
        }
    }

    /**
     * Provide the template with the URLs for the select and restore actions,
     * and with the count from a restore, if the request holds one.
     */
    @Override
    protected ResponseValues processRequest(VitroRequest vreq) throws Exception {
        Map<String, Object> bodyMap = new HashMap<>();

        bodyMap.put("selectUrl",
                UrlBuilder.getUrl(vreq.getServletPath() + ACTION_SELECT));
        bodyMap.put("restoreUrl",
                UrlBuilder.getUrl(vreq.getServletPath() + ACTION_RESTORE));

        Object tripleCount = vreq.getAttribute(ATTRIBUTE_TRIPLE_COUNT);
        if (tripleCount instanceof Long) {
            bodyMap.put("tripleCount", tripleCount);
        }

        return new TemplateResponseValues(TEMPLATE_NAME, bodyMap);
    }

    /**
     * Indicates a problem with the request parameters.
     */
    static class BadRequestException extends Exception {
        public BadRequestException(String message) {
            super(message);
        }
    }

    /**
     * The formats that we will accept on a dump request. Each has a MIME
     * type, a filename extension, and the RDFService result format used to
     * produce it; NQUADS has no RDFService result format.
     */
    enum DumpFormat {
        NQUADS("application/n-quads", "nq", null),

        JSON("application/sparql-results+json", "srj", ResultFormat.JSON),

        XML("application/sparql-results+xml", "srx", ResultFormat.XML);

        private final String mimeType;
        private final String extension;
        private final ResultFormat rdfServiceFormat;

        private DumpFormat(String mimeType, String extension,
                ResultFormat rdfServiceFormat) {
            this.mimeType = mimeType;
            this.extension = extension;
            this.rdfServiceFormat = rdfServiceFormat;
        }

        public String getMimeType() {
            return mimeType;
        }

        public String getExtension() {
            return extension;
        }

        /** May be null: see NQUADS. */
        public ResultFormat getRdfServiceFormat() {
            return rdfServiceFormat;
        }
    }

    /**
     * The formats that we will accept on a restore request.
     */
    enum RestoreFormat {
        NQUADS
    }
}

View file

@ -0,0 +1,118 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.io.Writer;
import javax.json.Json;
import javax.json.JsonObject;
import javax.json.JsonReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * An output stream decorator that converts a stream of
 * application/sparql-results+json to a stream of application/n-quads
 *
 * The bytes that are written to this stream are treated as UTF-8 JSON.
 * Everything up to the opening bracket of the "bindings" array is the header,
 * and is discarded. After that, each binding is collected in a buffer until it
 * is complete, and is then parsed and written as one line of N-Quads.
 */
public class JsonToNquads extends OutputStream {
    private static final Log log = LogFactory.getLog(JsonToNquads.class);

    private final Writer writer;
    private final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    private final ByteArrayOutputStream header = new ByteArrayOutputStream();

    private boolean headerIsComplete;

    // Scanner state for the record that is currently being buffered. When a
    // record is recognized, all three are back at their initial values, so
    // they never need an explicit reset.
    private boolean inQuotes;
    private boolean escaped;
    private int braceLevel;

    public JsonToNquads(OutputStream out) throws IOException {
        this.writer = new OutputStreamWriter(out, "UTF-8");
    }

    @Override
    public void flush() throws IOException {
        writer.flush();
    }

    @Override
    public void close() throws IOException {
        writer.close();
        if (log.isDebugEnabled()) {
            log.debug("Left over in the buffer: '" + buffer.toString("UTF-8")
                    + "'");
        }
    }

    @Override
    public void write(int b) throws IOException {
        // Only the low-order 8 bits of the argument are significant.
        int ch = b & 0xFF;

        if (!headerIsComplete) {
            writeToHeader(ch);
            return;
        }

        buffer.write(ch);
        if (completesRecord(ch)) {
            processRecord();
            buffer.reset();
        }
    }

    /**
     * Add the byte to the header, and decide whether the header is complete:
     * that is, whether it holds "bindings", then a colon, then an opening
     * bracket.
     *
     * Only an opening bracket can be the byte that completes the header, so
     * the accumulated text is not examined for any other byte.
     */
    private void writeToHeader(int ch) throws IOException {
        header.write(ch);
        if (ch != '[') {
            return;
        }

        String text = header.toString("UTF-8");
        int bindingsHere = text.indexOf("\"bindings\"");
        int lastColonHere = text.lastIndexOf(":");
        int lastOpenBracket = text.lastIndexOf("[");
        headerIsComplete = (bindingsHere >= 0)
                && (lastColonHere > bindingsHere)
                && (lastOpenBracket > lastColonHere);

        if (log.isDebugEnabled()) {
            log.debug("complete=" + headerIsComplete + ", header='" + text
                    + "'");
        }
    }

    /**
     * Advance the scanner by one byte, and decide whether the buffer now
     * holds a complete record: a comma or closing bracket that is outside of
     * any quoted string and outside of any braces.
     *
     * Working on bytes is safe because every byte of a multi-byte UTF-8
     * sequence has its high bit set, so it can't be mistaken for one of the
     * ASCII characters that we care about.
     */
    private boolean completesRecord(int ch) {
        if (inQuotes) {
            if (escaped) {
                // Whatever follows a backslash is part of the string.
                escaped = false;
            } else if (ch == '\\') {
                escaped = true;
            } else if (ch == '"') {
                inQuotes = false;
            }
            return false;
        }

        switch (ch) {
        case '"':
            inQuotes = true;
            return false;
        case '{':
            braceLevel++;
            return false;
        case '}':
            braceLevel--;
            return false;
        case ',':
        case ']':
            return braceLevel == 0;
        default:
            return false;
        }
    }

    /**
     * Parse the buffered record and write it as a line of N-Quads, or as a
     * line of N-Triples if it has no "g" binding.
     *
     * @throws RuntimeException
     *             if the record can't be parsed or written.
     */
    private void processRecord() throws IOException {
        String text = buffer.toString("UTF-8");

        // The buffer always ends with the ',' or ']' that completed it.
        String json = text.substring(0, text.length() - 1).trim();
        if (json.isEmpty()) {
            // Nothing but a delimiter: the "bindings" array was empty.
            return;
        }

        log.debug("Parsing record: '" + json + "'");
        try (JsonReader jsRead = Json.createReader(new StringReader(json))) {
            JsonObject jsRecord = jsRead.readObject();
            DumpNode s = DumpNode.fromJson(jsRecord.getJsonObject("s"));
            DumpNode p = DumpNode.fromJson(jsRecord.getJsonObject("p"));
            DumpNode o = DumpNode.fromJson(jsRecord.getJsonObject("o"));
            DumpNode g = DumpNode.fromJson(jsRecord.getJsonObject("g"));
            if (g == null) {
                writer.write(String.format("%s %s %s .\n", s.toNquad(),
                        p.toNquad(), o.toNquad()));
            } else {
                writer.write(String.format("%s %s %s %s .\n", s.toNquad(),
                        p.toNquad(), o.toNquad(), g.toNquad()));
            }
        } catch (Exception e) {
            log.error("Failed to parse record: '" + json + "'", e);
            throw new RuntimeException(e);
        }
    }
}

View file

@ -0,0 +1,67 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore;
import static edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.PARAMETER_FORMAT;
import static edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.PARAMETER_SOURCE_FILE;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.fileupload.FileItem;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.BadRequestException;
import edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.DumpRestoreController.RestoreFormat;
/**
 * TODO In progress.
 *
 * The user has requested a restore from an uploaded file. So far, this only
 * reads the file line by line and counts the lines; nothing is restored.
 */
public class RestoreModelsAction extends AbstractDumpRestoreAction {
    private final FileItem sourceFile;
    private final RestoreFormat format;

    /**
     * @throws BadRequestException
     *             if the request has no uploaded source file, or if the
     *             'format' parameter is missing or invalid.
     */
    RestoreModelsAction(HttpServletRequest req, HttpServletResponse resp)
            throws BadRequestException {
        super(req);
        this.sourceFile = getFileItem(PARAMETER_SOURCE_FILE);
        this.format = getEnumFromParameter(RestoreFormat.class,
                PARAMETER_FORMAT);
    }

    /**
     * Get the uploaded file with this name from the request.
     *
     * @throws BadRequestException
     *             if the request holds no such file.
     */
    private FileItem getFileItem(String key) throws BadRequestException {
        VitroRequest vreq = new VitroRequest(req);
        FileItem uploaded = vreq.getFileItem(key);
        if (uploaded != null) {
            return uploaded;
        }
        throw new BadRequestException("Request has no file item named '"
                + key + "'");
    }

    /**
     * Read the uploaded file as lines of UTF-8 text, passing each line to
     * processLine().
     *
     * @return the number of lines that were read.
     */
    long restoreModels() throws IOException {
        long linesRead = 0;
        try (InputStream in = sourceFile.getInputStream();
                BufferedReader lines = new BufferedReader(
                        new InputStreamReader(in, "UTF-8"))) {
            for (String line = lines.readLine(); line != null; line = lines
                    .readLine()) {
                processLine(line);
                linesRead++;
            }
        }
        return linesRead;
    }

    /**
     * Placeholder: the line is ignored, and a message is printed instead.
     */
    private void processLine(String line) {
        System.out.println("TOTALLY BOGUS RESTORE");
    }
}

View file

@ -204,6 +204,7 @@ public class BaseSiteAdminController extends FreemarkerHttpServlet {
urls.put("rdfData", UrlBuilder.getUrl("/uploadRDFForm"));
urls.put("rdfExport", UrlBuilder.getUrl("/export"));
urls.put("sparqlQueryBuilder", UrlBuilder.getUrl("/admin/sparqlquerybuilder"));
urls.put("dumpRestore", UrlBuilder.getUrl("/dumpRestore"));
}
if (PolicyHelper.isAuthorizedForActions(vreq, SimplePermission.USE_SPARQL_QUERY_PAGE.ACTION)) {
urls.put("sparqlQuery", UrlBuilder.getUrl("/admin/sparqlquery"));