File Harvest: almost done
This commit is contained in:
parent
7e80fbebf3
commit
e28b7d7247
4 changed files with 319 additions and 157 deletions
|
@ -1,5 +1,11 @@
|
||||||
<#-- $This file is distributed under the terms of the license in /doc/license.txt$ -->
|
<#-- $This file is distributed under the terms of the license in /doc/license.txt$ -->
|
||||||
|
|
||||||
|
<#if !(user.loggedIn && user.hasSiteAdminAccess)>
|
||||||
|
|
||||||
|
<p>You must be an administrator to use this tool.</p>
|
||||||
|
|
||||||
|
<#else>
|
||||||
|
|
||||||
<#if jobKnown == "false">
|
<#if jobKnown == "false">
|
||||||
<p>Error: No file harvest job was specified, or an unknown job was specified.</p>
|
<p>Error: No file harvest job was specified, or an unknown job was specified.</p>
|
||||||
<p>The end user should not see this error under normal circumstances, so this is probably a bug and should be reported.</p>
|
<p>The end user should not see this error under normal circumstances, so this is probably a bug and should be reported.</p>
|
||||||
|
@ -61,10 +67,17 @@
|
||||||
|
|
||||||
var importedItems = document.getElementById("importedItems")
|
var importedItems = document.getElementById("importedItems")
|
||||||
|
|
||||||
for(var i = 0; i < json.newlyAddedUrls.length; i++) {
|
if(json.newlyAddedUrls.length > 0) {
|
||||||
|
for(var i = 0; i < json.newlyAddedUrls.length; i++) {
|
||||||
|
|
||||||
|
var newLi = document.createElement("li");
|
||||||
|
newLi.innerHTML = "<a href=\"" + json.newlyAddedUrls[i] + "\" target=\"_blank\">" + json.newlyAddedUris[i] + "</a>";
|
||||||
|
importedItems.appendChild(newLi);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
|
||||||
var newLi = document.createElement("li");
|
var newLi = document.createElement("li");
|
||||||
newLi.innerHTML = "<a href=\"" + json.newlyAddedUrls[i] + "\">" + json.newlyAddedUris[i] + "</a>";
|
newLi.innerHTML = "<a href=\"No new grants were imported.\" target=\"_blank\">" + json.newlyAddedUris[i] + "</a>";
|
||||||
importedItems.appendChild(newLi);
|
importedItems.appendChild(newLi);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -250,6 +263,10 @@
|
||||||
<div class="clearBothDiv" />
|
<div class="clearBothDiv" />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<#-- if job known -->
|
||||||
|
</#if>
|
||||||
|
|
||||||
|
<#-- if user is logged-in with site admin access -->
|
||||||
</#if>
|
</#if>
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -28,14 +28,15 @@ class CsvFileHarvestJob implements FileHarvestJob {
|
||||||
* @author mbarbieri
|
* @author mbarbieri
|
||||||
*/
|
*/
|
||||||
public enum JobType {
|
public enum JobType {
|
||||||
GRANT("csvGrant", "granttemplate.csv", "testCSVtoRDFgrant.sh", "Grant", "Imported Grants", new String[] {"http://vivoweb.org/ontology/core#Grant"}),
|
GRANT("csvGrant", "granttemplate.csv", "CSVtoRDFgrant.sh", "Grant", "Imported Grants", "No new grants were imported.", new String[] {"http://vivoweb.org/ontology/core#Grant"}),
|
||||||
PERSON("csvPerson", "persontemplate.csv", "testCSVtoRDFpeople.sh", "Person", "Imported Persons", new String[] {"http://xmlns.com/foaf/0.1/Person"});
|
PERSON("csvPerson", "persontemplate.csv", "CSVtoRDFperson.sh", "Person", "Imported Persons", "No new persons were imported.", new String[] {"http://xmlns.com/foaf/0.1/Person"});
|
||||||
|
|
||||||
public final String httpParameterName;
|
public final String httpParameterName;
|
||||||
private final String templateFileName;
|
private final String templateFileName;
|
||||||
private final String scriptFileName;
|
private final String scriptFileName;
|
||||||
private final String friendlyName;
|
private final String friendlyName;
|
||||||
private final String linkHeader;
|
private final String linkHeader;
|
||||||
|
private final String noNewDataMessage;
|
||||||
private final String[] rdfTypesForLinks;
|
private final String[] rdfTypesForLinks;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -68,17 +69,18 @@ class CsvFileHarvestJob implements FileHarvestJob {
|
||||||
return returnValue;
|
return returnValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
private JobType(String httpParameterName, String templateFileName, String scriptFileName, String friendlyName, String linkHeader, String[] rdfTypesForLinks) {
|
private JobType(String httpParameterName, String templateFileName, String scriptFileName, String friendlyName, String linkHeader, String noNewDataMessage, String[] rdfTypesForLinks) {
|
||||||
this.httpParameterName = httpParameterName;
|
this.httpParameterName = httpParameterName;
|
||||||
this.templateFileName = templateFileName;
|
this.templateFileName = templateFileName;
|
||||||
this.scriptFileName = scriptFileName;
|
this.scriptFileName = scriptFileName;
|
||||||
this.friendlyName = friendlyName;
|
this.friendlyName = friendlyName;
|
||||||
this.linkHeader = linkHeader;
|
this.linkHeader = linkHeader;
|
||||||
|
this.noNewDataMessage = noNewDataMessage;
|
||||||
this.rdfTypesForLinks = Arrays.copyOf(rdfTypesForLinks, rdfTypesForLinks.length);
|
this.rdfTypesForLinks = Arrays.copyOf(rdfTypesForLinks, rdfTypesForLinks.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
private CsvFileHarvestJob constructCsvFileHarvestJob(VitroRequest vreq, String namespace) {
|
private CsvFileHarvestJob constructCsvFileHarvestJob(VitroRequest vreq, String namespace) {
|
||||||
return new CsvFileHarvestJob(vreq, this.templateFileName, this.scriptFileName, namespace, this.friendlyName, this.linkHeader, this.rdfTypesForLinks);
|
return new CsvFileHarvestJob(vreq, this.templateFileName, this.scriptFileName, namespace, this.friendlyName, this.linkHeader, this.noNewDataMessage, this.rdfTypesForLinks);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,6 +108,7 @@ class CsvFileHarvestJob implements FileHarvestJob {
|
||||||
/**
|
/**
|
||||||
* The namespace to be used for the harvest.
|
* The namespace to be used for the harvest.
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("unused")
|
||||||
private final String namespace;
|
private final String namespace;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -118,11 +121,21 @@ class CsvFileHarvestJob implements FileHarvestJob {
|
||||||
*/
|
*/
|
||||||
private final String linkHeader;
|
private final String linkHeader;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The message to show to the user if there are no newly-harvested entities to show them.
|
||||||
|
*/
|
||||||
|
private final String noNewDataMessage;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An array of rdf:type values which will be used for links.
|
* An array of rdf:type values which will be used for links.
|
||||||
*/
|
*/
|
||||||
private final String[] rdfTypesForLinks;
|
private final String[] rdfTypesForLinks;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The session ID of this user session.
|
||||||
|
*/
|
||||||
|
private final String sessionId;
|
||||||
|
|
||||||
|
|
||||||
public static CsvFileHarvestJob createJob(JobType jobType, VitroRequest vreq, String namespace) {
|
public static CsvFileHarvestJob createJob(JobType jobType, VitroRequest vreq, String namespace) {
|
||||||
return jobType.constructCsvFileHarvestJob(vreq, namespace);
|
return jobType.constructCsvFileHarvestJob(vreq, namespace);
|
||||||
|
@ -132,15 +145,17 @@ class CsvFileHarvestJob implements FileHarvestJob {
|
||||||
* Constructor.
|
* Constructor.
|
||||||
* @param templateFileName just the name of the template file. The directory is assumed to be standard.
|
* @param templateFileName just the name of the template file. The directory is assumed to be standard.
|
||||||
*/
|
*/
|
||||||
private CsvFileHarvestJob(VitroRequest vreq, String templateFileName, String scriptFileName, String namespace, String friendlyName, String linkHeader, String[] rdfTypesForLinks) {
|
private CsvFileHarvestJob(VitroRequest vreq, String templateFileName, String scriptFileName, String namespace, String friendlyName, String linkHeader, String noNewDataMessage, String[] rdfTypesForLinks) {
|
||||||
this.vreq = vreq;
|
this.vreq = vreq;
|
||||||
this.templateFile = new File(getTemplateFileDirectory() + templateFileName);
|
this.templateFile = new File(getTemplateFileDirectory() + templateFileName);
|
||||||
this.scriptFile = new File(getScriptFileDirectory() + scriptFileName);
|
this.scriptFile = new File(getScriptFileDirectory() + scriptFileName);
|
||||||
log.error(getTemplateFileDirectory() + templateFileName);
|
|
||||||
this.namespace = namespace;
|
this.namespace = namespace;
|
||||||
this.friendlyName = friendlyName;
|
this.friendlyName = friendlyName;
|
||||||
this.linkHeader = linkHeader;
|
this.linkHeader = linkHeader;
|
||||||
|
this.noNewDataMessage = noNewDataMessage;
|
||||||
this.rdfTypesForLinks = Arrays.copyOf(rdfTypesForLinks, rdfTypesForLinks.length);
|
this.rdfTypesForLinks = Arrays.copyOf(rdfTypesForLinks, rdfTypesForLinks.length);
|
||||||
|
|
||||||
|
this.sessionId = this.vreq.getSession().getId();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -273,16 +288,12 @@ class CsvFileHarvestJob implements FileHarvestJob {
|
||||||
|
|
||||||
String workingDirectory = TestFileController.getHarvesterPath();
|
String workingDirectory = TestFileController.getHarvesterPath();
|
||||||
String fileDirectory = TestFileController.getUploadPath(vreq);
|
String fileDirectory = TestFileController.getUploadPath(vreq);
|
||||||
|
String harvestedDataPath = getHarvestedDataPath();
|
||||||
|
|
||||||
replacements = replacements.replace("${WORKING_DIRECTORY}", workingDirectory);
|
replacements = replacements.replace("${WORKING_DIRECTORY}", workingDirectory);
|
||||||
replacements = replacements.replace("${UPLOADS_FOLDER}", fileDirectory);
|
replacements = replacements.replace("${UPLOADS_FOLDER}", fileDirectory);
|
||||||
|
replacements = replacements.replace("${HARVESTED_DATA_PATH}", harvestedDataPath);
|
||||||
|
|
||||||
/*
|
|
||||||
* What needs to be replaced?
|
|
||||||
*
|
|
||||||
* task directory name
|
|
||||||
*/
|
|
||||||
//todo: complete
|
|
||||||
return replacements;
|
return replacements;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,10 +321,13 @@ class CsvFileHarvestJob implements FileHarvestJob {
|
||||||
return scriptTemplateContents;
|
return scriptTemplateContents;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String getHarvestedDataPath() {
|
||||||
|
return TestFileController.getFileHarvestRootPath() + "harvested-data/csv/" + this.sessionId + "/";
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getAdditionsFilePath() {
|
public String getAdditionsFilePath() {
|
||||||
|
return getHarvestedDataPath() + "additions.rdf.xml";
|
||||||
return TestFileController.getHarvesterPath() + TestFileController.PATH_TO_ADDITIONS_FILE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -350,6 +364,11 @@ class CsvFileHarvestJob implements FileHarvestJob {
|
||||||
return help;
|
return help;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getNoNewDataMessage() {
|
||||||
|
return this.noNewDataMessage;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -67,5 +67,11 @@ interface FileHarvestJob {
|
||||||
* @return the HTML to be shown in the collapsible "Help" area in the "Fill in data" section of the page.
|
* @return the HTML to be shown in the collapsible "Help" area in the "Fill in data" section of the page.
|
||||||
*/
|
*/
|
||||||
String getTemplateFillInHelp();
|
String getTemplateFillInHelp();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the message to show to the user if there are no newly-harvested entities to show them.
|
||||||
|
* @return the message to show to the user if there are no newly-harvested entities to show them
|
||||||
|
*/
|
||||||
|
String getNoNewDataMessage();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,16 +6,17 @@ import java.io.BufferedReader;
|
||||||
import java.io.DataInputStream;
|
import java.io.DataInputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileReader;
|
|
||||||
import java.io.FileWriter;
|
import java.io.FileWriter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Date;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Hashtable;
|
import java.util.Hashtable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import javax.servlet.ServletContext;
|
import javax.servlet.ServletContext;
|
||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
import javax.servlet.ServletOutputStream;
|
import javax.servlet.ServletOutputStream;
|
||||||
|
@ -29,7 +30,6 @@ import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
import org.json.JSONObject;
|
import org.json.JSONObject;
|
||||||
import org.skife.csv.SimpleReader;
|
|
||||||
import org.w3c.dom.Document;
|
import org.w3c.dom.Document;
|
||||||
import org.w3c.dom.NamedNodeMap;
|
import org.w3c.dom.NamedNodeMap;
|
||||||
import org.w3c.dom.Node;
|
import org.w3c.dom.Node;
|
||||||
|
@ -62,6 +62,12 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
private static final String MODE_CHECK_STATUS = "checkStatus";
|
private static final String MODE_CHECK_STATUS = "checkStatus";
|
||||||
private static final String MODE_DOWNLOAD_TEMPLATE = "template";
|
private static final String MODE_DOWNLOAD_TEMPLATE = "template";
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Stores information about the Harvester thread for a particular user session.
|
||||||
|
*/
|
||||||
|
private Map<String, SessionInfo> sessionIdToSessionInfo = new Hashtable<String, SessionInfo>(); //Hashtable is threadsafe, HashMap is not
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A list of known job parameters (that is, "job=" values from the query string which we will accept from the browser).
|
* A list of known job parameters (that is, "job=" values from the query string which we will accept from the browser).
|
||||||
* This should be filled in the static initializer and then never written to again.
|
* This should be filled in the static initializer and then never written to again.
|
||||||
|
@ -81,19 +87,20 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
private static final String PATH_TO_HARVESTER = "/home/mbarbieri/workspace/HarvesterDev/";
|
private static final String PATH_TO_HARVESTER = "/home/mbarbieri/workspace/HarvesterDev/";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Relative path from the Harvester root directory to the Additions file containing rdf/xml added to VIVO from Harvest run.
|
* Relative path from the Harvester root directory to the main area reserved for the VIVO File Harvest feature. Include
|
||||||
|
* final slash.
|
||||||
*/
|
*/
|
||||||
public static final String PATH_TO_ADDITIONS_FILE = "harvested-data/csv/additions.rdf.xml"; //todo: this is job-specific
|
private static final String PATH_TO_FILE_HARVEST_ROOT = "vivo/";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Relative path from the Harvester root directory to the directory where user-downloadable template files are stored.
|
* Relative path from the Harvester root directory to the directory where user-downloadable template files are stored.
|
||||||
*/
|
*/
|
||||||
public static final String PATH_TO_TEMPLATE_FILES = "files/";
|
public static final String PATH_TO_TEMPLATE_FILES = PATH_TO_FILE_HARVEST_ROOT + "templates/";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Relative path from the Harvester root directory to the directory containing the script templates. Include final slash.
|
* Relative path from the Harvester root directory to the directory containing the script templates. Include final slash.
|
||||||
*/
|
*/
|
||||||
public static final String PATH_TO_HARVESTER_SCRIPTS = "scripts/";
|
public static final String PATH_TO_HARVESTER_SCRIPTS = PATH_TO_FILE_HARVEST_ROOT + "scripts/";
|
||||||
|
|
||||||
|
|
||||||
static {
|
static {
|
||||||
|
@ -121,6 +128,8 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
@Override
|
@Override
|
||||||
protected ResponseValues processRequest(VitroRequest vreq) {
|
protected ResponseValues processRequest(VitroRequest vreq) {
|
||||||
try {
|
try {
|
||||||
|
cleanUpOldSessions();
|
||||||
|
|
||||||
String job = vreq.getParameter(PARAMETER_JOB);
|
String job = vreq.getParameter(PARAMETER_JOB);
|
||||||
String jobKnown = "false";
|
String jobKnown = "false";
|
||||||
if((job != null) && TestFileController.knownJobs.contains(job.toLowerCase()))
|
if((job != null) && TestFileController.knownJobs.contains(job.toLowerCase()))
|
||||||
|
@ -144,6 +153,7 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
body.put("jobSpecificLinkHeader", (jobObject != null) ? jobObject.getLinkHeader() : "");
|
body.put("jobSpecificLinkHeader", (jobObject != null) ? jobObject.getLinkHeader() : "");
|
||||||
body.put("jobSpecificDownloadHelp", (jobObject != null) ? jobObject.getTemplateDownloadHelp() : "");
|
body.put("jobSpecificDownloadHelp", (jobObject != null) ? jobObject.getTemplateDownloadHelp() : "");
|
||||||
body.put("jobSpecificFillInHelp", (jobObject != null) ? jobObject.getTemplateFillInHelp() : "");
|
body.put("jobSpecificFillInHelp", (jobObject != null) ? jobObject.getTemplateFillInHelp() : "");
|
||||||
|
body.put("jobSpecificNoNewDataMessage", (jobObject != null) ? jobObject.getNoNewDataMessage() : "");
|
||||||
return new TemplateResponseValues(TEMPLATE_DEFAULT, body);
|
return new TemplateResponseValues(TEMPLATE_DEFAULT, body);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
log.error(e, e);
|
log.error(e, e);
|
||||||
|
@ -166,6 +176,16 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
return harvesterPath;
|
return harvesterPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the path on this machine of the area within Harvester reserved for File Harvest.
|
||||||
|
* @return the path on this machine of the area within Harvester reserved for File Harvest
|
||||||
|
*/
|
||||||
|
public static String getFileHarvestRootPath()
|
||||||
|
{
|
||||||
|
String fileHarvestRootPath = PATH_TO_HARVESTER + PATH_TO_FILE_HARVEST_ROOT;
|
||||||
|
return fileHarvestRootPath;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the base directory used for all File Harvest uploads.
|
* Returns the base directory used for all File Harvest uploads.
|
||||||
* @param context the current servlet context
|
* @param context the current servlet context
|
||||||
|
@ -228,8 +248,6 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
throws IOException, ServletException {
|
throws IOException, ServletException {
|
||||||
|
|
||||||
log.error("this is a post.");
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
boolean isMultipart = ServletFileUpload.isMultipartContent(request);
|
boolean isMultipart = ServletFileUpload.isMultipartContent(request);
|
||||||
String mode = request.getParameter(PARAMETER_MODE);
|
String mode = request.getParameter(PARAMETER_MODE);
|
||||||
|
@ -260,7 +278,6 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
private void doFileUploadPost(HttpServletRequest request, HttpServletResponse response)
|
private void doFileUploadPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
throws IOException, ServletException {
|
throws IOException, ServletException {
|
||||||
|
|
||||||
log.error("file upload post.");
|
|
||||||
JSONObject json = new JSONObject();
|
JSONObject json = new JSONObject();
|
||||||
try {
|
try {
|
||||||
VitroRequest vreq = new VitroRequest(request);
|
VitroRequest vreq = new VitroRequest(request);
|
||||||
|
@ -285,7 +302,6 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
// upload directory if it exists (a "first upload" parameter, initialized to "true" but which gets set to
|
// upload directory if it exists (a "first upload" parameter, initialized to "true" but which gets set to
|
||||||
// "false" once the user starts uploading stuff is used for this).
|
// "false" once the user starts uploading stuff is used for this).
|
||||||
String firstUpload = req.getParameter(PARAMETER_FIRST_UPLOAD); //clear directory on first upload
|
String firstUpload = req.getParameter(PARAMETER_FIRST_UPLOAD); //clear directory on first upload
|
||||||
log.error(firstUpload);
|
|
||||||
if(firstUpload.toLowerCase().equals("true")) {
|
if(firstUpload.toLowerCase().equals("true")) {
|
||||||
if(directory.exists()) {
|
if(directory.exists()) {
|
||||||
File[] children = directory.listFiles();
|
File[] children = directory.listFiles();
|
||||||
|
@ -385,7 +401,6 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
*/
|
*/
|
||||||
private void doHarvestPost(HttpServletRequest request, HttpServletResponse response) {
|
private void doHarvestPost(HttpServletRequest request, HttpServletResponse response) {
|
||||||
|
|
||||||
log.error("harvest post.");
|
|
||||||
try {
|
try {
|
||||||
VitroRequest vreq = new VitroRequest(request);
|
VitroRequest vreq = new VitroRequest(request);
|
||||||
FileHarvestJob job = getJob(vreq, vreq.getParameter(PARAMETER_JOB));
|
FileHarvestJob job = getJob(vreq, vreq.getParameter(PARAMETER_JOB));
|
||||||
|
@ -394,9 +409,7 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
|
|
||||||
String script = job.getScript();
|
String script = job.getScript();
|
||||||
String additionsFilePath = job.getAdditionsFilePath();
|
String additionsFilePath = job.getAdditionsFilePath();
|
||||||
log.error("start harvest");
|
|
||||||
runScript(getSessionId(request), script, additionsFilePath);
|
runScript(getSessionId(request), script, additionsFilePath);
|
||||||
log.error("end harvest");
|
|
||||||
|
|
||||||
JSONObject json = new JSONObject();
|
JSONObject json = new JSONObject();
|
||||||
json.put("progressSinceLastCheck", "");
|
json.put("progressSinceLastCheck", "");
|
||||||
|
@ -418,16 +431,19 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
*/
|
*/
|
||||||
private void doCheckHarvestStatusPost(HttpServletRequest request, HttpServletResponse response) {
|
private void doCheckHarvestStatusPost(HttpServletRequest request, HttpServletResponse response) {
|
||||||
|
|
||||||
log.error("check harvest status post.");
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
String newline = "\n";
|
String newline = "\n";
|
||||||
|
|
||||||
String sessionId = getSessionId(request);
|
String sessionId = getSessionId(request);
|
||||||
|
SessionInfo sessionInfo = sessionIdToSessionInfo.get(sessionId);
|
||||||
|
|
||||||
ArrayList<String> unsentLogLinesList = sessionIdToUnsentLogLines.get(sessionId);
|
//if we have started a thread, check the status and return it to the user
|
||||||
String[] unsentLogLines;
|
if(sessionInfo != null) {
|
||||||
if(unsentLogLinesList != null) {
|
|
||||||
|
String[] unsentLogLines;
|
||||||
|
ArrayList<String> unsentLogLinesList = sessionInfo.unsentLogLines;
|
||||||
|
|
||||||
|
//don't let the harvester thread add data to the unsent log lines list until we have both copied it and cleared it
|
||||||
synchronized (unsentLogLinesList) {
|
synchronized (unsentLogLinesList) {
|
||||||
unsentLogLines = unsentLogLinesList.toArray(new String[unsentLogLinesList.size()]);
|
unsentLogLines = unsentLogLinesList.toArray(new String[unsentLogLinesList.size()]);
|
||||||
unsentLogLinesList.clear();
|
unsentLogLinesList.clear();
|
||||||
|
@ -438,26 +454,22 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
progressSinceLastCheck += unsentLogLines[i] + newline;
|
progressSinceLastCheck += unsentLogLines[i] + newline;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean finished = !sessionIdToHarvestThread.containsKey(sessionId);
|
boolean finished = sessionInfo.isFinished();
|
||||||
|
|
||||||
VitroRequest vreq = new VitroRequest(request);
|
VitroRequest vreq = new VitroRequest(request);
|
||||||
ArrayList<String> newlyAddedUrls = new ArrayList<String>();
|
ArrayList<String> newlyAddedUrls = new ArrayList<String>();
|
||||||
ArrayList<String> newlyAddedUris = new ArrayList<String>();
|
ArrayList<String> newlyAddedUris = new ArrayList<String>();
|
||||||
if(finished) {
|
if(finished) {
|
||||||
newlyAddedUris = sessionIdToNewlyAddedUris.get(sessionId);
|
newlyAddedUris = sessionInfo.newlyAddedUris;
|
||||||
if(newlyAddedUris != null) {
|
if(newlyAddedUris != null) {
|
||||||
for(String uri : newlyAddedUris) {
|
for(String uri : newlyAddedUris) {
|
||||||
|
|
||||||
String namespaceRoot = vreq.getWebappDaoFactory().getDefaultNamespace();
|
|
||||||
|
|
||||||
String suffix = uri.substring(namespaceRoot.length());
|
|
||||||
String url = "display/" + suffix;
|
|
||||||
|
|
||||||
//newlyAddedUrls.add(uri);
|
|
||||||
//newlyAddedUrls.add(url);
|
|
||||||
newlyAddedUrls.add(UrlBuilder.getIndividualProfileUrl(uri, vreq));
|
newlyAddedUrls.add(UrlBuilder.getIndividualProfileUrl(uri, vreq));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//remove all entries in "sessionIdTo..." mappings for this session ID
|
||||||
|
clearSessionInfo(sessionId);
|
||||||
}
|
}
|
||||||
|
|
||||||
JSONObject json = new JSONObject();
|
JSONObject json = new JSONObject();
|
||||||
|
@ -467,6 +479,8 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
json.put("newlyAddedUrls", newlyAddedUrls);
|
json.put("newlyAddedUrls", newlyAddedUrls);
|
||||||
|
|
||||||
response.getWriter().write(json.toString());
|
response.getWriter().write(json.toString());
|
||||||
|
} else { //if we have not started a harvest thread, the browser should not have made this request to begin with. Bad browser, very bad browser.
|
||||||
|
log.error("Attempt to check status of a harvest that was never started! (Session ID " + sessionId + ")");
|
||||||
}
|
}
|
||||||
} catch(Exception e) {
|
} catch(Exception e) {
|
||||||
log.error(e, e);
|
log.error(e, e);
|
||||||
|
@ -518,13 +532,12 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
|
|
||||||
|
|
||||||
private void runScript(String sessionId, String script, String additionsFilePath) {
|
private void runScript(String sessionId, String script, String additionsFilePath) {
|
||||||
|
clearSessionInfo(sessionId);
|
||||||
|
|
||||||
if(!sessionIdToHarvestThread.containsKey(sessionId)) {
|
ScriptRunner runner = new ScriptRunner(sessionId, script, additionsFilePath);
|
||||||
|
SessionInfo info = new SessionInfo(sessionId, runner);
|
||||||
ScriptRunner runner = new ScriptRunner(sessionId, script, additionsFilePath);
|
sessionIdToSessionInfo.put(sessionId, info);
|
||||||
sessionIdToHarvestThread.put(sessionId, runner);
|
runner.start();
|
||||||
runner.start();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -567,10 +580,13 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
return request.getSession().getId();
|
return request.getSession().getId();
|
||||||
}
|
}
|
||||||
|
|
||||||
private ArrayList<String> extractNewlyAddedUris(File additionsFile) {
|
/**
|
||||||
ArrayList<String> newlyAddedUris = new ArrayList<String>();
|
* Parse an additions file (RDF/XML) to get the URIs of newly-harvested data, which will be sent to the browser and
|
||||||
|
* displayed to the user as links.
|
||||||
log.error(additionsFile.getAbsolutePath());
|
* @param additionsFile the file containing the newly-added RDF/XML
|
||||||
|
* @param newlyAddedUris a list in which to place the newly added URIs
|
||||||
|
*/
|
||||||
|
private void extractNewlyAddedUris(File additionsFile, List<String> newlyAddedUris) {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
|
||||||
|
@ -578,7 +594,6 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
Document document = factory.newDocumentBuilder().parse(additionsFile);
|
Document document = factory.newDocumentBuilder().parse(additionsFile);
|
||||||
//Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(additionsFile);
|
//Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(additionsFile);
|
||||||
NodeList descriptionNodes = document.getElementsByTagNameNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "Description");
|
NodeList descriptionNodes = document.getElementsByTagNameNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "Description");
|
||||||
log.error("Description nodes: " + descriptionNodes.getLength());
|
|
||||||
|
|
||||||
int numNodes = descriptionNodes.getLength();
|
int numNodes = descriptionNodes.getLength();
|
||||||
for(int i = 0; i < numNodes; i++) {
|
for(int i = 0; i < numNodes; i++) {
|
||||||
|
@ -596,15 +611,16 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} catch(Exception e) {
|
} catch(Exception e) {
|
||||||
log.error(e, e);
|
log.error(e, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
return newlyAddedUris;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse an XML node for all subnodes with qualified name "rdf:type", and return each's "rdf:resource" value in a list.
|
||||||
|
* @param descriptionNode the RDF description node
|
||||||
|
* @return a list of rdf:types of the given description node
|
||||||
|
*/
|
||||||
private ArrayList<String> getRdfTypes(Node descriptionNode) {
|
private ArrayList<String> getRdfTypes(Node descriptionNode) {
|
||||||
ArrayList<String> rdfTypesList = new ArrayList<String>();
|
ArrayList<String> rdfTypesList = new ArrayList<String>();
|
||||||
|
|
||||||
|
@ -631,6 +647,44 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
return rdfTypesList;
|
return rdfTypesList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If a session info object exists for this session ID, abort the thread if it is still running and remove the object.
|
||||||
|
* @param sessionId the session ID for which to clear info
|
||||||
|
*/
|
||||||
|
private void clearSessionInfo(String sessionId) {
|
||||||
|
SessionInfo sessionInfo = this.sessionIdToSessionInfo.get(sessionId);
|
||||||
|
if(sessionInfo != null) {
|
||||||
|
if(!sessionInfo.isFinished()) {
|
||||||
|
if(sessionInfo.harvestThread.isAlive()) {
|
||||||
|
sessionInfo.harvestThread.abortRun();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.sessionIdToSessionInfo.remove(sessionId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If all goes according to plan, clearSessionInfo() should be called once the client gets the last bit of information from the
|
||||||
|
* harvest. However, if the client doesn't request it (because the browser was closed, etc.) then the method will never get called.
|
||||||
|
* This method gets called every time the page is initially loaded, to look for session data that is 6 hours old or more, and remove
|
||||||
|
* it.
|
||||||
|
*/
|
||||||
|
private void cleanUpOldSessions() {
|
||||||
|
int minutesToAllowSession = 360;
|
||||||
|
long millisecondsToAllowSession = minutesToAllowSession * 60 * 1000;
|
||||||
|
|
||||||
|
Date now = new Date();
|
||||||
|
Set<String> keySet = this.sessionIdToSessionInfo.keySet();
|
||||||
|
for(String sessionId : keySet) {
|
||||||
|
SessionInfo info = this.sessionIdToSessionInfo.get(sessionId);
|
||||||
|
Date startTime = info.createTime;
|
||||||
|
long differenceInMilliseconds = now.getTime() - startTime.getTime();
|
||||||
|
if(differenceInMilliseconds > millisecondsToAllowSession) {
|
||||||
|
log.debug("Removing old session: " + sessionId);
|
||||||
|
clearSessionInfo(sessionId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -639,8 +693,59 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Information relating to a particular user session, created just before the harvester thread is starting.
|
||||||
|
* @author mbarbieri
|
||||||
|
*/
|
||||||
|
private class SessionInfo {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The session ID for this user session.
|
||||||
|
*/
|
||||||
|
@SuppressWarnings("unused")
|
||||||
|
public final String sessionId;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The time this object was created.
|
||||||
|
*/
|
||||||
|
public final Date createTime;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Harvester thread for his user session.
|
||||||
|
*/
|
||||||
|
public final ScriptRunner harvestThread;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Harvester output that has not yet been sent back to the browser, for this user session.
|
||||||
|
*/
|
||||||
|
public final ArrayList<String> unsentLogLines = new ArrayList<String>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flag indicating that the thread has finished.
|
||||||
|
*/
|
||||||
|
private boolean finished = false;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Newly added entries to VIVO, for this user session.
|
||||||
|
*/
|
||||||
|
public final ArrayList<String> newlyAddedUris = new ArrayList<String>();
|
||||||
|
|
||||||
|
public SessionInfo(String sessionId, ScriptRunner harvestThread) {
|
||||||
|
|
||||||
|
this.createTime = new Date();
|
||||||
|
|
||||||
|
this.sessionId = sessionId;
|
||||||
|
this.harvestThread = harvestThread;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void finish() {
|
||||||
|
finished = true;
|
||||||
|
}
|
||||||
|
public boolean isFinished() {
|
||||||
|
return finished;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -657,76 +762,91 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private Map<String, ScriptRunner> sessionIdToHarvestThread = new Hashtable<String, ScriptRunner>(); //Hashtable is threadsafe, HashMap is not
|
|
||||||
private Map<String, ArrayList<String>> sessionIdToUnsentLogLines = new Hashtable<String, ArrayList<String>>(); //Hashtable is threadsafe, HashMap is not
|
|
||||||
private Map<String, ArrayList<String>> sessionIdToNewlyAddedUris = new Hashtable<String, ArrayList<String>>();
|
|
||||||
private class ScriptRunner extends Thread {
|
private class ScriptRunner extends Thread {
|
||||||
|
|
||||||
private final String sessionId;
|
private final String sessionId;
|
||||||
private final String script;
|
private final String script;
|
||||||
private final String additionsFilePath;
|
private final String additionsFilePath;
|
||||||
|
|
||||||
|
private volatile boolean abort = false;
|
||||||
|
|
||||||
public ScriptRunner(String sessionId, String script, String additionsFilePath) {
|
public ScriptRunner(String sessionId, String script, String additionsFilePath) {
|
||||||
this.sessionId = sessionId;
|
this.sessionId = sessionId;
|
||||||
this.script = script;
|
this.script = script;
|
||||||
this.additionsFilePath = additionsFilePath;
|
this.additionsFilePath = additionsFilePath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void abortRun() {
|
||||||
|
abort = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
try {
|
SessionInfo sessionInfo = sessionIdToSessionInfo.get(sessionId);
|
||||||
ArrayList<String> unsentLogLines = sessionIdToUnsentLogLines.get(sessionId);
|
if(sessionInfo != null) {
|
||||||
if(unsentLogLines == null) {
|
|
||||||
unsentLogLines = new ArrayList<String>();
|
|
||||||
sessionIdToUnsentLogLines.put(this.sessionId, unsentLogLines);
|
|
||||||
}
|
|
||||||
|
|
||||||
File scriptFile = createScriptFile(this.script);
|
|
||||||
|
|
||||||
String command = "/bin/bash " + getHarvesterPath() + "scripts/temp/" + scriptFile.getName();
|
|
||||||
|
|
||||||
log.info("Running command: " + command);
|
|
||||||
Process pr = Runtime.getRuntime().exec(command);
|
|
||||||
|
|
||||||
//try { Thread.sleep(15000); } catch(InterruptedException e) {log.error(e, e);}
|
|
||||||
|
|
||||||
BufferedReader processOutputReader = new BufferedReader(new InputStreamReader(pr.getInputStream()));
|
|
||||||
for(String line = processOutputReader.readLine(); line != null; line = processOutputReader.readLine()) {
|
|
||||||
synchronized(unsentLogLines) {
|
|
||||||
unsentLogLines.add(line);
|
|
||||||
}
|
|
||||||
log.info("Harvester output: " + line);
|
|
||||||
}
|
|
||||||
|
|
||||||
BufferedReader processErrorReader = new BufferedReader(new InputStreamReader(pr.getErrorStream()));
|
|
||||||
for(String line = processErrorReader.readLine(); line != null; line = processErrorReader.readLine()) {
|
|
||||||
log.info("Harvester error: " + line);
|
|
||||||
}
|
|
||||||
|
|
||||||
int exitVal;
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
exitVal = pr.waitFor();
|
ArrayList<String> unsentLogLines = sessionInfo.unsentLogLines;
|
||||||
}
|
|
||||||
catch(InterruptedException e) {
|
|
||||||
throw new IOException(e.getMessage(), e);
|
|
||||||
}
|
|
||||||
|
|
||||||
// int exitVal = 0;
|
File scriptFile = createScriptFile(this.script);
|
||||||
// unsentLogLines.add("Screw the harvest, let's get to the last part");
|
|
||||||
|
|
||||||
File additionsFile = new File(this.additionsFilePath);
|
String command = "/bin/bash " + getHarvesterPath() + "scripts/temp/" + scriptFile.getName();
|
||||||
ArrayList<String> newlyAddedUris = extractNewlyAddedUris(additionsFile);
|
|
||||||
log.error("newly added URIs size: " + newlyAddedUris.size());
|
|
||||||
sessionIdToNewlyAddedUris.put(this.sessionId, newlyAddedUris);
|
|
||||||
|
|
||||||
log.debug("Harvester script exited with error code " + exitVal);
|
log.info("Running command: " + command);
|
||||||
log.info("Harvester script execution complete");
|
Process pr = Runtime.getRuntime().exec(command);
|
||||||
} catch (IOException e) {
|
|
||||||
log.error(e, e);
|
//try { Thread.sleep(15000); } catch(InterruptedException e) {log.error(e, e);}
|
||||||
} finally {
|
|
||||||
if(sessionIdToHarvestThread.containsKey(sessionId)) {
|
BufferedReader processOutputReader = new BufferedReader(new InputStreamReader(pr.getInputStream()));
|
||||||
sessionIdToHarvestThread.remove(sessionId);
|
for(String line = processOutputReader.readLine(); line != null; line = processOutputReader.readLine()) {
|
||||||
|
|
||||||
|
//don't add stuff to this list if the main thread is running a "transaction" of copying out the data to send to client and then clearing the list
|
||||||
|
synchronized(unsentLogLines) {
|
||||||
|
unsentLogLines.add(line);
|
||||||
|
}
|
||||||
|
log.info("Harvester output: " + line);
|
||||||
|
|
||||||
|
if(this.abort)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!this.abort){
|
||||||
|
BufferedReader processErrorReader = new BufferedReader(new InputStreamReader(pr.getErrorStream()));
|
||||||
|
for(String line = processErrorReader.readLine(); line != null; line = processErrorReader.readLine()) {
|
||||||
|
log.info("Harvester error: " + line);
|
||||||
|
|
||||||
|
if(this.abort)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(this.abort) {
|
||||||
|
log.debug("Aborting harvester script for session " + this.sessionId + ".");
|
||||||
|
pr.destroy();
|
||||||
|
} else {
|
||||||
|
int exitVal;
|
||||||
|
|
||||||
|
try {
|
||||||
|
exitVal = pr.waitFor();
|
||||||
|
}
|
||||||
|
catch(InterruptedException e) {
|
||||||
|
throw new IOException(e.getMessage(), e);
|
||||||
|
}
|
||||||
|
|
||||||
|
log.debug("Harvester script for session " + this.sessionId + " exited with error code " + exitVal);
|
||||||
|
|
||||||
|
File additionsFile = new File(this.additionsFilePath);
|
||||||
|
if(additionsFile.exists())
|
||||||
|
extractNewlyAddedUris(additionsFile, sessionInfo.newlyAddedUris);
|
||||||
|
else
|
||||||
|
log.error("Additions file not found: " + this.additionsFilePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
log.info("Harvester script execution complete");
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error(e, e);
|
||||||
|
} finally {
|
||||||
|
sessionInfo.finish();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue