File Harvest: almost done

This commit is contained in:
mbarbier 2011-06-29 21:13:36 +00:00
parent 7e80fbebf3
commit e28b7d7247
4 changed files with 319 additions and 157 deletions

View file

@ -1,5 +1,11 @@
<#-- $This file is distributed under the terms of the license in /doc/license.txt$ --> <#-- $This file is distributed under the terms of the license in /doc/license.txt$ -->
<#if !(user.loggedIn && user.hasSiteAdminAccess)>
<p>You must be an administrator to use this tool.</p>
<#else>
<#if jobKnown == "false"> <#if jobKnown == "false">
<p>Error: No file harvest job was specified, or an unknown job was specified.</p> <p>Error: No file harvest job was specified, or an unknown job was specified.</p>
<p>The end user should not see this error under normal circumstances, so this is probably a bug and should be reported.</p> <p>The end user should not see this error under normal circumstances, so this is probably a bug and should be reported.</p>
@ -61,10 +67,17 @@
var importedItems = document.getElementById("importedItems") var importedItems = document.getElementById("importedItems")
if(json.newlyAddedUrls.length > 0) {
    // One list item per newly harvested individual: the link target is its profile URL,
    // the link text is its URI, and it opens in a new tab.
    for(var i = 0; i < json.newlyAddedUrls.length; i++) {
        var newLi = document.createElement("li");
        newLi.innerHTML = "<a href=\"" + json.newlyAddedUrls[i] + "\" target=\"_blank\">" + json.newlyAddedUris[i] + "</a>";
        importedItems.appendChild(newLi);
    }
} else {
    // Nothing was imported: show the message as plain text. It must not be a link
    // (there is no URL to point at), and no loop index is valid in this branch.
    // The text comes from the "jobSpecificNoNewDataMessage" template variable so each job
    // (grants, persons, ...) shows its own wording.
    // NOTE(review): assumes this template is rendered with the body map built in
    // TestFileController.processRequest -- confirm.
    var noDataLi = document.createElement("li");
    noDataLi.innerHTML = "${jobSpecificNoNewDataMessage}";
    importedItems.appendChild(noDataLi);
}
@ -250,6 +263,10 @@
<div class="clearBothDiv" /> <div class="clearBothDiv" />
</div> </div>
<#-- if job known -->
</#if>
<#-- if user is logged-in with site admin access -->
</#if> </#if>

View file

@ -28,14 +28,15 @@ class CsvFileHarvestJob implements FileHarvestJob {
* @author mbarbieri * @author mbarbieri
*/ */
public enum JobType { public enum JobType {
GRANT("csvGrant", "granttemplate.csv", "testCSVtoRDFgrant.sh", "Grant", "Imported Grants", new String[] {"http://vivoweb.org/ontology/core#Grant"}), GRANT("csvGrant", "granttemplate.csv", "CSVtoRDFgrant.sh", "Grant", "Imported Grants", "No new grants were imported.", new String[] {"http://vivoweb.org/ontology/core#Grant"}),
PERSON("csvPerson", "persontemplate.csv", "testCSVtoRDFpeople.sh", "Person", "Imported Persons", new String[] {"http://xmlns.com/foaf/0.1/Person"}); PERSON("csvPerson", "persontemplate.csv", "CSVtoRDFperson.sh", "Person", "Imported Persons", "No new persons were imported.", new String[] {"http://xmlns.com/foaf/0.1/Person"});
public final String httpParameterName; public final String httpParameterName;
private final String templateFileName; private final String templateFileName;
private final String scriptFileName; private final String scriptFileName;
private final String friendlyName; private final String friendlyName;
private final String linkHeader; private final String linkHeader;
private final String noNewDataMessage;
private final String[] rdfTypesForLinks; private final String[] rdfTypesForLinks;
/** /**
@ -68,17 +69,18 @@ class CsvFileHarvestJob implements FileHarvestJob {
return returnValue; return returnValue;
} }
private JobType(String httpParameterName, String templateFileName, String scriptFileName, String friendlyName, String linkHeader, String[] rdfTypesForLinks) { private JobType(String httpParameterName, String templateFileName, String scriptFileName, String friendlyName, String linkHeader, String noNewDataMessage, String[] rdfTypesForLinks) {
this.httpParameterName = httpParameterName; this.httpParameterName = httpParameterName;
this.templateFileName = templateFileName; this.templateFileName = templateFileName;
this.scriptFileName = scriptFileName; this.scriptFileName = scriptFileName;
this.friendlyName = friendlyName; this.friendlyName = friendlyName;
this.linkHeader = linkHeader; this.linkHeader = linkHeader;
this.noNewDataMessage = noNewDataMessage;
this.rdfTypesForLinks = Arrays.copyOf(rdfTypesForLinks, rdfTypesForLinks.length); this.rdfTypesForLinks = Arrays.copyOf(rdfTypesForLinks, rdfTypesForLinks.length);
} }
private CsvFileHarvestJob constructCsvFileHarvestJob(VitroRequest vreq, String namespace) { private CsvFileHarvestJob constructCsvFileHarvestJob(VitroRequest vreq, String namespace) {
return new CsvFileHarvestJob(vreq, this.templateFileName, this.scriptFileName, namespace, this.friendlyName, this.linkHeader, this.rdfTypesForLinks); return new CsvFileHarvestJob(vreq, this.templateFileName, this.scriptFileName, namespace, this.friendlyName, this.linkHeader, this.noNewDataMessage, this.rdfTypesForLinks);
} }
} }
@ -106,6 +108,7 @@ class CsvFileHarvestJob implements FileHarvestJob {
/** /**
* The namespace to be used for the harvest. * The namespace to be used for the harvest.
*/ */
@SuppressWarnings("unused")
private final String namespace; private final String namespace;
/** /**
@ -118,11 +121,21 @@ class CsvFileHarvestJob implements FileHarvestJob {
*/ */
private final String linkHeader; private final String linkHeader;
/**
* The message to show to the user if there are no newly-harvested entities to show them.
*/
private final String noNewDataMessage;
/** /**
* An array of rdf:type values which will be used for links. * An array of rdf:type values which will be used for links.
*/ */
private final String[] rdfTypesForLinks; private final String[] rdfTypesForLinks;
/**
* The session ID of this user session.
*/
private final String sessionId;
public static CsvFileHarvestJob createJob(JobType jobType, VitroRequest vreq, String namespace) { public static CsvFileHarvestJob createJob(JobType jobType, VitroRequest vreq, String namespace) {
return jobType.constructCsvFileHarvestJob(vreq, namespace); return jobType.constructCsvFileHarvestJob(vreq, namespace);
@ -132,15 +145,17 @@ class CsvFileHarvestJob implements FileHarvestJob {
* Constructor. * Constructor.
* @param templateFileName just the name of the template file. The directory is assumed to be standard. * @param templateFileName just the name of the template file. The directory is assumed to be standard.
*/ */
private CsvFileHarvestJob(VitroRequest vreq, String templateFileName, String scriptFileName, String namespace, String friendlyName, String linkHeader, String noNewDataMessage, String[] rdfTypesForLinks) {
    this.vreq = vreq;

    // Both files are resolved against their standard directories; only the bare file names are passed in.
    this.templateFile = new File(getTemplateFileDirectory() + templateFileName);
    this.scriptFile = new File(getScriptFileDirectory() + scriptFileName);

    // Routine diagnostic output, not a failure: log at debug level so it stays out of the error log.
    log.debug(getTemplateFileDirectory() + templateFileName);

    this.namespace = namespace;
    this.friendlyName = friendlyName;
    this.linkHeader = linkHeader;
    this.noNewDataMessage = noNewDataMessage;

    // Defensive copy, so later changes to the caller's array cannot affect this job.
    this.rdfTypesForLinks = Arrays.copyOf(rdfTypesForLinks, rdfTypesForLinks.length);

    // Remember the session ID of the request that created this job.
    this.sessionId = this.vreq.getSession().getId();
}
/** /**
@ -273,16 +288,12 @@ class CsvFileHarvestJob implements FileHarvestJob {
String workingDirectory = TestFileController.getHarvesterPath(); String workingDirectory = TestFileController.getHarvesterPath();
String fileDirectory = TestFileController.getUploadPath(vreq); String fileDirectory = TestFileController.getUploadPath(vreq);
String harvestedDataPath = getHarvestedDataPath();
replacements = replacements.replace("${WORKING_DIRECTORY}", workingDirectory); replacements = replacements.replace("${WORKING_DIRECTORY}", workingDirectory);
replacements = replacements.replace("${UPLOADS_FOLDER}", fileDirectory); replacements = replacements.replace("${UPLOADS_FOLDER}", fileDirectory);
replacements = replacements.replace("${HARVESTED_DATA_PATH}", harvestedDataPath);
/*
* What needs to be replaced?
*
* task directory name
*/
//todo: complete
return replacements; return replacements;
} }
@ -310,10 +321,13 @@ class CsvFileHarvestJob implements FileHarvestJob {
return scriptTemplateContents; return scriptTemplateContents;
} }
/**
 * Builds the path of the directory for this session's harvested CSV data: the File Harvest root path,
 * followed by "harvested-data/csv/", the session ID, and a final slash.
 * @return the harvested-data directory path for this session, ending in a slash
 */
private String getHarvestedDataPath() {
    String fileHarvestRoot = TestFileController.getFileHarvestRootPath();
    return fileHarvestRoot + "harvested-data/csv/" + this.sessionId + "/";
}
@Override @Override
public String getAdditionsFilePath() { public String getAdditionsFilePath() {
return getHarvestedDataPath() + "additions.rdf.xml";
return TestFileController.getHarvesterPath() + TestFileController.PATH_TO_ADDITIONS_FILE;
} }
@Override @Override
@ -350,6 +364,11 @@ class CsvFileHarvestJob implements FileHarvestJob {
return help; return help;
} }
/**
 * Returns the message given to this job at construction time for the case where there are no
 * newly-harvested entities to show.
 * @return the "no new data" message for this job
 */
@Override
public String getNoNewDataMessage() {
    return noNewDataMessage;
}
} }

View file

@ -67,5 +67,11 @@ interface FileHarvestJob {
* @return the HTML to be shown in the collapsible "Help" area in the "Fill in data" section of the page. * @return the HTML to be shown in the collapsible "Help" area in the "Fill in data" section of the page.
*/ */
String getTemplateFillInHelp(); String getTemplateFillInHelp();
/**
* Get the message to show to the user if there are no newly-harvested entities to show them.
* @return the message to show to the user if there are no newly-harvested entities to show them
*/
String getNoNewDataMessage();
} }

View file

@ -6,16 +6,17 @@ import java.io.BufferedReader;
import java.io.DataInputStream; import java.io.DataInputStream;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FileWriter; import java.io.FileWriter;
import java.io.IOException; import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Date;
import java.util.HashMap; import java.util.HashMap;
import java.util.Hashtable; import java.util.Hashtable;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.Set;
import javax.servlet.ServletContext; import javax.servlet.ServletContext;
import javax.servlet.ServletException; import javax.servlet.ServletException;
import javax.servlet.ServletOutputStream; import javax.servlet.ServletOutputStream;
@ -29,7 +30,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.json.JSONException; import org.json.JSONException;
import org.json.JSONObject; import org.json.JSONObject;
import org.skife.csv.SimpleReader;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap; import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node; import org.w3c.dom.Node;
@ -62,6 +62,12 @@ public class TestFileController extends FreemarkerHttpServlet {
private static final String MODE_CHECK_STATUS = "checkStatus"; private static final String MODE_CHECK_STATUS = "checkStatus";
private static final String MODE_DOWNLOAD_TEMPLATE = "template"; private static final String MODE_DOWNLOAD_TEMPLATE = "template";
/**
* Stores information about the Harvester thread for a particular user session.
*/
private Map<String, SessionInfo> sessionIdToSessionInfo = new Hashtable<String, SessionInfo>(); //Hashtable is threadsafe, HashMap is not
/** /**
* A list of known job parameters (that is, "job=" values from the query string which we will accept from the browser). * A list of known job parameters (that is, "job=" values from the query string which we will accept from the browser).
* This should be filled in the static initializer and then never written to again. * This should be filled in the static initializer and then never written to again.
@ -81,19 +87,20 @@ public class TestFileController extends FreemarkerHttpServlet {
private static final String PATH_TO_HARVESTER = "/home/mbarbieri/workspace/HarvesterDev/"; private static final String PATH_TO_HARVESTER = "/home/mbarbieri/workspace/HarvesterDev/";
/** /**
* Relative path from the Harvester root directory to the Additions file containing rdf/xml added to VIVO from Harvest run. * Relative path from the Harvester root directory to the main area reserved for the VIVO File Harvest feature. Include
* final slash.
*/ */
public static final String PATH_TO_ADDITIONS_FILE = "harvested-data/csv/additions.rdf.xml"; //todo: this is job-specific private static final String PATH_TO_FILE_HARVEST_ROOT = "vivo/";
/** /**
* Relative path from the Harvester root directory to the directory where user-downloadable template files are stored. * Relative path from the Harvester root directory to the directory where user-downloadable template files are stored.
*/ */
public static final String PATH_TO_TEMPLATE_FILES = "files/"; public static final String PATH_TO_TEMPLATE_FILES = PATH_TO_FILE_HARVEST_ROOT + "templates/";
/** /**
* Relative path from the Harvester root directory to the directory containing the script templates. Include final slash. * Relative path from the Harvester root directory to the directory containing the script templates. Include final slash.
*/ */
public static final String PATH_TO_HARVESTER_SCRIPTS = "scripts/"; public static final String PATH_TO_HARVESTER_SCRIPTS = PATH_TO_FILE_HARVEST_ROOT + "scripts/";
static { static {
@ -121,6 +128,8 @@ public class TestFileController extends FreemarkerHttpServlet {
@Override @Override
protected ResponseValues processRequest(VitroRequest vreq) { protected ResponseValues processRequest(VitroRequest vreq) {
try { try {
cleanUpOldSessions();
String job = vreq.getParameter(PARAMETER_JOB); String job = vreq.getParameter(PARAMETER_JOB);
String jobKnown = "false"; String jobKnown = "false";
if((job != null) && TestFileController.knownJobs.contains(job.toLowerCase())) if((job != null) && TestFileController.knownJobs.contains(job.toLowerCase()))
@ -144,6 +153,7 @@ public class TestFileController extends FreemarkerHttpServlet {
body.put("jobSpecificLinkHeader", (jobObject != null) ? jobObject.getLinkHeader() : ""); body.put("jobSpecificLinkHeader", (jobObject != null) ? jobObject.getLinkHeader() : "");
body.put("jobSpecificDownloadHelp", (jobObject != null) ? jobObject.getTemplateDownloadHelp() : ""); body.put("jobSpecificDownloadHelp", (jobObject != null) ? jobObject.getTemplateDownloadHelp() : "");
body.put("jobSpecificFillInHelp", (jobObject != null) ? jobObject.getTemplateFillInHelp() : ""); body.put("jobSpecificFillInHelp", (jobObject != null) ? jobObject.getTemplateFillInHelp() : "");
body.put("jobSpecificNoNewDataMessage", (jobObject != null) ? jobObject.getNoNewDataMessage() : "");
return new TemplateResponseValues(TEMPLATE_DEFAULT, body); return new TemplateResponseValues(TEMPLATE_DEFAULT, body);
} catch (Throwable e) { } catch (Throwable e) {
log.error(e, e); log.error(e, e);
@ -166,6 +176,16 @@ public class TestFileController extends FreemarkerHttpServlet {
return harvesterPath; return harvesterPath;
} }
/**
 * Builds the path of the File Harvest area inside the Harvester installation by appending
 * PATH_TO_FILE_HARVEST_ROOT to PATH_TO_HARVESTER.
 * @return the path on this machine of the area within Harvester reserved for File Harvest
 */
public static String getFileHarvestRootPath() {
    return PATH_TO_HARVESTER + PATH_TO_FILE_HARVEST_ROOT;
}
/** /**
* Returns the base directory used for all File Harvest uploads. * Returns the base directory used for all File Harvest uploads.
* @param context the current servlet context * @param context the current servlet context
@ -228,8 +248,6 @@ public class TestFileController extends FreemarkerHttpServlet {
public void doPost(HttpServletRequest request, HttpServletResponse response) public void doPost(HttpServletRequest request, HttpServletResponse response)
throws IOException, ServletException { throws IOException, ServletException {
log.error("this is a post.");
try { try {
boolean isMultipart = ServletFileUpload.isMultipartContent(request); boolean isMultipart = ServletFileUpload.isMultipartContent(request);
String mode = request.getParameter(PARAMETER_MODE); String mode = request.getParameter(PARAMETER_MODE);
@ -260,7 +278,6 @@ public class TestFileController extends FreemarkerHttpServlet {
private void doFileUploadPost(HttpServletRequest request, HttpServletResponse response) private void doFileUploadPost(HttpServletRequest request, HttpServletResponse response)
throws IOException, ServletException { throws IOException, ServletException {
log.error("file upload post.");
JSONObject json = new JSONObject(); JSONObject json = new JSONObject();
try { try {
VitroRequest vreq = new VitroRequest(request); VitroRequest vreq = new VitroRequest(request);
@ -285,7 +302,6 @@ public class TestFileController extends FreemarkerHttpServlet {
// upload directory if it exists (a "first upload" parameter, initialized to "true" but which gets set to // upload directory if it exists (a "first upload" parameter, initialized to "true" but which gets set to
// "false" once the user starts uploading stuff is used for this). // "false" once the user starts uploading stuff is used for this).
String firstUpload = req.getParameter(PARAMETER_FIRST_UPLOAD); //clear directory on first upload String firstUpload = req.getParameter(PARAMETER_FIRST_UPLOAD); //clear directory on first upload
log.error(firstUpload);
if(firstUpload.toLowerCase().equals("true")) { if(firstUpload.toLowerCase().equals("true")) {
if(directory.exists()) { if(directory.exists()) {
File[] children = directory.listFiles(); File[] children = directory.listFiles();
@ -385,7 +401,6 @@ public class TestFileController extends FreemarkerHttpServlet {
*/ */
private void doHarvestPost(HttpServletRequest request, HttpServletResponse response) { private void doHarvestPost(HttpServletRequest request, HttpServletResponse response) {
log.error("harvest post.");
try { try {
VitroRequest vreq = new VitroRequest(request); VitroRequest vreq = new VitroRequest(request);
FileHarvestJob job = getJob(vreq, vreq.getParameter(PARAMETER_JOB)); FileHarvestJob job = getJob(vreq, vreq.getParameter(PARAMETER_JOB));
@ -394,9 +409,7 @@ public class TestFileController extends FreemarkerHttpServlet {
String script = job.getScript(); String script = job.getScript();
String additionsFilePath = job.getAdditionsFilePath(); String additionsFilePath = job.getAdditionsFilePath();
log.error("start harvest");
runScript(getSessionId(request), script, additionsFilePath); runScript(getSessionId(request), script, additionsFilePath);
log.error("end harvest");
JSONObject json = new JSONObject(); JSONObject json = new JSONObject();
json.put("progressSinceLastCheck", ""); json.put("progressSinceLastCheck", "");
@ -418,16 +431,19 @@ public class TestFileController extends FreemarkerHttpServlet {
*/ */
private void doCheckHarvestStatusPost(HttpServletRequest request, HttpServletResponse response) { private void doCheckHarvestStatusPost(HttpServletRequest request, HttpServletResponse response) {
log.error("check harvest status post.");
try { try {
String newline = "\n"; String newline = "\n";
String sessionId = getSessionId(request); String sessionId = getSessionId(request);
SessionInfo sessionInfo = sessionIdToSessionInfo.get(sessionId);
//if we have started a thread, check the status and return it to the user
if(sessionInfo != null) {
ArrayList<String> unsentLogLinesList = sessionIdToUnsentLogLines.get(sessionId);
String[] unsentLogLines; String[] unsentLogLines;
if(unsentLogLinesList != null) { ArrayList<String> unsentLogLinesList = sessionInfo.unsentLogLines;
//don't let the harvester thread add data to the unsent log lines list until we have both copied it and cleared it
synchronized (unsentLogLinesList) { synchronized (unsentLogLinesList) {
unsentLogLines = unsentLogLinesList.toArray(new String[unsentLogLinesList.size()]); unsentLogLines = unsentLogLinesList.toArray(new String[unsentLogLinesList.size()]);
unsentLogLinesList.clear(); unsentLogLinesList.clear();
@ -438,26 +454,22 @@ public class TestFileController extends FreemarkerHttpServlet {
progressSinceLastCheck += unsentLogLines[i] + newline; progressSinceLastCheck += unsentLogLines[i] + newline;
} }
boolean finished = !sessionIdToHarvestThread.containsKey(sessionId); boolean finished = sessionInfo.isFinished();
VitroRequest vreq = new VitroRequest(request); VitroRequest vreq = new VitroRequest(request);
ArrayList<String> newlyAddedUrls = new ArrayList<String>(); ArrayList<String> newlyAddedUrls = new ArrayList<String>();
ArrayList<String> newlyAddedUris = new ArrayList<String>(); ArrayList<String> newlyAddedUris = new ArrayList<String>();
if(finished) { if(finished) {
newlyAddedUris = sessionIdToNewlyAddedUris.get(sessionId); newlyAddedUris = sessionInfo.newlyAddedUris;
if(newlyAddedUris != null) { if(newlyAddedUris != null) {
for(String uri : newlyAddedUris) { for(String uri : newlyAddedUris) {
String namespaceRoot = vreq.getWebappDaoFactory().getDefaultNamespace();
String suffix = uri.substring(namespaceRoot.length());
String url = "display/" + suffix;
//newlyAddedUrls.add(uri);
//newlyAddedUrls.add(url);
newlyAddedUrls.add(UrlBuilder.getIndividualProfileUrl(uri, vreq)); newlyAddedUrls.add(UrlBuilder.getIndividualProfileUrl(uri, vreq));
} }
} }
//remove all entries in "sessionIdTo..." mappings for this session ID
clearSessionInfo(sessionId);
} }
JSONObject json = new JSONObject(); JSONObject json = new JSONObject();
@ -467,6 +479,8 @@ public class TestFileController extends FreemarkerHttpServlet {
json.put("newlyAddedUrls", newlyAddedUrls); json.put("newlyAddedUrls", newlyAddedUrls);
response.getWriter().write(json.toString()); response.getWriter().write(json.toString());
} else { //if we have not started a harvest thread, the browser should not have made this request to begin with. Bad browser, very bad browser.
log.error("Attempt to check status of a harvest that was never started! (Session ID " + sessionId + ")");
} }
} catch(Exception e) { } catch(Exception e) {
log.error(e, e); log.error(e, e);
@ -518,14 +532,13 @@ public class TestFileController extends FreemarkerHttpServlet {
private void runScript(String sessionId, String script, String additionsFilePath) { private void runScript(String sessionId, String script, String additionsFilePath) {
clearSessionInfo(sessionId);
if(!sessionIdToHarvestThread.containsKey(sessionId)) {
ScriptRunner runner = new ScriptRunner(sessionId, script, additionsFilePath); ScriptRunner runner = new ScriptRunner(sessionId, script, additionsFilePath);
sessionIdToHarvestThread.put(sessionId, runner); SessionInfo info = new SessionInfo(sessionId, runner);
sessionIdToSessionInfo.put(sessionId, info);
runner.start(); runner.start();
} }
}
@ -567,10 +580,13 @@ public class TestFileController extends FreemarkerHttpServlet {
return request.getSession().getId(); return request.getSession().getId();
} }
private ArrayList<String> extractNewlyAddedUris(File additionsFile) { /**
ArrayList<String> newlyAddedUris = new ArrayList<String>(); * Parse an additions file (RDF/XML) to get the URIs of newly-harvested data, which will be sent to the browser and
* displayed to the user as links.
log.error(additionsFile.getAbsolutePath()); * @param additionsFile the file containing the newly-added RDF/XML
* @param newlyAddedUris a list in which to place the newly added URIs
*/
private void extractNewlyAddedUris(File additionsFile, List<String> newlyAddedUris) {
try { try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
@ -578,7 +594,6 @@ public class TestFileController extends FreemarkerHttpServlet {
Document document = factory.newDocumentBuilder().parse(additionsFile); Document document = factory.newDocumentBuilder().parse(additionsFile);
//Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(additionsFile); //Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(additionsFile);
NodeList descriptionNodes = document.getElementsByTagNameNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "Description"); NodeList descriptionNodes = document.getElementsByTagNameNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "Description");
log.error("Description nodes: " + descriptionNodes.getLength());
int numNodes = descriptionNodes.getLength(); int numNodes = descriptionNodes.getLength();
for(int i = 0; i < numNodes; i++) { for(int i = 0; i < numNodes; i++) {
@ -596,15 +611,16 @@ public class TestFileController extends FreemarkerHttpServlet {
} }
} }
} catch(Exception e) { } catch(Exception e) {
log.error(e, e); log.error(e, e);
} }
return newlyAddedUris;
} }
/**
* Parse an XML node for all subnodes with qualified name "rdf:type", and return each's "rdf:resource" value in a list.
* @param descriptionNode the RDF description node
* @return a list of rdf:types of the given description node
*/
private ArrayList<String> getRdfTypes(Node descriptionNode) { private ArrayList<String> getRdfTypes(Node descriptionNode) {
ArrayList<String> rdfTypesList = new ArrayList<String>(); ArrayList<String> rdfTypesList = new ArrayList<String>();
@ -631,6 +647,44 @@ public class TestFileController extends FreemarkerHttpServlet {
return rdfTypesList; return rdfTypesList;
} }
/**
 * Discards the session info stored for a session ID, if there is any. When the stored harvest is not
 * yet flagged as finished and its thread is still alive, the thread is asked to abort before the
 * entry is removed.
 * @param sessionId the session ID for which to clear info
 */
private void clearSessionInfo(String sessionId) {
    SessionInfo existing = this.sessionIdToSessionInfo.get(sessionId);
    if (existing == null) {
        // nothing recorded for this session
        return;
    }

    boolean stillRunning = !existing.isFinished() && existing.harvestThread.isAlive();
    if (stillRunning) {
        existing.harvestThread.abortRun();
    }

    this.sessionIdToSessionInfo.remove(sessionId);
}
/**
 * If all goes according to plan, clearSessionInfo() should be called once the client gets the last bit of
 * information from the harvest. However, if the client doesn't request it (because the browser was closed,
 * etc.) then that method will never get called. This method looks for session data that is more than
 * 6 hours old and removes it via clearSessionInfo().
 */
private void cleanUpOldSessions() {
    final long minutesToAllowSession = 360;
    final long millisecondsToAllowSession = minutesToAllowSession * 60L * 1000L;
    final long now = new Date().getTime();

    // Work on a snapshot of the keys. clearSessionInfo() removes entries from the map, and removing an
    // entry while iterating the map's live key set makes that iterator throw
    // ConcurrentModificationException on its next step.
    Set<String> liveKeys = this.sessionIdToSessionInfo.keySet();
    List<String> sessionIds = new ArrayList<String>(liveKeys);

    for (String sessionId : sessionIds) {
        SessionInfo info = this.sessionIdToSessionInfo.get(sessionId);
        if (info == null) {
            // The entry disappeared after the snapshot was taken; nothing left to clean up.
            continue;
        }

        long ageInMilliseconds = now - info.createTime.getTime();
        if (ageInMilliseconds > millisecondsToAllowSession) {
            log.debug("Removing old session: " + sessionId);
            clearSessionInfo(sessionId);
        }
    }
}
@ -639,8 +693,59 @@ public class TestFileController extends FreemarkerHttpServlet {
/**
 * Information relating to a particular user session, created just before the harvester thread is started.
 * An instance is shared between the harvester thread (which appends log lines and newly added URIs and
 * finally calls {@link #finish()}) and the request-handling code (which reads them).
 * @author mbarbieri
 */
private class SessionInfo {

    /**
     * The session ID for this user session.
     */
    @SuppressWarnings("unused")
    public final String sessionId;

    /**
     * The time this object was created.
     */
    public final Date createTime;

    /**
     * The Harvester thread for this user session.
     */
    public final ScriptRunner harvestThread;

    /**
     * Harvester output that has not yet been sent back to the browser, for this user session.
     * Users of this list synchronize on the list itself while reading or modifying it.
     */
    public final ArrayList<String> unsentLogLines = new ArrayList<String>();

    /**
     * Flag indicating that the thread has finished. Declared volatile because it is set by the
     * harvester thread and read by other threads; without it a reader is not guaranteed to ever
     * observe the update.
     */
    private volatile boolean finished = false;

    /**
     * Newly added entries to VIVO, for this user session.
     */
    public final ArrayList<String> newlyAddedUris = new ArrayList<String>();

    /**
     * Creates the info object and records the current time as its creation time.
     * @param sessionId the session ID for this user session
     * @param harvestThread the Harvester thread for this user session
     */
    public SessionInfo(String sessionId, ScriptRunner harvestThread) {
        this.createTime = new Date();
        this.sessionId = sessionId;
        this.harvestThread = harvestThread;
    }

    /**
     * Marks the harvest for this session as finished.
     */
    public void finish() {
        finished = true;
    }

    /**
     * Reports whether {@link #finish()} has been called.
     * @return true once the harvest has been marked as finished
     */
    public boolean isFinished() {
        return finished;
    }
}
@ -657,29 +762,31 @@ public class TestFileController extends FreemarkerHttpServlet {
} }
private Map<String, ScriptRunner> sessionIdToHarvestThread = new Hashtable<String, ScriptRunner>(); //Hashtable is threadsafe, HashMap is not
private Map<String, ArrayList<String>> sessionIdToUnsentLogLines = new Hashtable<String, ArrayList<String>>(); //Hashtable is threadsafe, HashMap is not
private Map<String, ArrayList<String>> sessionIdToNewlyAddedUris = new Hashtable<String, ArrayList<String>>();
private class ScriptRunner extends Thread { private class ScriptRunner extends Thread {
private final String sessionId; private final String sessionId;
private final String script; private final String script;
private final String additionsFilePath; private final String additionsFilePath;
private volatile boolean abort = false;
public ScriptRunner(String sessionId, String script, String additionsFilePath) { public ScriptRunner(String sessionId, String script, String additionsFilePath) {
this.sessionId = sessionId; this.sessionId = sessionId;
this.script = script; this.script = script;
this.additionsFilePath = additionsFilePath; this.additionsFilePath = additionsFilePath;
} }
public void abortRun() {
abort = true;
}
@Override @Override
public void run() { public void run() {
SessionInfo sessionInfo = sessionIdToSessionInfo.get(sessionId);
if(sessionInfo != null) {
try { try {
ArrayList<String> unsentLogLines = sessionIdToUnsentLogLines.get(sessionId); ArrayList<String> unsentLogLines = sessionInfo.unsentLogLines;
if(unsentLogLines == null) {
unsentLogLines = new ArrayList<String>();
sessionIdToUnsentLogLines.put(this.sessionId, unsentLogLines);
}
File scriptFile = createScriptFile(this.script); File scriptFile = createScriptFile(this.script);
@ -692,17 +799,31 @@ public class TestFileController extends FreemarkerHttpServlet {
BufferedReader processOutputReader = new BufferedReader(new InputStreamReader(pr.getInputStream())); BufferedReader processOutputReader = new BufferedReader(new InputStreamReader(pr.getInputStream()));
for(String line = processOutputReader.readLine(); line != null; line = processOutputReader.readLine()) { for(String line = processOutputReader.readLine(); line != null; line = processOutputReader.readLine()) {
//don't add stuff to this list if the main thread is running a "transaction" of copying out the data to send to client and then clearing the list
synchronized(unsentLogLines) { synchronized(unsentLogLines) {
unsentLogLines.add(line); unsentLogLines.add(line);
} }
log.info("Harvester output: " + line); log.info("Harvester output: " + line);
if(this.abort)
break;
} }
if(!this.abort){
BufferedReader processErrorReader = new BufferedReader(new InputStreamReader(pr.getErrorStream())); BufferedReader processErrorReader = new BufferedReader(new InputStreamReader(pr.getErrorStream()));
for(String line = processErrorReader.readLine(); line != null; line = processErrorReader.readLine()) { for(String line = processErrorReader.readLine(); line != null; line = processErrorReader.readLine()) {
log.info("Harvester error: " + line); log.info("Harvester error: " + line);
if(this.abort)
break;
}
} }
if(this.abort) {
log.debug("Aborting harvester script for session " + this.sessionId + ".");
pr.destroy();
} else {
int exitVal; int exitVal;
try { try {
@ -712,21 +833,20 @@ public class TestFileController extends FreemarkerHttpServlet {
throw new IOException(e.getMessage(), e); throw new IOException(e.getMessage(), e);
} }
// int exitVal = 0; log.debug("Harvester script for session " + this.sessionId + " exited with error code " + exitVal);
// unsentLogLines.add("Screw the harvest, let's get to the last part");
File additionsFile = new File(this.additionsFilePath); File additionsFile = new File(this.additionsFilePath);
ArrayList<String> newlyAddedUris = extractNewlyAddedUris(additionsFile); if(additionsFile.exists())
log.error("newly added URIs size: " + newlyAddedUris.size()); extractNewlyAddedUris(additionsFile, sessionInfo.newlyAddedUris);
sessionIdToNewlyAddedUris.put(this.sessionId, newlyAddedUris); else
log.error("Additions file not found: " + this.additionsFilePath);
}
log.debug("Harvester script exited with error code " + exitVal);
log.info("Harvester script execution complete"); log.info("Harvester script execution complete");
} catch (IOException e) { } catch (IOException e) {
log.error(e, e); log.error(e, e);
} finally { } finally {
if(sessionIdToHarvestThread.containsKey(sessionId)) { sessionInfo.finish();
sessionIdToHarvestThread.remove(sessionId);
} }
} }
} }