Improved parsing of additions file, add some job-specific UI stuff

This commit is contained in:
mbarbier 2011-06-23 20:41:21 +00:00
parent fff37871bc
commit 5ceda63a65
4 changed files with 77 additions and 19 deletions

View file

@ -7,6 +7,7 @@ import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.logging.Log;
@ -52,17 +53,23 @@ class CsvFileHarvestJob implements FileHarvestJob {
*/
private final String friendlyName;
/**
* An array of rdf:type values which will be used for links.
*/
private final String[] rdfTypesForLinks;
/**
* Constructor.
* @param templateFileName just the name of the template file. The directory is assumed to be standard.
*/
public CsvFileHarvestJob(VitroRequest vreq, String templateFileName, String scriptFileName, String namespace, String friendlyName) {
public CsvFileHarvestJob(VitroRequest vreq, String templateFileName, String scriptFileName, String namespace, String friendlyName, String[] rdfTypesForLinks) {
    // Parameters:
    //   vreq              - the request this job is being built for; stored as-is
    //   templateFileName  - bare file name, resolved against getTemplateFileDirectory()
    //   scriptFileName    - bare file name, resolved against getScriptFileDirectory()
    //   namespace         - stored as-is for later use by the job
    //   friendlyName      - human-readable job name, used when building UI headings
    //   rdfTypesForLinks  - rdf:type values used for links; null is treated as "no types"
    this.vreq = vreq;
    this.templateFile = new File(getTemplateFileDirectory() + templateFileName);
    this.scriptFile = new File(getScriptFileDirectory() + scriptFileName);

    // Diagnostic output only: the resolved template path is not an error condition.
    log.debug(getTemplateFileDirectory() + templateFileName);

    this.namespace = namespace;
    this.friendlyName = friendlyName;

    // Defensive copy so later changes to the caller's array cannot alter this job.
    // A null argument becomes an empty array instead of failing inside Arrays.copyOf.
    if (rdfTypesForLinks == null) {
        this.rdfTypesForLinks = new String[0];
    } else {
        this.rdfTypesForLinks = Arrays.copyOf(rdfTypesForLinks, rdfTypesForLinks.length);
    }
}
/**
@ -193,8 +200,10 @@ class CsvFileHarvestJob implements FileHarvestJob {
private String performScriptTemplateReplacements(String scriptTemplateContents) {
String replacements = scriptTemplateContents;
String workingDirectory = TestFileController.getHarvesterPath();
String fileDirectory = TestFileController.getUploadPath(vreq);
replacements = replacements.replace("${WORKING_DIRECTORY}", workingDirectory);
replacements = replacements.replace("${UPLOADS_FOLDER}", fileDirectory);
/*
@ -240,12 +249,29 @@ class CsvFileHarvestJob implements FileHarvestJob {
public String getPageHeader() {
    // Heading shown at the top of the page: the job's friendly name wrapped in fixed text.
    StringBuilder header = new StringBuilder("Harvest ");
    header.append(this.friendlyName);
    header.append(" data from CSV file(s)");
    return header.toString();
}
/**
 * Builds the heading for the list of links, using the plural of the job's friendly name.
 * @return the text "Imported " followed by the pluralized friendly name
 */
@Override
public String getLinkHeader() {
    final String pluralName = pluralize(this.friendlyName);
    return "Imported " + pluralName;
}
/**
 * Builds a simple English plural of a noun for display in UI headings.
 * <p>
 * Rules, applied in order: an empty string is returned unchanged; words ending in
 * "s", "x", "ch" or "sh" take "es"; words ending in a consonant followed by "y"
 * replace the "y" with "ies"; everything else takes "s". Matching is case-sensitive
 * and irregular plurals are not handled.
 * @param input the singular noun; must not be null
 * @return the pluralized form of the input
 */
private String pluralize(String input) {
    if (input.length() == 0) {
        // Nothing to pluralize; avoid producing a bare "s".
        return input;
    }
    if (input.endsWith("s") || input.endsWith("x") || input.endsWith("ch") || input.endsWith("sh")) {
        return input + "es";
    }
    int last = input.length() - 1;
    if (last > 0 && input.charAt(last) == 'y') {
        char beforeY = input.charAt(last - 1);
        // Only consonant + "y" becomes "ies" ("Agency" -> "Agencies"); vowel + "y" just takes "s" ("Day" -> "Days").
        boolean consonant = Character.isLetter(beforeY) && "aeiouAEIOU".indexOf(beforeY) < 0;
        if (consonant) {
            return input.substring(0, last) + "ies";
        }
    }
    return input + "s";
}
/**
 * Reports where the template file for this job is located.
 * @return the path string of the template file held by this job
 */
@Override
public String getTemplateFilePath() {
    final File template = this.templateFile;
    return template.getPath();
}
/**
 * Supplies the rdf:type values configured for this job.
 * @return a fresh copy of the stored array, so callers cannot modify this job's own array
 */
@Override
public String[] getRdfTypesForLinks() {
    // Cloning an array yields a new array of the same length and contents.
    String[] copy = this.rdfTypesForLinks.clone();
    return copy;
}
}

View file

@ -24,23 +24,36 @@ interface FileHarvestJob {
* @return the path on the server of the file which the user can download to serve as a guide for what to upload.
*/
String getTemplateFilePath();
/**
* Gets the console script which can be used to run the harvest job.
* @return the console script which can be used to run the harvest job
*/
String getScript();
/**
* The path to the file containing the RDF/XML triples that get added to VIVO.
* @return the path to the file containing the RDF/XML triples that get added to VIVO
*/
String getAdditionsFilePath();
/**
* A heading to be shown at the top of the page.
* @return a heading to be shown at the top of the page
*/
String getPageHeader();
/**
* A heading to be shown above the area where links to profiles of newly-harvested entities are listed.
* @return a heading to be shown above the area where links to profiles of newly-harvested entities are listed
*/
String getLinkHeader();
/**
* Get an array of fully-qualified rdf:type values. When the harvest run is complete, any new entities which have an rdf:type represented
* in this array will have a link displayed on the page allowing the user to visit the new profile.
* @return an array of types to be used in links
*/
String[] getRdfTypesForLinks();
}

View file

@ -38,6 +38,7 @@ import org.w3c.dom.NodeList;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.FreemarkerHttpServlet;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ExceptionResponseValues;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues;
import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.TemplateResponseValues;
@ -105,6 +106,8 @@ public class TestFileController extends FreemarkerHttpServlet {
if((job != null) && TestFileController.knownJobs.contains(job.toLowerCase()))
jobKnown = "true";
FileHarvestJob jobObject = getJob(vreq, job);
Map<String, Object> body = new HashMap<String, Object>();
//body.put("uploadPostback", "false");
body.put("paramFirstUpload", PARAMETER_FIRST_UPLOAD);
@ -117,7 +120,8 @@ public class TestFileController extends FreemarkerHttpServlet {
body.put("job", job);
body.put("jobKnown", jobKnown);
body.put("postTo", POST_TO + "?" + PARAMETER_JOB + "=" + job);
body.put("jobSpecificHeader", getJob(vreq, job).getPageHeader());
body.put("jobSpecificHeader", jobObject.getPageHeader());
body.put("jobSpecificLinkHeader", jobObject.getLinkHeader());
return new TemplateResponseValues(TEMPLATE_DEFAULT, body);
} catch (Throwable e) {
log.error(e, e);
@ -175,9 +179,9 @@ public class TestFileController extends FreemarkerHttpServlet {
if(jobParameter == null)
log.error("No job specified.");
else if(jobParameter.equalsIgnoreCase(JOB_CSV_GRANT))
job = new CsvFileHarvestJob(vreq, "granttemplate.csv", "testCSVtoRDFgrant.sh", namespace, "Grant");
job = new CsvFileHarvestJob(vreq, "granttemplate.csv", "testCSVtoRDFgrant.sh", namespace, "Grant", new String[] {"http://vivoweb.org/ontology/core#Grant"});
else if(jobParameter.equalsIgnoreCase(JOB_CSV_PERSON))
job = new CsvFileHarvestJob(vreq, "persontemplate.csv", "testCSVtoRDFperson.sh", namespace, "Person");
job = new CsvFileHarvestJob(vreq, "persontemplate.csv", "testCSVtoRDFpeople.sh", namespace, "Person", new String[] {"http://xmlns.com/foaf/0.1/Person"});
else
log.error("Invalid job: " + jobParameter);
@ -419,8 +423,9 @@ public class TestFileController extends FreemarkerHttpServlet {
VitroRequest vreq = new VitroRequest(request);
ArrayList<String> newlyAddedUrls = new ArrayList<String>();
ArrayList<String> newlyAddedUris = new ArrayList<String>();
if(finished) {
ArrayList<String> newlyAddedUris = sessionIdToNewlyAddedUris.get(sessionId);
newlyAddedUris = sessionIdToNewlyAddedUris.get(sessionId);
if(newlyAddedUris != null) {
for(String uri : newlyAddedUris) {
@ -429,7 +434,9 @@ public class TestFileController extends FreemarkerHttpServlet {
String suffix = uri.substring(namespaceRoot.length());
String url = "display/" + suffix;
newlyAddedUrls.add(uri);
//newlyAddedUrls.add(uri);
//newlyAddedUrls.add(url);
newlyAddedUrls.add(UrlBuilder.getIndividualProfileUrl(uri, vreq));
}
}
}
@ -437,6 +444,7 @@ public class TestFileController extends FreemarkerHttpServlet {
JSONObject json = new JSONObject();
json.put("progressSinceLastCheck", progressSinceLastCheck);
json.put("finished", finished);
json.put("newlyAddedUris", newlyAddedUris);
json.put("newlyAddedUrls", newlyAddedUrls);
response.getWriter().write(json.toString());
@ -546,18 +554,22 @@ public class TestFileController extends FreemarkerHttpServlet {
log.error(additionsFile.getAbsolutePath());
try {
Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(additionsFile);
NodeList descriptionNodes = document.getElementsByTagName("http://www.w3.org/1999/02/22-rdf-syntax-ns#Description");
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
Document document = factory.newDocumentBuilder().parse(additionsFile);
//Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(additionsFile);
NodeList descriptionNodes = document.getElementsByTagNameNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "Description");
log.error("Description nodes: " + descriptionNodes.getLength());
int numNodes = descriptionNodes.getLength();
for(int i = 0; i < numNodes; i++) {
Node node = descriptionNodes.item(i);
ArrayList<String> types = getRdfTypes(node);
if(types.contains("http://vivoweb.org/ontology/core#Grant")) { //todo: generalize
NamedNodeMap attributes = node.getAttributes();
Node aboutAttribute = attributes.getNamedItem("http://www.w3.org/1999/02/22-rdf-syntax-ns#about");
Node aboutAttribute = attributes.getNamedItemNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "about");
if(aboutAttribute != null) {
String value = aboutAttribute.getNodeValue();
newlyAddedUris.add(value);
@ -582,12 +594,16 @@ public class TestFileController extends FreemarkerHttpServlet {
for(int i = 0; i < numChildren; i++) {
Node child = children.item(i);
String name = child.getNodeName();
if(name.equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) {
String namespace = child.getNamespaceURI();
String name = child.getLocalName();
String fullName = namespace + name;
if(fullName.equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) {
NamedNodeMap attributes = child.getAttributes();
Node resourceAttribute = attributes.getNamedItem("http://www.w3.org/1999/02/22-rdf-syntax-ns#resource");
Node resourceAttribute = attributes.getNamedItemNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "resource");
if(resourceAttribute != null) {
//String attributeNamespace = resourceAttribute.getNamespaceURI();
String value = resourceAttribute.getNodeValue();
//rdfTypesList.add(attributeNamespace + value);
rdfTypesList.add(value);
}
}
@ -676,6 +692,9 @@ public class TestFileController extends FreemarkerHttpServlet {
catch(InterruptedException e) {
throw new IOException(e.getMessage(), e);
}
// int exitVal = 0;
// unsentLogLines.add("Screw the harvest, let's get to the last part");
File additionsFile = new File(this.additionsFilePath);
ArrayList<String> newlyAddedUris = extractNewlyAddedUris(additionsFile);