From 5ceda63a65117810a30d228dfe7a61cd879bcae8 Mon Sep 17 00:00:00 2001 From: mbarbier Date: Thu, 23 Jun 2011 20:41:21 +0000 Subject: [PATCH] Improved parsing of additions file, add some job-specific UI stuff --- .../freemarker/body/harvester/testfile.ftl | 4 +- .../harvester/CsvFileHarvestJob.java | 28 +++++++++++- .../controller/harvester/FileHarvestJob.java | 19 ++++++-- .../harvester/TestFileController.java | 45 +++++++++++++------ 4 files changed, 77 insertions(+), 19 deletions(-) diff --git a/productMods/templates/freemarker/body/harvester/testfile.ftl b/productMods/templates/freemarker/body/harvester/testfile.ftl index d9cfa7bc..ee35ec66 100644 --- a/productMods/templates/freemarker/body/harvester/testfile.ftl +++ b/productMods/templates/freemarker/body/harvester/testfile.ftl @@ -58,7 +58,7 @@ for(var i = 0; i < json.newlyAddedUrls.length; i++) { var newLi = document.createElement("li"); - newLi.innerHTML = "" + json.newlyAddedUrls[i] + ""; + newLi.innerHTML = "" + json.newlyAddedUris[i] + ""; importedGrants.appendChild(newLi); } } @@ -233,7 +233,7 @@
-
Imported grants
+
${jobSpecificLinkHeader}
diff --git a/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/CsvFileHarvestJob.java b/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/CsvFileHarvestJob.java index bff4bed7..2a654940 100644 --- a/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/CsvFileHarvestJob.java +++ b/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/CsvFileHarvestJob.java @@ -7,6 +7,7 @@ import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.commons.logging.Log; @@ -52,17 +53,23 @@ class CsvFileHarvestJob implements FileHarvestJob { */ private final String friendlyName; + /** + * An array of rdf:type values which will be used for links. + */ + private final String[] rdfTypesForLinks; + /** * Constructor. * @param templateFileName just the name of the template file. The directory is assumed to be standard. */ - public CsvFileHarvestJob(VitroRequest vreq, String templateFileName, String scriptFileName, String namespace, String friendlyName) { + public CsvFileHarvestJob(VitroRequest vreq, String templateFileName, String scriptFileName, String namespace, String friendlyName, String[] rdfTypesForLinks) { this.vreq = vreq; this.templateFile = new File(getTemplateFileDirectory() + templateFileName); this.scriptFile = new File(getScriptFileDirectory() + scriptFileName); log.error(getTemplateFileDirectory() + templateFileName); this.namespace = namespace; this.friendlyName = friendlyName; + this.rdfTypesForLinks = Arrays.copyOf(rdfTypesForLinks, rdfTypesForLinks.length); } /** @@ -193,8 +200,10 @@ class CsvFileHarvestJob implements FileHarvestJob { private String performScriptTemplateReplacements(String scriptTemplateContents) { String replacements = scriptTemplateContents; + String workingDirectory = TestFileController.getHarvesterPath(); String fileDirectory = TestFileController.getUploadPath(vreq); + replacements = replacements.replace("${WORKING_DIRECTORY}", workingDirectory); replacements = replacements.replace("${UPLOADS_FOLDER}", fileDirectory); /* @@ -240,12 +249,29 @@ class CsvFileHarvestJob implements FileHarvestJob { public String getPageHeader() { return "Harvest " + this.friendlyName + " data from CSV file(s)"; } + + @Override + public String getLinkHeader() { + return "Imported " + pluralize(this.friendlyName); + } + private String pluralize(String input) { + String plural = input + "s"; + if(input.endsWith("s") || input.endsWith("x")) + plural = input + "es"; + return plural; + } + @Override public String getTemplateFilePath() { return this.templateFile.getPath(); } + @Override + public String[] getRdfTypesForLinks() { + return Arrays.copyOf(this.rdfTypesForLinks, this.rdfTypesForLinks.length); + } + } diff --git a/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/FileHarvestJob.java b/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/FileHarvestJob.java index 8c7db7c7..b26c5a54 100644 --- a/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/FileHarvestJob.java +++ b/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/FileHarvestJob.java @@ -24,23 +24,36 @@ interface FileHarvestJob { * @return the path on the server of the file which the user can download to serve as a guide for what to upload. */ String getTemplateFilePath(); - + /** * Gets the console script which can be used to run the harvest job. * @return the console script which can be used to run the harvest job */ String getScript(); - + /** * The path to the file containing the RDF/XML triples that get added to VIVO. * @return the path to the file containing the RDF/XML triples that get added to VIVO */ String getAdditionsFilePath(); - + /** * A heading to be shown at the top of the page. * @return a heading to be shown at the top of the page */ String getPageHeader(); + + /** + * A heading to be shown above the area where links to profiles of newly-harvested entities are listed. + * @return a heading to be shown above the area where links to profiles of newly-harvested entities are listed + */ + String getLinkHeader(); + + /** + * Get an array of fully-qualified rdf:type values. When the harvest run is complete, any new entities which have an rdf:type represented + * in this array will have a link displayed on the page allowing the user to visit the new profile. + * @return an array of types to be used in links + */ + String[] getRdfTypesForLinks(); } diff --git a/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/TestFileController.java b/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/TestFileController.java index b0d9a8ed..61120253 100644 --- a/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/TestFileController.java +++ b/src/edu/cornell/mannlib/vitro/webapp/controller/harvester/TestFileController.java @@ -38,6 +38,7 @@ import org.w3c.dom.NodeList; import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.FreemarkerHttpServlet; +import edu.cornell.mannlib.vitro.webapp.controller.freemarker.UrlBuilder; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ExceptionResponseValues; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.ResponseValues; import edu.cornell.mannlib.vitro.webapp.controller.freemarker.responsevalues.TemplateResponseValues; @@ -105,6 +106,8 @@ public class TestFileController extends FreemarkerHttpServlet { if((job != null) && TestFileController.knownJobs.contains(job.toLowerCase())) jobKnown = "true"; + FileHarvestJob jobObject = getJob(vreq, job); + Map body = new HashMap(); //body.put("uploadPostback", "false"); body.put("paramFirstUpload", PARAMETER_FIRST_UPLOAD); @@ -117,7 +120,8 @@ public class TestFileController extends FreemarkerHttpServlet { body.put("job", job); body.put("jobKnown", jobKnown); body.put("postTo", POST_TO + "?" + PARAMETER_JOB + "=" + job); - body.put("jobSpecificHeader", getJob(vreq, job).getPageHeader()); + body.put("jobSpecificHeader", jobObject.getPageHeader()); + body.put("jobSpecificLinkHeader", jobObject.getLinkHeader()); return new TemplateResponseValues(TEMPLATE_DEFAULT, body); } catch (Throwable e) { log.error(e, e); @@ -175,9 +179,9 @@ public class TestFileController extends FreemarkerHttpServlet { if(jobParameter == null) log.error("No job specified."); else if(jobParameter.equalsIgnoreCase(JOB_CSV_GRANT)) - job = new CsvFileHarvestJob(vreq, "granttemplate.csv", "testCSVtoRDFgrant.sh", namespace, "Grant"); + job = new CsvFileHarvestJob(vreq, "granttemplate.csv", "testCSVtoRDFgrant.sh", namespace, "Grant", new String[] {"http://vivoweb.org/ontology/core#Grant"}); else if(jobParameter.equalsIgnoreCase(JOB_CSV_PERSON)) - job = new CsvFileHarvestJob(vreq, "persontemplate.csv", "testCSVtoRDFperson.sh", namespace, "Person"); + job = new CsvFileHarvestJob(vreq, "persontemplate.csv", "testCSVtoRDFpeople.sh", namespace, "Person", new String[] {"http://xmlns.com/foaf/0.1/Person"}); else log.error("Invalid job: " + jobParameter); @@ -419,8 +423,9 @@ public class TestFileController extends FreemarkerHttpServlet { VitroRequest vreq = new VitroRequest(request); ArrayList newlyAddedUrls = new ArrayList(); + ArrayList newlyAddedUris = new ArrayList(); if(finished) { - ArrayList newlyAddedUris = sessionIdToNewlyAddedUris.get(sessionId); + newlyAddedUris = sessionIdToNewlyAddedUris.get(sessionId); if(newlyAddedUris != null) { for(String uri : newlyAddedUris) { @@ -429,7 +434,9 @@ public class TestFileController extends FreemarkerHttpServlet { String suffix = uri.substring(namespaceRoot.length()); String url = "display/" + suffix; - newlyAddedUrls.add(uri); + //newlyAddedUrls.add(uri); + //newlyAddedUrls.add(url); + newlyAddedUrls.add(UrlBuilder.getIndividualProfileUrl(uri, vreq)); } } } @@ -437,6 +444,7 @@ public class TestFileController extends FreemarkerHttpServlet { JSONObject json = new JSONObject(); json.put("progressSinceLastCheck", progressSinceLastCheck); json.put("finished", finished); + json.put("newlyAddedUris", newlyAddedUris); json.put("newlyAddedUrls", newlyAddedUrls); response.getWriter().write(json.toString()); @@ -546,18 +554,22 @@ public class TestFileController extends FreemarkerHttpServlet { log.error(additionsFile.getAbsolutePath()); try { - Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(additionsFile); - NodeList descriptionNodes = document.getElementsByTagName("http://www.w3.org/1999/02/22-rdf-syntax-ns#Description"); - + DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); + factory.setNamespaceAware(true); + Document document = factory.newDocumentBuilder().parse(additionsFile); + //Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(additionsFile); + NodeList descriptionNodes = document.getElementsByTagNameNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "Description"); + log.error("Description nodes: " + descriptionNodes.getLength()); + int numNodes = descriptionNodes.getLength(); for(int i = 0; i < numNodes; i++) { Node node = descriptionNodes.item(i); ArrayList types = getRdfTypes(node); if(types.contains("http://vivoweb.org/ontology/core#Grant")) { //todo: generalize - + NamedNodeMap attributes = node.getAttributes(); - Node aboutAttribute = attributes.getNamedItem("http://www.w3.org/1999/02/22-rdf-syntax-ns#about"); + Node aboutAttribute = attributes.getNamedItemNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "about"); if(aboutAttribute != null) { String value = aboutAttribute.getNodeValue(); newlyAddedUris.add(value); @@ -582,12 +594,16 @@ public class TestFileController extends FreemarkerHttpServlet { for(int i = 0; i < numChildren; i++) { Node child = children.item(i); - String name = child.getNodeName(); - if(name.equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) { + String namespace = child.getNamespaceURI(); + String name = child.getLocalName(); + String fullName = namespace + name; + if(fullName.equals("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")) { NamedNodeMap attributes = child.getAttributes(); - Node resourceAttribute = attributes.getNamedItem("http://www.w3.org/1999/02/22-rdf-syntax-ns#resource"); + Node resourceAttribute = attributes.getNamedItemNS("http://www.w3.org/1999/02/22-rdf-syntax-ns#", "resource"); if(resourceAttribute != null) { + //String attributeNamespace = resourceAttribute.getNamespaceURI(); String value = resourceAttribute.getNodeValue(); + //rdfTypesList.add(attributeNamespace + value); rdfTypesList.add(value); } } @@ -676,6 +692,9 @@ public class TestFileController extends FreemarkerHttpServlet { catch(InterruptedException e) { throw new IOException(e.getMessage(), e); } + +// int exitVal = 0; +// unsentLogLines.add("Screw the harvest, let's get to the last part"); File additionsFile = new File(this.additionsFilePath); ArrayList newlyAddedUris = extractNewlyAddedUris(additionsFile);