FileHarvestJob.java: added getScript() and performHarvest() methods

Harvester.java: added stringsToArray(), modified API methods to use it
TestFileController.java: implemented preliminary harvest
This commit is contained in:
mbarbier 2011-05-10 20:12:46 +00:00
parent 3804cf4d70
commit 663090318d
3 changed files with 301 additions and 61 deletions

View file

@ -18,5 +18,17 @@ interface FileHarvestJob {
* @return null if success, message to be returned to the user if failure
*/
String validateUpload(File file);
/**
* Gets the console script which can be used to run the harvest job.
* NOTE(review): the CsvHarvestJob implementation appears able to return null when its script
* template cannot be read -- confirm whether callers are expected to handle a null result.
* @return the console script which can be used to run the harvest job
*/
String getScript();
/**
* Runs a harvest on the files in the specified directory.
* NOTE(review): the method returns nothing and declares no checked exception, so how a failed
* harvest is reported to the caller is not visible from this declaration -- confirm.
* @param directory the directory containing files to harvest
*/
void performHarvest(File directory);
}

View file

@ -2,6 +2,8 @@
package edu.cornell.mannlib.vitro.webapp.controller.harvester;
import java.util.ArrayList;
/* //PLEASE SEE JAVADOC COMMENT FOR CLASS BELOW
import org.vivoweb.harvester.diff.Diff;
import org.vivoweb.harvester.fetch.CSVtoRDF;
@ -52,99 +54,135 @@ import org.vivoweb.harvester.util.XPathTool;
class Harvester {
/*
// diff
public static void runDiff(String ... args) {
Diff.main(args);
public static void runDiff(Object ... args) {
Diff.main(stringsToArray(args));
}
// fetch
public static void runCSVtoRDF(String ... args) {
CSVtoRDF.main(args);
public static void runCSVtoRDF(Object ... args) {
CSVtoRDF.main(stringsToArray(args));
}
public static void runD2RMapFetch(String ... args) {
D2RMapFetch.main(args);
public static void runD2RMapFetch(Object ... args) {
D2RMapFetch.main(stringsToArray(args));
}
public static void runJDBCFetch(String ... args) {
JDBCFetch.main(args);
public static void runJDBCFetch(Object ... args) {
JDBCFetch.main(stringsToArray(args));
}
public static void runNLMJournalFetch(String ... args) {
NLMJournalFetch.main(args);
public static void runNLMJournalFetch(Object ... args) {
NLMJournalFetch.main(stringsToArray(args));
}
public static void runOAIFetch(String ... args) {
OAIFetch.main(args);
public static void runOAIFetch(Object ... args) {
OAIFetch.main(stringsToArray(args));
}
public static void runPubmedFetch(String ... args) {
PubmedFetch.main(args);
public static void runPubmedFetch(Object ... args) {
PubmedFetch.main(stringsToArray(args));
}
public static void runPubmedHTTPFetch(String ... args) {
PubmedHTTPFetch.main(args);
public static void runPubmedHTTPFetch(Object ... args) {
PubmedHTTPFetch.main(stringsToArray(args));
}
// qualify
public static void runChangeNamespace(String ... args) {
ChangeNamespace.main(args);
public static void runChangeNamespace(Object ... args) {
ChangeNamespace.main(stringsToArray(args));
}
public static void runQualify(String ... args) {
Qualify.main(args);
public static void runQualify(Object ... args) {
Qualify.main(stringsToArray(args));
}
public static void runRenameBlankNodes(String ... args) {
RenameBlankNodes.main(args);
public static void runRenameBlankNodes(Object ... args) {
RenameBlankNodes.main(stringsToArray(args));
}
public static void runRenameResources(String ... args) {
RenameResources.main(args);
public static void runRenameResources(Object ... args) {
RenameResources.main(stringsToArray(args));
}
public static void runSmush(String ... args) {
Smush.main(args);
public static void runSmush(Object ... args) {
Smush.main(stringsToArray(args));
}
public static void runSplitProperty(String ... args) {
SplitProperty.main(args);
public static void runSplitProperty(Object ... args) {
SplitProperty.main(stringsToArray(args));
}
// score
public static void runMatch(String ... args) {
Match.main(args);
public static void runMatch(Object ... args) {
Match.main(stringsToArray(args));
}
public static void runPubmedScore(String ... args) {
PubmedScore.main(args);
public static void runPubmedScore(Object ... args) {
PubmedScore.main(stringsToArray(args));
}
public static void runScore(String ... args) {
Score.main(args);
public static void runScore(Object ... args) {
Score.main(stringsToArray(args));
}
// transfer
public static void transfer(String ... args) {
Transfer.main(args);
public static void runTransfer(Object ... args) {
Transfer.main(stringsToArray(args));
}
// translate
public static void runGlozeTranslator(String ... args) {
GlozeTranslator.main(args);
public static void runGlozeTranslator(Object ... args) {
GlozeTranslator.main(stringsToArray(args));
}
public static void runRunBibutils(String ... args) {
RunBibutils.main(args);
public static void runRunBibutils(Object ... args) {
RunBibutils.main(stringsToArray(args));
}
public static void runSanitizeMODSXML(String ... args) {
SanitizeMODSXML.main(args);
public static void runSanitizeMODSXML(Object ... args) {
SanitizeMODSXML.main(stringsToArray(args));
}
public static void runSPARQLTranslator(String ... args) {
SPARQLTranslator.main(args);
public static void runSPARQLTranslator(Object ... args) {
SPARQLTranslator.main(stringsToArray(args));
}
public static void runXSLTranslator(String ... args) {
XSLTranslator.main(args);
public static void runXSLTranslator(Object ... args) {
XSLTranslator.main(stringsToArray(args));
}
// util
public static void runCSVtoJDBC(String ... args) {
CSVtoJDBC.main(args);
public static void runCSVtoJDBC(Object ... args) {
CSVtoJDBC.main(stringsToArray(args));
}
public static void runDatabaseClone(String ... args) {
DatabaseClone.main(args);
public static void runDatabaseClone(Object ... args) {
DatabaseClone.main(stringsToArray(args));
}
public static void runMerge(String ... args) {
Merge.main(args);
public static void runMerge(Object ... args) {
Merge.main(stringsToArray(args));
}
public static void runXPathTool(String ... args) {
XPathTool.main(args);
public static void runXPathTool(Object ... args) {
XPathTool.main(stringsToArray(args));
}
*/
/**
 * Flattens a mixed argument list into a single String array, preserving order.
 *
 * Harvester shell scripts commonly build up argument lists out of smaller macros, for example:
 *
 * SCOREINPUT="-i $H2MODEL -ImodelName=$MODELNAME -IdbUrl=$MODELDBURL -IcheckEmpty=$CHECKEMPTY"
 * SCOREDATA="-s $H2MODEL -SmodelName=$SCOREDATANAME -SdbUrl=$SCOREDATADBURL -ScheckEmpty=$CHECKEMPTY"
 * SCOREMODELS="$SCOREINPUT -v $VIVOCONFIG -VcheckEmpty=$CHECKEMPTY $SCOREDATA -t $TEMPCOPYDIR -b $SCOREBATCHSIZE"
 * $Score $SCOREMODELS -AGrantNumber=$EQTEST -WGrantNumber=1.0 -FGrantNumber=$GRANTIDNUM -PGrantNumber=$GRANTIDNUM -n ${BASEURI}grant/
 *
 * This method offers the same composition in Java. Each argument is handled as follows:
 * a String[] contributes every one of its elements, in order; any other object contributes
 * the result of its toString() method. Callers are expected to pass a mixture of String and
 * String[] values, in whatever order is convenient.
 *
 * The static Harvester wrapper methods in this class accept an Object arg list and are
 * written to pass it through this method.
 *
 * @param args the values to flatten; intended to be a mix of String and String[] values, in any order
 * @return one array holding every resulting string, in the order encountered
 */
public static String[] stringsToArray(Object ... args) {
    ArrayList<String> flattened = new ArrayList<String>();
    for(Object arg : args) {
        // Anything that is not a String[] is represented by its toString() value.
        if(!(arg instanceof String[])) {
            flattened.add(arg.toString());
            continue;
        }
        // A String[] is spliced in element by element.
        for(String element : (String[])arg) {
            flattened.add(element);
        }
    }
    String[] result = new String[flattened.size()];
    return flattened.toArray(result);
}
}

View file

@ -2,8 +2,11 @@
package edu.cornell.mannlib.vitro.webapp.controller.harvester;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -235,8 +238,7 @@ public class TestFileController extends FreemarkerHttpServlet {
@SuppressWarnings("unused")
private void doHarvest()
{
private void doHarvest() {
/*
Harvest will entail:
@ -260,6 +262,8 @@ public class TestFileController extends FreemarkerHttpServlet {
/**
* Provides a way of throwing an exception whose message it is OK to display unedited to the user.
*/
@ -361,6 +365,192 @@ class CsvHarvestJob implements FileHarvestJob {
}
return null;
}
/**
 * Produces the console script for this job by reading the script template file and passing
 * its contents through the template replacement step.
 * The template path is still an empty placeholder.
 * @return the result of the replacement step applied to the template contents
 */
@Override
public String getScript() {
    //todo: complete -- the template location has not been filled in yet
    File templateFile = new File("");
    String rawTemplate = readScriptTemplate(templateFile);
    return performScriptTemplateReplacements(rawTemplate);
}
/**
 * Placeholder for substituting job-specific values into the script template.
 * No substitutions are implemented yet, so the given text is handed back unchanged.
 * @param scriptTemplateContents the raw contents of the script template
 * @return the template contents with replacements applied (currently the input itself)
 */
private String performScriptTemplateReplacements(String scriptTemplateContents) {
    /*
     * Values that still need to be substituted:
     *
     * task directory name
     */
    //todo: complete
    return scriptTemplateContents;
}
/**
 * Reads the entire script template file into a String.
 *
 * The file is read in chunks until end-of-stream rather than with a single read into a buffer
 * sized from File.length(). This matters for three reasons: a single read() call may return
 * fewer characters than requested; File.length() counts bytes, not characters, so a
 * length-sized char buffer is left with trailing NUL characters for multi-byte content; and a
 * zero-length file would ask BufferedReader for a buffer size of 0, which it rejects with an
 * IllegalArgumentException.
 *
 * The file is decoded with FileReader, i.e. using the platform default charset.
 *
 * @param scriptTemplate the template file to read
 * @return the file's full contents (an empty string for an empty file), or null if the file
 *         could not be read; read failures are logged rather than thrown
 */
private String readScriptTemplate(File scriptTemplate) {
    String scriptTemplateContents = null;
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new FileReader(scriptTemplate));
        StringBuilder contents = new StringBuilder();
        char[] buffer = new char[4096];
        int charsRead;
        // read() reports how many chars it actually delivered; keep going until end-of-stream.
        while((charsRead = reader.read(buffer)) != -1) {
            contents.append(buffer, 0, charsRead);
        }
        scriptTemplateContents = contents.toString();
    } catch (IOException e) {
        log.error(e, e);
    } finally {
        try {
            if(reader != null) {
                reader.close();
            }
        } catch(IOException e) {
            log.error(e, e);
        }
    }
    return scriptTemplateContents;
}
/**
 * Runs the CSV harvest for the files in the given directory.
 * At present this method does nothing: the whole pipeline below (fetch, translate, transfer,
 * smush/score/match, namespace changes, and diff/apply against the previous harvest and VIVO)
 * is commented out until the Harvester integration is working.
 * @param directory the directory containing files to harvest (not yet used)
 */
@Override
public void performHarvest(File directory) {
/* //COMMENTED OUT UNTIL HARVESTER INTEGRATION IS WORKING
String vivoconfig = "config/models/vivo.xml";
String scorebatchsize = "100";
String checkempty = "true";
String namespace = ""; //todo: get namespace
String h2model = "config/models/h2-sdb.xml";
String prevharvdburlbase = "jdbc:h2:harvested-data/prevHarvs/";
String tfrh = "config/recordhandlers/h2-jdbc.xml";
String harvesterTask = "csv";
String basedir = "harvested-data/" + harvesterTask;
String rawrhdir = basedir + "/rh-raw";
String rdfrhdir = basedir + "/rh-rdf";
String modeldir = basedir + "/model";
String scoredatadir = basedir + "/score-data";
String modeldburl = "jdbc:h2:" + modeldir + "/store";
String scoredatadburl = "jdbc:h2:" + scoredatadir + "/store";
String modelname = "csvTempTransfer";
String scoredataname = "csvScoreData";
String tempcopydir = basedir + "/temp-copy";
String[] scoreinput = Harvester.stringsToArray("-i", h2model, "-ImodelName=" + modelname, "-IdbUrl=" + modeldburl, "-IcheckEmpty=" + checkempty);
String[] scoredata = Harvester.stringsToArray("-s", h2model, "-SmodelName=" + scoredataname, "-SdbUrl=" + scoredatadburl, "-ScheckEmpty=" + checkempty);
String[] scoremodels = Harvester.stringsToArray(scoreinput, "-v", vivoconfig, "-VcheckEmpty=" + checkempty, scoredata, "-t", tempcopydir, "-b", scorebatchsize);
String[] cnflags = Harvester.stringsToArray(scoreinput, "-v", vivoconfig, "-n", namespace);
String eqtest = "org.vivoweb.harvester.score.algorithm.EqualityTest";
String grantidnum = "http://vivoweb.org/ontology/score#grantID";
String rdfslabel = "http://www.w3.org/2000/01/rdf-schema#label";
String personidnum = "http://vivoweb.org/ontology/score#personID";
String deptidnum = "http://vivoweb.org/ontology/score#deptID";
String rolein = "http://vivoweb.org/ontology/core#roleIn";
String piroleof = "http://vivoweb.org/ontology/core#principalInvestigatorRoleOf";
String copiroleof = "http://vivoweb.org/ontology/core#co-PrincipalInvestigatorRoleOf";
String datetime = "http://vivoweb.org/ontology/core#dateTime";
String baseuri = "http://vivoweb.org/harvest/csvfile/";
//execute fetch
Harvester.runCSVtoRDF("-o", tfrh, "-O", "fileDir=" + rawrhdir, "-i", "filepath");
//execute translate
Harvester.runXSLTranslator("-i", tfrh, "-IfileDir=" + rawrhdir, "-o", tfrh, "-OfileDir=" + rdfrhdir, "-x", "config/datamaps/csv-grant-to-vivo.xsl");
//execute transfer to import from record handler into local temp model
Harvester.runTransfer("-o", h2model, "-OmodelName=" + modelname, "-OdbUrl=" + modeldburl, "-h", tfrh, "-HfileDir=" + rdfrhdir, "-n", namespace);
//smushes in-place(-r) on the Grant id THEN on the person ID then deptID
Harvester.runSmush(scoreinput, "-P", grantidnum, "-P", personidnum, "-P", deptidnum, "-P", datetime, "-n", baseuri, "-r");
//scoring of Grants on GrantNumber
Harvester.runScore(scoremodels, "-AGrantNumber=" + eqtest, "-WGrantNumber=1.0", "-FGrantNumber=" + grantidnum, "-PGrantNumber=" + grantidnum, "-n", baseuri + "grant/");
//scoring of people on PERSONIDNUM
Harvester.runScore(scoremodels, "-Aufid=" + eqtest, "-Wufid=1.0", "-Fufid=" + personidnum, "-Pufid=" + personidnum, "-n", baseuri + "person/");
Harvester.runSmush(scoreinput, "-P", deptidnum, "-n", baseuri + "org/", "-r");
//scoring of orgs on DeptID
Harvester.runScore(scoremodels, "-AdeptID=" + eqtest, "-WdeptID=1.0", "-FdeptID=" + deptidnum, "-PdeptID=" + deptidnum, "-n", baseuri + "org/");
Harvester.runSmush(scoreinput, "-P", rdfslabel, "-n", baseuri + "sponsor/", "-r");
//scoring sponsors by labels
Harvester.runScore(scoremodels, "-Alabel=" + eqtest, "-Wlabel=1.0", "-Flabel=" + rdfslabel, "-Plabel=" + rdfslabel, "-n", baseuri + "sponsor/");
//scoring of PI Roles
String[] piuri = Harvester.stringsToArray("-Aperson=" + eqtest, "-Wperson=0.5", "-Fperson=" + piroleof, "-Pperson=" + piroleof);
String[] granturi = Harvester.stringsToArray("-Agrant=" + eqtest, "-Wgrant=0.5", "-Fgrant=" + rolein, "-Pgrant=" + rolein);
Harvester.runScore(scoremodels, piuri, granturi, "-n", baseuri + "piRole/");
//scoring of coPI Roles
String[] copiuri = Harvester.stringsToArray("-Aperson=" + eqtest, "-Wperson=0.5", "-Fperson=" + copiroleof, "-Pperson=" + copiroleof);
Harvester.runScore(scoremodels, copiuri, granturi, "-n", baseuri + "coPiRole/");
//find matches using scores and rename nodes to matching uri
Harvester.runMatch(scoreinput, scoredata, "-b", scorebatchsize, "-t", "1.0", "-r");
//execute ChangeNamespace to get grants into current namespace
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "grant/");
//execute ChangeNamespace to get orgs into current namespace
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "org/");
//execute ChangeNamespace to get sponsors into current namespace
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "sponsor/");
//execute ChangeNamespace to get people into current namespace
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "person/");
//execute ChangeNamespace to get PI roles into current namespace
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "piRole/");
//execute ChangeNamespace to get co-PI roles into current namespace
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "coPiRole/");
//execute ChangeNamespace to get time intervals into current namespace
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "timeInterval");
//todo: we probably don't want to do prev harvest stuff for this
String prevharvestmodel = "http://vivoweb.org/ingest/dsr";
String addfile = basedir + "/additions.rdf.xml";
String subfile = basedir + "/subtractions.rdf.xml";
//find Subtractions
Harvester.runDiff("-m", h2model, "-MdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-McheckEmpty=" + checkempty, "-MmodelName=" + prevharvestmodel, "-s", h2model, "-ScheckEmpty=" + checkempty, "-SdbUrl=" + modeldburl, "-SmodelName=" + modelname, "-d", subfile);
//find Additions
Harvester.runDiff("-m", h2model, "-McheckEmpty=" + checkempty, "-MdbUrl=" + modeldburl, "-MmodelName=" + modelname, "-s", h2model, "-ScheckEmpty=" + checkempty, "-SdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-SmodelName=" + prevharvestmodel, "-d", addfile);
//apply Subtractions to Previous model
Harvester.runTransfer("-o", h2model, "-OdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-OcheckEmpty=" + checkempty, "-OmodelName=" + prevharvestmodel, "-r", subfile, "-m");
//apply Additions to Previous model
Harvester.runTransfer("-o", h2model, "-OdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-OcheckEmpty=" + checkempty, "-OmodelName=" + prevharvestmodel, "-r", addfile);
//apply Subtractions to VIVO
Harvester.runTransfer("-o", vivoconfig, "-OcheckEmpty=" + checkempty, "-r", subfile, "-m");
//apply Additions to VIVO
Harvester.runTransfer("-o", vivoconfig, "-OcheckEmpty=" + checkempty, "-r", addfile);
*/
}
}