FileHarvestJob.java: added getScript() and performHarvest() methods
Harvester.java: added stringsToArray(), modified API methods to use it
TestFileController.java: implemented preliminary harvest
This commit is contained in:
parent
3804cf4d70
commit
663090318d
3 changed files with 301 additions and 61 deletions
|
@ -18,5 +18,17 @@ interface FileHarvestJob {
|
|||
* @return null if success, message to be returned to the user if failure
|
||||
*/
|
||||
String validateUpload(File file);
|
||||
|
||||
/**
|
||||
* Gets the console script which can be used to run the harvest job.
|
||||
* @return the console script which can be used to run the harvest job
|
||||
*/
|
||||
String getScript();
|
||||
|
||||
/**
|
||||
* Runs a harvest on the files in the specified directory.
|
||||
* @param directory the directory containing files to harvest
|
||||
*/
|
||||
void performHarvest(File directory);
|
||||
}
|
||||
|
||||
|
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
package edu.cornell.mannlib.vitro.webapp.controller.harvester;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
/* //PLEASE SEE JAVADOC COMMENT FOR CLASS BELOW
|
||||
import org.vivoweb.harvester.diff.Diff;
|
||||
import org.vivoweb.harvester.fetch.CSVtoRDF;
|
||||
|
@ -52,99 +54,135 @@ import org.vivoweb.harvester.util.XPathTool;
|
|||
class Harvester {
|
||||
/*
|
||||
// diff
|
||||
public static void runDiff(String ... args) {
|
||||
Diff.main(args);
|
||||
public static void runDiff(Object ... args) {
|
||||
Diff.main(stringsToArray(args));
|
||||
}
|
||||
|
||||
// fetch
|
||||
public static void runCSVtoRDF(String ... args) {
|
||||
CSVtoRDF.main(args);
|
||||
public static void runCSVtoRDF(Object ... args) {
|
||||
CSVtoRDF.main(stringsToArray(args));
|
||||
}
|
||||
public static void runD2RMapFetch(String ... args) {
|
||||
D2RMapFetch.main(args);
|
||||
public static void runD2RMapFetch(Object ... args) {
|
||||
D2RMapFetch.main(stringsToArray(args));
|
||||
}
|
||||
public static void runJDBCFetch(String ... args) {
|
||||
JDBCFetch.main(args);
|
||||
public static void runJDBCFetch(Object ... args) {
|
||||
JDBCFetch.main(stringsToArray(args));
|
||||
}
|
||||
public static void runNLMJournalFetch(String ... args) {
|
||||
NLMJournalFetch.main(args);
|
||||
public static void runNLMJournalFetch(Object ... args) {
|
||||
NLMJournalFetch.main(stringsToArray(args));
|
||||
}
|
||||
public static void runOAIFetch(String ... args) {
|
||||
OAIFetch.main(args);
|
||||
public static void runOAIFetch(Object ... args) {
|
||||
OAIFetch.main(stringsToArray(args));
|
||||
}
|
||||
public static void runPubmedFetch(String ... args) {
|
||||
PubmedFetch.main(args);
|
||||
public static void runPubmedFetch(Object ... args) {
|
||||
PubmedFetch.main(stringsToArray(args));
|
||||
}
|
||||
public static void runPubmedHTTPFetch(String ... args) {
|
||||
PubmedHTTPFetch.main(args);
|
||||
public static void runPubmedHTTPFetch(Object ... args) {
|
||||
PubmedHTTPFetch.main(stringsToArray(args));
|
||||
}
|
||||
|
||||
// qualify
|
||||
public static void runChangeNamespace(String ... args) {
|
||||
ChangeNamespace.main(args);
|
||||
public static void runChangeNamespace(Object ... args) {
|
||||
ChangeNamespace.main(stringsToArray(args));
|
||||
}
|
||||
public static void runQualify(String ... args) {
|
||||
Qualify.main(args);
|
||||
public static void runQualify(Object ... args) {
|
||||
Qualify.main(stringsToArray(args));
|
||||
}
|
||||
public static void runRenameBlankNodes(String ... args) {
|
||||
RenameBlankNodes.main(args);
|
||||
public static void runRenameBlankNodes(Object ... args) {
|
||||
RenameBlankNodes.main(stringsToArray(args));
|
||||
}
|
||||
public static void runRenameResources(String ... args) {
|
||||
RenameResources.main(args);
|
||||
public static void runRenameResources(Object ... args) {
|
||||
RenameResources.main(stringsToArray(args));
|
||||
}
|
||||
public static void runSmush(String ... args) {
|
||||
Smush.main(args);
|
||||
public static void runSmush(Object ... args) {
|
||||
Smush.main(stringsToArray(args));
|
||||
}
|
||||
public static void runSplitProperty(String ... args) {
|
||||
SplitProperty.main(args);
|
||||
public static void runSplitProperty(Object ... args) {
|
||||
SplitProperty.main(stringsToArray(args));
|
||||
}
|
||||
|
||||
// score
|
||||
public static void runMatch(String ... args) {
|
||||
Match.main(args);
|
||||
public static void runMatch(Object ... args) {
|
||||
Match.main(stringsToArray(args));
|
||||
}
|
||||
public static void runPubmedScore(String ... args) {
|
||||
PubmedScore.main(args);
|
||||
public static void runPubmedScore(Object ... args) {
|
||||
PubmedScore.main(stringsToArray(args));
|
||||
}
|
||||
public static void runScore(String ... args) {
|
||||
Score.main(args);
|
||||
public static void runScore(Object ... args) {
|
||||
Score.main(stringsToArray(args));
|
||||
}
|
||||
|
||||
// transfer
|
||||
public static void transfer(String ... args) {
|
||||
Transfer.main(args);
|
||||
public static void runTransfer(Object ... args) {
|
||||
Transfer.main(stringsToArray(args));
|
||||
}
|
||||
|
||||
// translate
|
||||
public static void runGlozeTranslator(String ... args) {
|
||||
GlozeTranslator.main(args);
|
||||
public static void runGlozeTranslator(Object ... args) {
|
||||
GlozeTranslator.main(stringsToArray(args));
|
||||
}
|
||||
public static void runRunBibutils(String ... args) {
|
||||
RunBibutils.main(args);
|
||||
public static void runRunBibutils(Object ... args) {
|
||||
RunBibutils.main(stringsToArray(args));
|
||||
}
|
||||
public static void runSanitizeMODSXML(String ... args) {
|
||||
SanitizeMODSXML.main(args);
|
||||
public static void runSanitizeMODSXML(Object ... args) {
|
||||
SanitizeMODSXML.main(stringsToArray(args));
|
||||
}
|
||||
public static void runSPARQLTranslator(String ... args) {
|
||||
SPARQLTranslator.main(args);
|
||||
public static void runSPARQLTranslator(Object ... args) {
|
||||
SPARQLTranslator.main(stringsToArray(args));
|
||||
}
|
||||
public static void runXSLTranslator(String ... args) {
|
||||
XSLTranslator.main(args);
|
||||
public static void runXSLTranslator(Object ... args) {
|
||||
XSLTranslator.main(stringsToArray(args));
|
||||
}
|
||||
|
||||
// util
|
||||
public static void runCSVtoJDBC(String ... args) {
|
||||
CSVtoJDBC.main(args);
|
||||
public static void runCSVtoJDBC(Object ... args) {
|
||||
CSVtoJDBC.main(stringsToArray(args));
|
||||
}
|
||||
public static void runDatabaseClone(String ... args) {
|
||||
DatabaseClone.main(args);
|
||||
public static void runDatabaseClone(Object ... args) {
|
||||
DatabaseClone.main(stringsToArray(args));
|
||||
}
|
||||
public static void runMerge(String ... args) {
|
||||
Merge.main(args);
|
||||
public static void runMerge(Object ... args) {
|
||||
Merge.main(stringsToArray(args));
|
||||
}
|
||||
public static void runXPathTool(String ... args) {
|
||||
XPathTool.main(args);
|
||||
public static void runXPathTool(Object ... args) {
|
||||
XPathTool.main(stringsToArray(args));
|
||||
}
|
||||
*/
|
||||
|
||||
/**
|
||||
* Convenience method to expand the ability to use Java's "..." arg list. Harvester scripts frequently declare sub-macros,
|
||||
* so for example you might have:
|
||||
*
|
||||
* SCOREINPUT="-i $H2MODEL -ImodelName=$MODELNAME -IdbUrl=$MODELDBURL -IcheckEmpty=$CHECKEMPTY"
|
||||
* SCOREDATA="-s $H2MODEL -SmodelName=$SCOREDATANAME -SdbUrl=$SCOREDATADBURL -ScheckEmpty=$CHECKEMPTY"
|
||||
* SCOREMODELS="$SCOREINPUT -v $VIVOCONFIG -VcheckEmpty=$CHECKEMPTY $SCOREDATA -t $TEMPCOPYDIR -b $SCOREBATCHSIZE"
|
||||
* $Score $SCOREMODELS -AGrantNumber=$EQTEST -WGrantNumber=1.0 -FGrantNumber=$GRANTIDNUM -PGrantNumber=$GRANTIDNUM -n ${BASEURI}grant/
|
||||
*
|
||||
* In order to mimic this functionality for easy use in Java, this method has been created. It takes a "..." arg list of Object
|
||||
* objects, and returns an array of Strings. For each object, if it's an array of Strings, each String is added to the output
|
||||
* array. Otherwise, its toString() method is called and that value is added to the output array.
|
||||
*
|
||||
* It is intended to be used with a combination of String and String[] values, in any arbitrary order.
|
||||
*
|
||||
* All static Harvester methods in this class take an Object arg list rather than a String arg list, and automatically call
|
||||
* this method.
|
||||
*
|
||||
* @param args an array of objects, which ought to be a combination of String and String[] values, in any arbitrary order
|
||||
* @return all the strings put together as one array
|
||||
*/
|
||||
public static String[] stringsToArray(Object ... args) {
|
||||
ArrayList<String> allData = new ArrayList<String>();
|
||||
for(int i = 0; i < args.length; i++) {
|
||||
if(args[i] instanceof String[]) {
|
||||
String[] array = (String[])(args[i]);
|
||||
for(int j = 0; j < array.length; j++) {
|
||||
allData.add(array[j]);
|
||||
}
|
||||
} else {
|
||||
allData.add(args[i].toString());
|
||||
}
|
||||
}
|
||||
return allData.toArray(new String[allData.size()]);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
|
|
|
@ -2,8 +2,11 @@
|
|||
|
||||
package edu.cornell.mannlib.vitro.webapp.controller.harvester;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -235,8 +238,7 @@ public class TestFileController extends FreemarkerHttpServlet {
|
|||
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private void doHarvest()
|
||||
{
|
||||
private void doHarvest() {
|
||||
/*
|
||||
Harvest will entail:
|
||||
|
||||
|
@ -260,6 +262,8 @@ public class TestFileController extends FreemarkerHttpServlet {
|
|||
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Provides a way of throwing an exception whose message it is OK to display unedited to the user.
|
||||
*/
|
||||
|
@ -361,6 +365,192 @@ class CsvHarvestJob implements FileHarvestJob {
|
|||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getScript()
|
||||
{
|
||||
String path = ""; //todo: complete
|
||||
File scriptTemplate = new File(path);
|
||||
|
||||
String scriptTemplateContents = readScriptTemplate(scriptTemplate);
|
||||
String replacements = performScriptTemplateReplacements(scriptTemplateContents);
|
||||
return replacements;
|
||||
}
|
||||
|
||||
|
||||
private String performScriptTemplateReplacements(String scriptTemplateContents) {
|
||||
String replacements = scriptTemplateContents;
|
||||
/*
|
||||
* What needs to be replaced?
|
||||
*
|
||||
* task directory name
|
||||
*/
|
||||
//todo: complete
|
||||
return replacements;
|
||||
}
|
||||
|
||||
|
||||
private String readScriptTemplate(File scriptTemplate) {
|
||||
String scriptTemplateContents = null;
|
||||
BufferedReader reader = null;
|
||||
try {
|
||||
int fileSize = (int)(scriptTemplate.length());
|
||||
char[] buffer = new char[fileSize];
|
||||
reader = new BufferedReader(new FileReader(scriptTemplate), fileSize);
|
||||
reader.read(buffer);
|
||||
scriptTemplateContents = new String(buffer);
|
||||
} catch (IOException e) {
|
||||
log.error(e, e);
|
||||
} finally {
|
||||
try {
|
||||
if(reader != null)
|
||||
reader.close();
|
||||
} catch(IOException e) {
|
||||
log.error(e, e);
|
||||
}
|
||||
}
|
||||
|
||||
return scriptTemplateContents;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void performHarvest(File directory) {
|
||||
|
||||
/* //COMMENTED OUT UNTIL HARVESTER INTEGRATION IS WORKING
|
||||
String vivoconfig = "config/models/vivo.xml";
|
||||
String scorebatchsize = "100";
|
||||
String checkempty = "true";
|
||||
String namespace = ""; //todo: get namespace
|
||||
String h2model = "config/models/h2-sdb.xml";
|
||||
String prevharvdburlbase = "jdbc:h2:harvested-data/prevHarvs/";
|
||||
String tfrh = "config/recordhandlers/h2-jdbc.xml";
|
||||
|
||||
String harvesterTask = "csv";
|
||||
|
||||
String basedir = "harvested-data/" + harvesterTask;
|
||||
|
||||
String rawrhdir = basedir + "/rh-raw";
|
||||
String rdfrhdir = basedir + "/rh-rdf";
|
||||
String modeldir = basedir + "/model";
|
||||
String scoredatadir = basedir + "/score-data";
|
||||
|
||||
String modeldburl = "jdbc:h2:" + modeldir + "/store";
|
||||
String scoredatadburl = "jdbc:h2:" + scoredatadir + "/store";
|
||||
|
||||
String modelname = "csvTempTransfer";
|
||||
String scoredataname = "csvScoreData";
|
||||
|
||||
String tempcopydir = basedir + "/temp-copy";
|
||||
|
||||
String[] scoreinput = Harvester.stringsToArray("-i", h2model, "-ImodelName=" + modelname, "-IdbUrl=" + modeldburl, "-IcheckEmpty=" + checkempty);
|
||||
String[] scoredata = Harvester.stringsToArray("-s", h2model, "-SmodelName=" + scoredataname, "-SdbUrl=" + scoredatadburl, "-ScheckEmpty=" + checkempty);
|
||||
String[] scoremodels = Harvester.stringsToArray(scoreinput, "-v", vivoconfig, "-VcheckEmpty=" + checkempty, scoredata, "-t", tempcopydir, "-b", scorebatchsize);
|
||||
|
||||
String[] cnflags = Harvester.stringsToArray(scoreinput, "-v", vivoconfig, "-n", namespace);
|
||||
|
||||
String eqtest = "org.vivoweb.harvester.score.algorithm.EqualityTest";
|
||||
|
||||
String grantidnum = "http://vivoweb.org/ontology/score#grantID";
|
||||
String rdfslabel = "http://www.w3.org/2000/01/rdf-schema#label";
|
||||
String personidnum = "http://vivoweb.org/ontology/score#personID";
|
||||
String deptidnum = "http://vivoweb.org/ontology/score#deptID";
|
||||
String rolein = "http://vivoweb.org/ontology/core#roleIn";
|
||||
String piroleof = "http://vivoweb.org/ontology/core#principalInvestigatorRoleOf";
|
||||
String copiroleof = "http://vivoweb.org/ontology/core#co-PrincipalInvestigatorRoleOf";
|
||||
String datetime = "http://vivoweb.org/ontology/core#dateTime";
|
||||
String baseuri = "http://vivoweb.org/harvest/csvfile/";
|
||||
|
||||
|
||||
|
||||
//execute fetch
|
||||
Harvester.runCSVtoRDF("-o", tfrh, "-O", "fileDir=" + rawrhdir, "-i", "filepath");
|
||||
|
||||
//execute translate
|
||||
Harvester.runXSLTranslator("-i", tfrh, "-IfileDir=" + rawrhdir, "-o", tfrh, "-OfileDir=" + rdfrhdir, "-x", "config/datamaps/csv-grant-to-vivo.xsl");
|
||||
|
||||
//execute transfer to import from record handler into local temp model
|
||||
Harvester.runTransfer("-o", h2model, "-OmodelName=" + modelname, "-OdbUrl=" + modeldburl, "-h", tfrh, "-HfileDir=" + rdfrhdir, "-n", namespace);
|
||||
|
||||
//smushes in-place(-r) on the Grant id THEN on the person ID then deptID
|
||||
Harvester.runSmush(scoreinput, "-P", grantidnum, "-P", personidnum, "-P", deptidnum, "-P", datetime, "-n", baseuri, "-r");
|
||||
|
||||
//scoring of Grants on GrantNumber
|
||||
Harvester.runScore(scoremodels, "-AGrantNumber=" + eqtest, "-WGrantNumber=1.0", "-FGrantNumber=" + grantidnum, "-PGrantNumber=" + grantidnum, "-n", baseuri + "grant/");
|
||||
|
||||
//scoring of people on PERSONIDNUM
|
||||
Harvester.runScore(scoremodels, "-Aufid=" + eqtest, "-Wufid=1.0", "-Fufid=" + personidnum, "-Pufid=" + personidnum, "-n", baseuri + "person/");
|
||||
|
||||
Harvester.runSmush(scoreinput, "-P", deptidnum, "-n", baseuri + "org/", "-r");
|
||||
|
||||
//scoring of orgs on DeptID
|
||||
Harvester.runScore(scoremodels, "-AdeptID=" + eqtest, "-WdeptID=1.0", "-FdeptID=" + deptidnum, "-PdeptID=" + deptidnum, "-n", baseuri + "org/");
|
||||
|
||||
|
||||
Harvester.runSmush(scoreinput, "-P", rdfslabel, "-n", baseuri + "sponsor/", "-r");
|
||||
|
||||
//scoring sponsors by labels
|
||||
Harvester.runScore(scoremodels, "-Alabel=" + eqtest, "-Wlabel=1.0", "-Flabel=" + rdfslabel, "-Plabel=" + rdfslabel, "-n", baseuri + "sponsor/");
|
||||
|
||||
//scoring of PI Roles
|
||||
String[] piuri = Harvester.stringsToArray("-Aperson=" + eqtest, "-Wperson=0.5", "-Fperson=" + piroleof, "-Pperson=" + piroleof);
|
||||
String[] granturi = Harvester.stringsToArray("-Agrant=" + eqtest, "-Wgrant=0.5", "-Fgrant=" + rolein, "-Pgrant=" + rolein);
|
||||
Harvester.runScore(scoremodels, piuri, granturi, "-n", baseuri + "piRole/");
|
||||
|
||||
//scoring of coPI Roles
|
||||
String[] copiuri = Harvester.stringsToArray("-Aperson=" + eqtest, "-Wperson=0.5", "-Fperson=" + copiroleof, "-Pperson=" + copiroleof);
|
||||
Harvester.runScore(scoremodels, copiuri, granturi, "-n", baseuri + "coPiRole/");
|
||||
|
||||
//find matches using scores and rename nodes to matching uri
|
||||
Harvester.runMatch(scoreinput, scoredata, "-b", scorebatchsize, "-t", "1.0", "-r");
|
||||
|
||||
//execute ChangeNamespace to get grants into current namespace
|
||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "grant/");
|
||||
|
||||
//execute ChangeNamespace to get orgs into current namespace
|
||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "org/");
|
||||
|
||||
//execute ChangeNamespace to get sponsors into current namespace
|
||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "sponsor/");
|
||||
|
||||
//execute ChangeNamespace to get people into current namespace
|
||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "person/");
|
||||
|
||||
//execute ChangeNamespace to get PI roles into current namespace
|
||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "piRole/");
|
||||
|
||||
//execute ChangeNamespace to get co-PI roles into current namespace
|
||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "coPiRole/");
|
||||
|
||||
//execute ChangeNamespace to get co-PI roles into current namespace
|
||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "timeInterval");
|
||||
|
||||
|
||||
//todo: we probably don't want to do prev harvest stuff for this
|
||||
String prevharvestmodel = "http://vivoweb.org/ingest/dsr";
|
||||
String addfile = basedir + "/additions.rdf.xml";
|
||||
String subfile = basedir + "/subtractions.rdf.xml";
|
||||
|
||||
//find Subtractions
|
||||
Harvester.runDiff("-m", h2model, "-MdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-McheckEmpty=" + checkempty, "-MmodelName=" + prevharvestmodel, "-s", h2model, "-ScheckEmpty=" + checkempty, "-SdbUrl=" + modeldburl, "-SmodelName=" + modelname, "-d", subfile);
|
||||
|
||||
//find Additions
|
||||
Harvester.runDiff("-m", h2model, "-McheckEmpty=" + checkempty, "-MdbUrl=" + modeldburl, "-MmodelName=" + modelname, "-s", h2model, "-ScheckEmpty=" + checkempty, "-SdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-SmodelName=" + prevharvestmodel, "-d", addfile);
|
||||
|
||||
//apply Subtractions to Previous model
|
||||
Harvester.runTransfer("-o", h2model, "-OdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-OcheckEmpty=" + checkempty, "-OmodelName=" + prevharvestmodel, "-r", subfile, "-m");
|
||||
|
||||
//apply Additions to Previous model
|
||||
Harvester.runTransfer("-o", h2model, "-OdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-OcheckEmpty=" + checkempty, "-OmodelName=" + prevharvestmodel, "-r", addfile);
|
||||
|
||||
//apply Subtractions to VIVO
|
||||
Harvester.runTransfer("-o", vivoconfig, "-OcheckEmpty=" + checkempty, "-r", subfile, "-m");
|
||||
|
||||
//apply Additions to VIVO
|
||||
Harvester.runTransfer("-o", vivoconfig, "-OcheckEmpty=" + checkempty, "-r", addfile);
|
||||
*/
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue