testfile.ftl and TestFileController.java: modifications for calling a script instead of directly calling Harvester
This commit is contained in:
parent
bcdc7ee981
commit
09daaa58ac
2 changed files with 315 additions and 464 deletions
|
@ -2,34 +2,63 @@
|
||||||
|
|
||||||
<script type="text/javascript">
|
<script type="text/javascript">
|
||||||
|
|
||||||
// Raw JSON text of the most recent response from the harvest endpoint.
// Written by the XHR callback, read by continueHarvest() on the next timer tick.
var harvestProgressResponse;

/**
 * Click handler for the harvest button: starts the harvest.
 */
function doFunStuff()
{
    doHarvest();
}

/**
 * Asks the server to start the harvest script, then begins polling for progress.
 */
function doHarvest()
{
    postHarvestRequest("${paramIsHarvestClick}=true");
}

/**
 * Consumes the last progress response: appends any new log output to the
 * progress text area and, unless the server reports the harvest as finished,
 * schedules another status check.
 *
 * Throws SyntaxError if the stored response is not valid JSON; polling stops in that case.
 */
function continueHarvest()
{
    // JSON.parse instead of eval: the response is data and must never be executed.
    var json = JSON.parse(harvestProgressResponse);

    // Append before testing "finished" so the last batch of log lines is not dropped.
    if (json.progressSinceLastCheck) {
        var progressTextArea = document.getElementById("progressTextArea");
        // .value keeps characters such as < and & literal; innerHTML would parse them as markup.
        progressTextArea.value += json.progressSinceLastCheck;
        progressTextArea.scrollTop = progressTextArea.scrollHeight;
    }

    if (!json.finished) {
        postHarvestRequest("${paramIsHarvestClick}=false");
    }
}

/**
 * POSTs the given form-encoded body to the harvest endpoint. On a successful
 * response the body text is stored and continueHarvest() is scheduled one
 * second later. Non-200 responses are ignored, which ends the polling loop.
 *
 * @param body form-encoded request body
 */
function postHarvestRequest(body)
{
    var request = createRequest();
    request.onreadystatechange = function() {
        if (request.readyState === 4 && request.status === 200) {
            harvestProgressResponse = request.responseText;
            window.setTimeout(continueHarvest, 1000);
        }
    };
    request.open("POST", "/vivo/harvester/testfile", true);
    request.setRequestHeader("Content-type", "application/x-www-form-urlencoded");
    request.send(body);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Creates an AJAX request object for the current browser.
 *
 * Uses the standard XMLHttpRequest when the browser provides it, otherwise the
 * legacy "Microsoft.XMLHTTP" ActiveX control.
 *
 * @return a new, unopened request object
 * @throws Error if the browser offers neither mechanism
 */
function createRequest() {
    if (window.XMLHttpRequest) {
        // Construct the same object that was just feature-tested.
        return new window.XMLHttpRequest();
    }
    if (typeof ActiveXObject !== "undefined") {
        return new ActiveXObject("Microsoft.XMLHTTP");
    }
    // Fail with a clear message instead of an opaque ReferenceError.
    throw new Error("This browser does not support XMLHttpRequest.");
}
|
||||||
|
|
||||||
|
|
||||||
function toggleCollapsibleDiv(divName)
|
function toggleCollapsibleDiv(divName)
|
||||||
{
|
{
|
||||||
|
@ -72,6 +101,7 @@
|
||||||
|
|
||||||
function init()
|
function init()
|
||||||
{
|
{
|
||||||
|
document.getElementById("${paramFirstUpload}").value = "true";
|
||||||
document.getElementById("fileUploadForm").onsubmit = function()
|
document.getElementById("fileUploadForm").onsubmit = function()
|
||||||
{
|
{
|
||||||
document.getElementById("fileUploadForm").target = "uploadTarget";
|
document.getElementById("fileUploadForm").target = "uploadTarget";
|
||||||
|
@ -168,270 +198,14 @@
|
||||||
</div>
|
</div>
|
||||||
<div class="clearBothDiv" />
|
<div class="clearBothDiv" />
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
<div id="step5" class="testfile-step">
|
<div id="step5" class="testfile-step">
|
||||||
<h3 class="testfile-step-header">Step 5</h3>
|
<h3 class="testfile-step-header">Step 5</h3>
|
||||||
<div id="step5-inner" class="testfile-step-body">
|
<div id="step5-inner" class="testfile-step-body">
|
||||||
<h4 class="testfile-step-subheader">View results</h4>
|
<h4 class="testfile-step-subheader">View results</h4>
|
||||||
<div id="progress" style="visibility:hidden">
|
<div id="progress">
|
||||||
<ul>
|
<textarea cols="100" rows="50" readonly="readonly" id="progressTextArea"></textarea>
|
||||||
<li id="progressUploading" style="visibility:hidden">Validating files... <span id="progressUploadingSuccess" style="visibility:hidden">success</span></li>
|
|
||||||
<li id="progressGenerating" style="visibility:hidden">Generating script... <span id="progressGeneratingSuccess" style="visibility:hidden">success</span></li>
|
|
||||||
<li id="progressExecuting" style="visibility:hidden">Executing script... <span id="progressExecutingSuccess" style="visibility:hidden">success</span></li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
<div style="height:30px"></div>
|
|
||||||
<div id="results" style="visibility:hidden">
|
|
||||||
<h3>Results</h3>
|
|
||||||
<span style="color:green;font-weight:bold">Harvest successful.</span>
|
|
||||||
<h4>Script used for harvest</h4>
|
|
||||||
<div><input type="button" value="Save" style="margin-bottom:10px" /></div>
|
|
||||||
<textarea cols="100" rows="50" readonly="readonly"><#noparse>#!/bin/bash
|
|
||||||
|
|
||||||
# Copyright (c) 2010 Christopher Haines, Dale Scheppler, Nicholas Skaggs, Stephen V. Williams, Michael Barbieri.
|
|
||||||
# All rights reserved. This program and the accompanying materials
|
|
||||||
# are made available under the terms of the new BSD license
|
|
||||||
# which accompanies this distribution, and is available at
|
|
||||||
# http://www.opensource.org/licenses/bsd-license.html
|
|
||||||
#
|
|
||||||
# Contributors:
|
|
||||||
# Christopher Haines, Dale Scheppler, Nicholas Skaggs, Stephen V. Williams, Michael Barbieri - initial API and implementation
|
|
||||||
|
|
||||||
#KNOWN ISSUE: Seems to tie in matches that were originally in VIVO into the input model, so that if the input model is cleaned out of VIVO,
|
|
||||||
# then those matches will be removed. Actually they remain, hidden, but much of their data including their rdf:type is gone. An
|
|
||||||
# RDF export will show this.
|
|
||||||
|
|
||||||
# Exit on first error
|
|
||||||
set -e
|
|
||||||
|
|
||||||
# Set working directory
|
|
||||||
HARVESTERDIR=`dirname "$(cd "${0%/*}" 2>/dev/null; echo "$PWD"/"${0##*/}")"`
|
|
||||||
HARVESTERDIR=$(cd $HARVESTERDIR; cd ..; pwd)
|
|
||||||
|
|
||||||
HARVESTER_TASK=mods
|
|
||||||
|
|
||||||
if [ -f scripts/env ]; then
|
|
||||||
. scripts/env
|
|
||||||
else
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
echo "Full Logging in $HARVESTER_TASK_DATE.log"
|
|
||||||
|
|
||||||
BASEDIR=harvested-data/$HARVESTER_TASK
|
|
||||||
BIBINDIR=$BASEDIR/rh-bibutils-in
|
|
||||||
BIBOUTDIR=$BASEDIR/rh-bibutils-out
|
|
||||||
RAWRHDIR=$BASEDIR/rh-raw
|
|
||||||
RAWRHDBURL=jdbc:h2:$RAWRHDIR/store
|
|
||||||
RDFRHDIR=$BASEDIR/rh-rdf
|
|
||||||
RDFRHDBURL=jdbc:h2:$RDFRHDIR/store
|
|
||||||
MODELDIR=$BASEDIR/model
|
|
||||||
MODELDBURL=jdbc:h2:$MODELDIR/store
|
|
||||||
MODELNAME=modsTempTransfer
|
|
||||||
SCOREDATADIR=$BASEDIR/score-data
|
|
||||||
SCOREDATADBURL=jdbc:h2:$SCOREDATADIR/store
|
|
||||||
SCOREDATANAME=modsScoreData
|
|
||||||
TEMPCOPYDIR=$BASEDIR/temp-copy
|
|
||||||
MATCHEDDIR=$BASEDIR/matched
|
|
||||||
MATCHEDDBURL=jdbc:h2:$MATCHEDDIR/store
|
|
||||||
MATCHEDNAME=matchedData
|
|
||||||
|
|
||||||
#scoring algorithms
|
|
||||||
EQTEST="org.vivoweb.harvester.score.algorithm.EqualityTest"
|
|
||||||
LEVDIFF="org.vivoweb.harvester.score.algorithm.NormalizedLevenshteinDifference"
|
|
||||||
|
|
||||||
#matching properties
|
|
||||||
CWEMAIL="http://vivoweb.org/ontology/core#workEmail"
|
|
||||||
SWEMAIL="http://vivoweb.org/ontology/score#workEmail"
|
|
||||||
FFNAME="http://xmlns.com/foaf/0.1/firstName"
|
|
||||||
SFNAME="http://vivoweb.org/ontology/score#foreName"
|
|
||||||
FLNAME="http://xmlns.com/foaf/0.1/lastName"
|
|
||||||
CMNAME="http://vivoweb.org/ontology/core#middleName"
|
|
||||||
BPMID="http://purl.org/ontology/bibo/pmid"
|
|
||||||
CTITLE="http://vivoweb.org/ontology/core#title"
|
|
||||||
BISSN="http://purl.org/ontology/bibo/ISSN"
|
|
||||||
PVENUEFOR="http://vivoweb.org/ontology/core#publicationVenueFor"
|
|
||||||
LINKAUTH="http://vivoweb.org/ontology/core#linkedAuthor"
|
|
||||||
LINKINFORES="http://vivoweb.org/ontology/core#linkedInformationResource"
|
|
||||||
AUTHINAUTH="http://vivoweb.org/ontology/core#authorInAuthorship"
|
|
||||||
RDFTYPE="http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
|
|
||||||
RDFSLABEL="http://www.w3.org/2000/01/rdf-schema#label"
|
|
||||||
BASEURI="http://vivoweb.org/harvest/mods/"
|
|
||||||
|
|
||||||
#BIBUTILSBASE="lib/bibutils/bibutils_4.12_x86_64"
|
|
||||||
BIBUTILSBASE="lib/bibutils/bibutils_4.12_i386"
|
|
||||||
BIBUTILSINPUTFORMAT="med"
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#clear old fetches
|
|
||||||
rm -rf $BIBINDIR
|
|
||||||
|
|
||||||
# Execute Fetch for Pubmed
|
|
||||||
$PubmedFetch -X config/tasks/ufl.pubmedfetch.xml -o $TFRH -OfileDir=$BIBINDIR
|
|
||||||
|
|
||||||
# clear old bibutils runs
|
|
||||||
rm -rf $BIBOUTDIR
|
|
||||||
|
|
||||||
# run bibutils
|
|
||||||
$RunBibutils -b $BIBUTILSBASE -m $BIBUTILSINPUTFORMAT -i $TFRH -IfileDir=$BIBINDIR -o $TFRH -OfileDir=$BIBOUTDIR
|
|
||||||
|
|
||||||
# clear old sanitizes
|
|
||||||
rm -rf $RAWRHDIR
|
|
||||||
|
|
||||||
# Sanitize data
|
|
||||||
$SanitizeMODSXML -i $TFRH -IfileDir=$BIBOUTDIR -o $TFRH -OfileDir=$RAWRHDIR
|
|
||||||
|
|
||||||
# clear old translates
|
|
||||||
rm -rf $RDFRHDIR
|
|
||||||
|
|
||||||
# Execute Translate using the mods-to-vivo.xsl file
|
|
||||||
$XSLTranslator -i $TFRH -IfileDir=$RAWRHDIR -o $H2RH -OdbUrl=$RDFRHDBURL -x config/datamaps/mods-to-vivo.xsl -f
|
|
||||||
|
|
||||||
# backup translate
|
|
||||||
BACKRDF="rdf"
|
|
||||||
backup-path $RDFRHDIR $BACKRDF
|
|
||||||
# uncomment to restore previous translate
|
|
||||||
#restore-path $RDFRHDIR $BACKRDF
|
|
||||||
|
|
||||||
# Clear old H2 transfer model
|
|
||||||
rm -rf $MODELDIR
|
|
||||||
|
|
||||||
# Execute Transfer to import from record handler into local temp model
|
|
||||||
$Transfer -o $H2MODEL -OmodelName=$MODELNAME -OcheckEmpty=$CHECKEMPTY -OdbUrl=$MODELDBURL -h $H2RH -HdbUrl=$RDFRHDBURL
|
|
||||||
|
|
||||||
# backup H2 transfer Model
|
|
||||||
BACKMODEL="model"
|
|
||||||
backup-path $MODELDIR $BACKMODEL
|
|
||||||
# uncomment to restore previous H2 transfer Model
|
|
||||||
#restore-path $MODELDIR $BACKMODEL
|
|
||||||
|
|
||||||
# Clear old H2 score data
|
|
||||||
rm -rf $SCOREDATADIR
|
|
||||||
|
|
||||||
# Clear old H2 match data
|
|
||||||
rm -rf $MATCHEDDIR
|
|
||||||
|
|
||||||
# Clear old H2 temp copy
|
|
||||||
rm -rf $TEMPCOPYDIR
|
|
||||||
|
|
||||||
# Score variables for cleaner lines
|
|
||||||
SCOREINPUT="-i $H2MODEL -ImodelName=$MODELNAME -IdbUrl=$MODELDBURL -IcheckEmpty=$CHECKEMPTY"
|
|
||||||
SCOREDATA="-s $H2MODEL -SmodelName=$SCOREDATANAME -SdbUrl=$SCOREDATADBURL -ScheckEmpty=$CHECKEMPTY"
|
|
||||||
MATCHOUTPUT="-o $H2MODEL -OmodelName=$MATCHEDNAME -OdbUrl=$MATCHEDDBURL -OcheckEmpty=$CHECKEMPTY"
|
|
||||||
MATCHEDINPUT="-i $H2MODEL -ImodelName=$MATCHEDNAME -IdbUrl=$MATCHEDDBURL -IcheckEmpty=$CHECKEMPTY"
|
|
||||||
SCOREMODELS="$SCOREINPUT -v $VIVOCONFIG -VcheckEmpty=$CHECKEMPTY $SCOREDATA -t $TEMPCOPYDIR -b $SCOREBATCHSIZE"
|
|
||||||
|
|
||||||
# Execute Score to disambiguate data in "scoring" JENA model
|
|
||||||
TITLE="-Atitle=$EQTEST -Ftitle=$RDFSLABEL -Wtitle=1.0 -Ptitle=$RDFSLABEL"
|
|
||||||
|
|
||||||
$Score $SCOREMODELS $TITLE -n ${BASEURI}pub/
|
|
||||||
$Match $SCOREINPUT $SCOREDATA -t 0.7 -r
|
|
||||||
|
|
||||||
# clear H2 score data Model
|
|
||||||
rm -rf $SCOREDATADIR
|
|
||||||
|
|
||||||
|
|
||||||
#Author, Organization, Geographic Location, Journal match
|
|
||||||
LNAME="-AlName=$LEVDIFF -FlName=$FLNAME -WlName=0.5 -PlName=$FLNAME"
|
|
||||||
FNAME="-AfName=$LEVDIFF -FfName=$FFNAME -WfName=0.3 -PfName=$FFNAME"
|
|
||||||
RDFSLABELSCORE="-ArdfsLabel=$LEVDIFF -FrdfsLabel=$RDFSLABEL -WrdfsLabel=1.0 -PrdfsLabel=$RDFSLABEL"
|
|
||||||
|
|
||||||
$Score $SCOREMODELS $FNAME $LNAME -n ${BASEURI}author/
|
|
||||||
$Score $SCOREMODELS $RDFSLABELSCORE -n ${BASEURI}org/
|
|
||||||
$Score $SCOREMODELS $RDFSLABELSCORE -n ${BASEURI}geo/
|
|
||||||
$Score $SCOREMODELS $RDFSLABELSCORE -n ${BASEURI}journal/
|
|
||||||
$Match $SCOREINPUT $SCOREDATA -t 0.7 -r
|
|
||||||
|
|
||||||
|
|
||||||
# clear H2 score data Model
|
|
||||||
rm -rf $SCOREDATADIR
|
|
||||||
|
|
||||||
# Clear old H2 temp copy of input (URI here is hardcoded in Score)
|
|
||||||
$JenaConnect -Jtype=tdb -JdbDir=$TEMPCOPYDIR -JmodelName=http://vivoweb.org/harvester/model/scoring#inputClone -t
|
|
||||||
|
|
||||||
|
|
||||||
#Authorship match
|
|
||||||
AUTHPUB="-Aauthpub=$EQTEST -Fauthpub=$LINKINFORES -Wauthpub=0.5 -Pauthpub=$LINKINFORES"
|
|
||||||
AUTHAUTH="-Aauthauth=$EQTEST -Fauthauth=$LINKAUTH -Wauthauth=0.5 -Pauthauth=$LINKAUTH"
|
|
||||||
|
|
||||||
$Score $SCOREMODELS $AUTHPUB $AUTHAUTH -n ${BASEURI}authorship/
|
|
||||||
$Match $SCOREINPUT $SCOREDATA -t 0.7 -r
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# backup H2 score data Model
|
|
||||||
BACKSCOREDATA="scoredata-auths"
|
|
||||||
backup-path $SCOREDATADIR $BACKSCOREDATA
|
|
||||||
# uncomment to restore previous H2 matched Model
|
|
||||||
#restore-path $SCOREDATADIR $BACKSCOREDATA
|
|
||||||
|
|
||||||
# clear H2 score data Model
|
|
||||||
rm -rf $SCOREDATADIR
|
|
||||||
|
|
||||||
# Clear old H2 temp copy
|
|
||||||
rm -rf $TEMPCOPYDIR
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Execute ChangeNamespace lines: the -o flag value is determined by the XSLT used to translate the data
|
|
||||||
CNFLAGS="$SCOREINPUT -v $VIVOCONFIG -VcheckEmpty=$CHECKEMPTY -n $NAMESPACE"
|
|
||||||
# Execute ChangeNamespace to get unmatched Publications into current namespace
|
|
||||||
$ChangeNamespace $CNFLAGS -u ${BASEURI}pub/
|
|
||||||
# Execute ChangeNamespace to get unmatched Authorships into current namespace
|
|
||||||
$ChangeNamespace $CNFLAGS -u ${BASEURI}authorship/
|
|
||||||
# Execute ChangeNamespace to get unmatched Authors into current namespace
|
|
||||||
$ChangeNamespace $CNFLAGS -u ${BASEURI}author/
|
|
||||||
# Execute ChangeNamespace to get unmatched Organizations into current namespace
|
|
||||||
$ChangeNamespace $CNFLAGS -u ${BASEURI}org/
|
|
||||||
# Execute ChangeNamespace to get unmatched Geographic Locations into current namespace
|
|
||||||
$ChangeNamespace $CNFLAGS -u ${BASEURI}geo/
|
|
||||||
# Execute ChangeNamespace to get unmatched Journals into current namespace
|
|
||||||
$ChangeNamespace $CNFLAGS -u ${BASEURI}journal/
|
|
||||||
|
|
||||||
|
|
||||||
# Backup pretransfer vivo database, symlink latest to latest.sql
|
|
||||||
BACKPREDB="pretransfer"
|
|
||||||
backup-mysqldb $BACKPREDB
|
|
||||||
# uncomment to restore pretransfer vivo database
|
|
||||||
#restore-mysqldb $BACKPREDB
|
|
||||||
|
|
||||||
PREVHARVESTMODEL="http://vivoweb.org/ingest/mods"
|
|
||||||
ADDFILE="$BASEDIR/additions.rdf.xml"
|
|
||||||
SUBFILE="$BASEDIR/subtractions.rdf.xml"
|
|
||||||
|
|
||||||
# Find Subtractions
|
|
||||||
$Diff -m $VIVOCONFIG -MmodelName=$PREVHARVESTMODEL -McheckEmpty=$CHECKEMPTY -s $H2MODEL -ScheckEmpty=$CHECKEMPTY -SdbUrl=$MODELDBURL -SmodelName=$MODELNAME -d $SUBFILE
|
|
||||||
# Find Additions
|
|
||||||
$Diff -m $H2MODEL -McheckEmpty=$CHECKEMPTY -MdbUrl=$MODELDBURL -MmodelName=$MODELNAME -s $VIVOCONFIG -ScheckEmpty=$CHECKEMPTY -SmodelName=$PREVHARVESTMODEL -d $ADDFILE
|
|
||||||
|
|
||||||
PREVHARVESTMODELINPUT="-i $VIVOCONFIG -ImodelName=$PREVHARVESTMODEL -IcheckEmpty=$CHECKEMPTY"
|
|
||||||
|
|
||||||
|
|
||||||
# Backup adds and subs
|
|
||||||
backup-file $ADDFILE adds.rdf.xml
|
|
||||||
backup-file $SUBFILE subs.rdf.xml
|
|
||||||
|
|
||||||
# Apply Subtractions to Previous model
|
|
||||||
$Transfer -o $H2MODEL -OdbUrl=${PREVHARVDBURLBASE}${HARVESTER_TASK}/store -OcheckEmpty=$CHECKEMPTY -OmodelName=$PREVHARVESTMODEL -r $SUBFILE -m
|
|
||||||
# Apply Additions to Previous model
|
|
||||||
$Transfer -o $H2MODEL -OdbUrl=${PREVHARVDBURLBASE}${HARVESTER_TASK}/store -OcheckEmpty=$CHECKEMPTY -OmodelName=$PREVHARVESTMODEL -r $ADDFILE
|
|
||||||
# Apply Subtractions to VIVO
|
|
||||||
$Transfer -o $VIVOCONFIG -OcheckEmpty=$CHECKEMPTY -r $SUBFILE -m
|
|
||||||
# Apply Additions to VIVO
|
|
||||||
$Transfer -o $VIVOCONFIG -OcheckEmpty=$CHECKEMPTY -r $ADDFILE
|
|
||||||
|
|
||||||
|
|
||||||
#Restart Tomcat
|
|
||||||
#Tomcat must be restarted in order for the harvested data to appear in VIVO
|
|
||||||
echo $HARVESTER_TASK ' completed successfully'
|
|
||||||
/etc/init.d/tomcat stop
|
|
||||||
/etc/init.d/apache2 reload
|
|
||||||
/etc/init.d/tomcat start</#noparse></textarea>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="clearBothDiv" />
|
<div class="clearBothDiv" />
|
||||||
|
|
|
@ -5,17 +5,20 @@ package edu.cornell.mannlib.vitro.webapp.controller.harvester;
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileReader;
|
import java.io.FileReader;
|
||||||
|
import java.io.FileWriter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.Hashtable;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import javax.servlet.ServletContext;
|
import javax.servlet.ServletContext;
|
||||||
import javax.servlet.ServletException;
|
import javax.servlet.ServletException;
|
||||||
import javax.servlet.http.HttpServletRequest;
|
import javax.servlet.http.HttpServletRequest;
|
||||||
import javax.servlet.http.HttpServletResponse;
|
import javax.servlet.http.HttpServletResponse;
|
||||||
import org.apache.commons.fileupload.FileItem;
|
import org.apache.commons.fileupload.FileItem;
|
||||||
|
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
import org.json.JSONException;
|
import org.json.JSONException;
|
||||||
|
@ -39,6 +42,7 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
|
|
||||||
private static final String PARAMETER_FIRST_UPLOAD = "firstUpload";
|
private static final String PARAMETER_FIRST_UPLOAD = "firstUpload";
|
||||||
private static final String PARAMETER_UPLOADED_FILE = "uploadedFile";
|
private static final String PARAMETER_UPLOADED_FILE = "uploadedFile";
|
||||||
|
private static final String PARAMETER_IS_HARVEST_CLICK = "isHarvestClick";
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected ResponseValues processRequest(VitroRequest vreq) {
|
protected ResponseValues processRequest(VitroRequest vreq) {
|
||||||
|
@ -47,6 +51,7 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
//body.put("uploadPostback", "false");
|
//body.put("uploadPostback", "false");
|
||||||
body.put("paramFirstUpload", PARAMETER_FIRST_UPLOAD);
|
body.put("paramFirstUpload", PARAMETER_FIRST_UPLOAD);
|
||||||
body.put("paramUploadedFile", PARAMETER_UPLOADED_FILE);
|
body.put("paramUploadedFile", PARAMETER_UPLOADED_FILE);
|
||||||
|
body.put("paramIsHarvestClick", PARAMETER_IS_HARVEST_CLICK);
|
||||||
return new TemplateResponseValues(TEMPLATE_DEFAULT, body);
|
return new TemplateResponseValues(TEMPLATE_DEFAULT, body);
|
||||||
} catch (Throwable e) {
|
} catch (Throwable e) {
|
||||||
log.error(e, e);
|
log.error(e, e);
|
||||||
|
@ -65,7 +70,8 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
*/
|
*/
|
||||||
public static String getHarvesterPath()
|
public static String getHarvesterPath()
|
||||||
{
|
{
|
||||||
String harvesterPath = "/usr/share/vivo/harvester/"; //todo: hack
|
//String harvesterPath = "/usr/share/vivo/harvester/"; //todo: hack
|
||||||
|
String harvesterPath = "/home/mbarbieri/workspace/HarvesterDevTomcat2/";
|
||||||
return harvesterPath;
|
return harvesterPath;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,7 +81,7 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
* @return the base directory for file harvest uploads
|
* @return the base directory for file harvest uploads
|
||||||
* @throws Exception if the Vitro home directory could not be found
|
* @throws Exception if the Vitro home directory could not be found
|
||||||
*/
|
*/
|
||||||
private String getUploadPathBase(ServletContext context) throws Exception
|
private static String getUploadPathBase(ServletContext context) throws Exception
|
||||||
{
|
{
|
||||||
String vitroHomeDirectoryName = ConfigurationProperties.getBean(context).getProperty(FileStorageSetup.PROPERTY_VITRO_HOME_DIR);
|
String vitroHomeDirectoryName = ConfigurationProperties.getBean(context).getProperty(FileStorageSetup.PROPERTY_VITRO_HOME_DIR);
|
||||||
if (vitroHomeDirectoryName == null) {
|
if (vitroHomeDirectoryName == null) {
|
||||||
|
@ -89,35 +95,66 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
/**
|
/**
|
||||||
* Gets the FileHarvestJob implementation that is needed to handle the specified request. This
|
* Gets the FileHarvestJob implementation that is needed to handle the specified request. This
|
||||||
* will depend on the type of harvest being performed (CSV, RefWorks, etc.)
|
* will depend on the type of harvest being performed (CSV, RefWorks, etc.)
|
||||||
* @param request the request from the browser
|
* @param vreq the request from the browser
|
||||||
* @return the FileHarvestJob that will provide harvest-type-specific services for this request
|
* @return the FileHarvestJob that will provide harvest-type-specific services for this request
|
||||||
*/
|
*/
|
||||||
private FileHarvestJob getJob(HttpServletRequest request)
|
private FileHarvestJob getJob(VitroRequest vreq)
|
||||||
{
|
{
|
||||||
|
String namespace = vreq.getWebappDaoFactory().getDefaultNamespace();
|
||||||
|
|
||||||
//todo: complete
|
//todo: complete
|
||||||
return new CsvHarvestJob("persontemplate.csv");
|
return new CsvHarvestJob(vreq, "granttemplate.csv", namespace);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static String getUploadPath(VitroRequest vreq) {
|
||||||
|
try {
|
||||||
|
String path = getUploadPathBase(vreq.getSession().getServletContext()) + getSessionId(vreq) + "/";
|
||||||
|
return path;
|
||||||
|
} catch(Exception e) {
|
||||||
|
log.error(e, e);
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
public void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
throws IOException, ServletException {
|
throws IOException, ServletException {
|
||||||
|
|
||||||
|
log.error("this is a post.");
|
||||||
|
|
||||||
|
try {
|
||||||
|
boolean isMultipart = ServletFileUpload.isMultipartContent(request);
|
||||||
|
if(isMultipart)
|
||||||
|
doFileUploadPost(request, response);
|
||||||
|
else if(request.getParameter(PARAMETER_IS_HARVEST_CLICK).toLowerCase().equals("true"))
|
||||||
|
doHarvestPost(request, response);
|
||||||
|
else
|
||||||
|
doCheckHarvestStatusPost(request, response);
|
||||||
|
} catch(Exception e) {
|
||||||
|
log.error(e, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doFileUploadPost(HttpServletRequest request, HttpServletResponse response)
|
||||||
|
throws IOException, ServletException {
|
||||||
|
|
||||||
|
log.error("file upload post.");
|
||||||
JSONObject json = new JSONObject();
|
JSONObject json = new JSONObject();
|
||||||
try {
|
try {
|
||||||
|
VitroRequest vreq = new VitroRequest(request);
|
||||||
String path = getUploadPathBase(request.getSession().getServletContext()) + getSessionId(request) + "/";
|
|
||||||
File directory = new File(path);
|
|
||||||
|
|
||||||
|
|
||||||
int maxFileSize = 1024 * 1024;
|
int maxFileSize = 1024 * 1024;
|
||||||
FileUploadServletRequest req = FileUploadServletRequest.parseRequest(request, maxFileSize);
|
FileUploadServletRequest req = FileUploadServletRequest.parseRequest(vreq, maxFileSize);
|
||||||
if(req.hasFileUploadException()) {
|
if(req.hasFileUploadException()) {
|
||||||
Exception e = req.getFileUploadException();
|
Exception e = req.getFileUploadException();
|
||||||
new ExceptionVisibleToUser(e);
|
new ExceptionVisibleToUser(e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
String path = getUploadPath(vreq);
|
||||||
|
File directory = new File(path);
|
||||||
|
|
||||||
String firstUpload = req.getParameter(PARAMETER_FIRST_UPLOAD); //clear directory on first upload
|
String firstUpload = req.getParameter(PARAMETER_FIRST_UPLOAD); //clear directory on first upload
|
||||||
|
log.error(firstUpload);
|
||||||
if(firstUpload.toLowerCase().equals("true")) {
|
if(firstUpload.toLowerCase().equals("true")) {
|
||||||
if(directory.exists()) {
|
if(directory.exists()) {
|
||||||
File[] children = directory.listFiles();
|
File[] children = directory.listFiles();
|
||||||
|
@ -130,7 +167,7 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
if(!directory.exists())
|
if(!directory.exists())
|
||||||
directory.mkdirs();
|
directory.mkdirs();
|
||||||
|
|
||||||
FileHarvestJob job = getJob(req);
|
FileHarvestJob job = getJob(vreq);
|
||||||
|
|
||||||
Map<String, List<FileItem>> fileStreams = req.getFiles();
|
Map<String, List<FileItem>> fileStreams = req.getFiles();
|
||||||
if(fileStreams.get(PARAMETER_UPLOADED_FILE) != null && fileStreams.get(PARAMETER_UPLOADED_FILE).size() > 0) {
|
if(fileStreams.get(PARAMETER_UPLOADED_FILE) != null && fileStreams.get(PARAMETER_UPLOADED_FILE).size() > 0) {
|
||||||
|
@ -194,6 +231,96 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
response.getWriter().write(json.toString());
|
response.getWriter().write(json.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void doHarvestPost(HttpServletRequest request, HttpServletResponse response) {
|
||||||
|
|
||||||
|
log.error("harvest post.");
|
||||||
|
try {
|
||||||
|
VitroRequest vreq = new VitroRequest(request);
|
||||||
|
FileHarvestJob job = getJob(vreq);
|
||||||
|
|
||||||
|
//String path = getUploadPath(vreq);
|
||||||
|
|
||||||
|
String script = job.getScript();
|
||||||
|
log.error("start harvest");
|
||||||
|
runScript(getSessionId(request), script);
|
||||||
|
log.error("end harvest");
|
||||||
|
|
||||||
|
JSONObject json = new JSONObject();
|
||||||
|
json.put("progressSinceLastCheck", "");
|
||||||
|
json.put("finished", false);
|
||||||
|
|
||||||
|
response.getWriter().write(json.toString());
|
||||||
|
|
||||||
|
} catch(Exception e) {
|
||||||
|
log.error(e, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doCheckHarvestStatusPost(HttpServletRequest request, HttpServletResponse response) {
|
||||||
|
|
||||||
|
log.error("check harvest status post.");
|
||||||
|
|
||||||
|
try {
|
||||||
|
String newline = "\n";
|
||||||
|
|
||||||
|
String sessionId = getSessionId(request);
|
||||||
|
|
||||||
|
ArrayList<String> unsentLogLinesList = sessionIdToUnsentLogLines.get(sessionId);
|
||||||
|
String[] unsentLogLines;
|
||||||
|
if(unsentLogLinesList != null) {
|
||||||
|
synchronized (unsentLogLinesList) {
|
||||||
|
unsentLogLines = unsentLogLinesList.toArray(new String[unsentLogLinesList.size()]);
|
||||||
|
unsentLogLinesList.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
String progressSinceLastCheck = "";
|
||||||
|
for(int i = 0; i < unsentLogLines.length; i++) {
|
||||||
|
progressSinceLastCheck += unsentLogLines[i] + newline;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean finished = !sessionIdToHarvestThread.containsKey(sessionId);
|
||||||
|
|
||||||
|
JSONObject json = new JSONObject();
|
||||||
|
json.put("progressSinceLastCheck", progressSinceLastCheck);
|
||||||
|
json.put("finished", finished);
|
||||||
|
|
||||||
|
response.getWriter().write(json.toString());
|
||||||
|
}
|
||||||
|
} catch(Exception e) {
|
||||||
|
log.error(e, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
private File createScriptFile(String script) throws IOException {
|
||||||
|
File scriptDirectory = new File(getHarvesterPath() + "scripts/temp");
|
||||||
|
if(!scriptDirectory.exists()) {
|
||||||
|
scriptDirectory.mkdirs();
|
||||||
|
}
|
||||||
|
|
||||||
|
File tempFile = File.createTempFile("harv", ".sh", scriptDirectory);
|
||||||
|
|
||||||
|
FileWriter writer = new FileWriter(tempFile);
|
||||||
|
writer.write(script);
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
return tempFile;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void runScript(String sessionId, String script) {
|
||||||
|
|
||||||
|
if(!sessionIdToHarvestThread.containsKey(sessionId)) {
|
||||||
|
|
||||||
|
ScriptRunner runner = new ScriptRunner(sessionId, script);
|
||||||
|
sessionIdToHarvestThread.put(sessionId, runner);
|
||||||
|
runner.start();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handles a name conflict in a directory by providing a new name that does not conflict with the
|
* Handles a name conflict in a directory by providing a new name that does not conflict with the
|
||||||
|
@ -228,7 +355,7 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
* @param request the request coming in from the browser
|
* @param request the request coming in from the browser
|
||||||
* @return the session ID
|
* @return the session ID
|
||||||
*/
|
*/
|
||||||
private String getSessionId(HttpServletRequest request) {
|
private static String getSessionId(HttpServletRequest request) {
|
||||||
return request.getSession().getId();
|
return request.getSession().getId();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -237,21 +364,6 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
private void doHarvest() {
|
|
||||||
/*
|
|
||||||
Harvest will entail:
|
|
||||||
|
|
||||||
D2RMapFetch
|
|
||||||
Transfer to local temp model
|
|
||||||
Diffs
|
|
||||||
Transfers
|
|
||||||
|
|
||||||
If this is being done with a script, then we should probably use a templating system.
|
|
||||||
run-csv.sh
|
|
||||||
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -273,9 +385,81 @@ public class TestFileController extends FreemarkerHttpServlet {
|
||||||
super(cause);
|
super(cause);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private Map<String, ScriptRunner> sessionIdToHarvestThread = new Hashtable<String, ScriptRunner>(); //Hashtable is threadsafe, HashMap is not
|
||||||
|
private Map<String, ArrayList<String>> sessionIdToUnsentLogLines = new Hashtable<String, ArrayList<String>>(); //Hashtable is threadsafe, HashMap is not
|
||||||
|
private class ScriptRunner extends Thread {
|
||||||
|
|
||||||
|
private final String sessionId;
|
||||||
|
private final String script;
|
||||||
|
|
||||||
|
public ScriptRunner(String sessionId, String script) {
|
||||||
|
this.sessionId = sessionId;
|
||||||
|
this.script = script;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() {
|
||||||
|
try {
|
||||||
|
ArrayList<String> unsentLogLines = sessionIdToUnsentLogLines.get(sessionId);
|
||||||
|
if(unsentLogLines == null) {
|
||||||
|
unsentLogLines = new ArrayList<String>();
|
||||||
|
sessionIdToUnsentLogLines.put(sessionId, unsentLogLines);
|
||||||
|
}
|
||||||
|
|
||||||
|
File scriptFile = createScriptFile(script);
|
||||||
|
|
||||||
|
String command = "/bin/bash " + getHarvesterPath() + "scripts/temp/" + scriptFile.getName();
|
||||||
|
|
||||||
|
log.info("Running command: " + command);
|
||||||
|
Process pr = Runtime.getRuntime().exec(command);
|
||||||
|
|
||||||
|
//try { Thread.sleep(15000); } catch(InterruptedException e) {log.error(e, e);}
|
||||||
|
|
||||||
|
BufferedReader processOutputReader = new BufferedReader(new InputStreamReader(pr.getInputStream()));
|
||||||
|
for(String line = processOutputReader.readLine(); line != null; line = processOutputReader.readLine()) {
|
||||||
|
synchronized(unsentLogLines) {
|
||||||
|
unsentLogLines.add(line);
|
||||||
|
}
|
||||||
|
log.info("Harvester output: " + line);
|
||||||
|
}
|
||||||
|
|
||||||
|
BufferedReader processErrorReader = new BufferedReader(new InputStreamReader(pr.getErrorStream()));
|
||||||
|
for(String line = processErrorReader.readLine(); line != null; line = processErrorReader.readLine()) {
|
||||||
|
log.info("Harvester error: " + line);
|
||||||
|
}
|
||||||
|
|
||||||
|
int exitVal;
|
||||||
|
|
||||||
|
try {
|
||||||
|
exitVal = pr.waitFor();
|
||||||
|
}
|
||||||
|
catch(InterruptedException e) {
|
||||||
|
throw new IOException(e.getMessage(), e);
|
||||||
|
}
|
||||||
|
log.debug("Harvester script exited with error code " + exitVal);
|
||||||
|
log.info("Harvester script execution complete");
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error(e, e);
|
||||||
|
} finally {
|
||||||
|
if(sessionIdToHarvestThread.containsKey(sessionId)) {
|
||||||
|
sessionIdToHarvestThread.remove(sessionId);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An implementation of FileHarvestJob that can be used for any CSV file harvest.
|
* An implementation of FileHarvestJob that can be used for any CSV file harvest.
|
||||||
*/
|
*/
|
||||||
|
@ -286,17 +470,29 @@ class CsvHarvestJob implements FileHarvestJob {
|
||||||
*/
|
*/
|
||||||
private static final Log log = LogFactory.getLog(CsvHarvestJob.class);
|
private static final Log log = LogFactory.getLog(CsvHarvestJob.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The HTTP request.
|
||||||
|
*/
|
||||||
|
private VitroRequest vreq;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The template file against which uploaded CSV files will be validated.
|
* The template file against which uploaded CSV files will be validated.
|
||||||
*/
|
*/
|
||||||
private File templateFile;
|
private File templateFile;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The namespace to be used for the harvest.
|
||||||
|
*/
|
||||||
|
private final String namespace;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor.
|
* Constructor.
|
||||||
* @param templateFileName just the name of the template file. The directory is assumed to be standard.
|
* @param templateFileName just the name of the template file. The directory is assumed to be standard.
|
||||||
*/
|
*/
|
||||||
public CsvHarvestJob(String templateFileName) {
|
public CsvHarvestJob(VitroRequest vreq, String templateFileName, String namespace) {
|
||||||
templateFile = new File(getTemplateFileDirectory() + templateFileName);
|
this.vreq = vreq;
|
||||||
|
this.templateFile = new File(getTemplateFileDirectory() + templateFileName);
|
||||||
|
this.namespace = namespace;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -334,8 +530,14 @@ class CsvHarvestJob implements FileHarvestJob {
|
||||||
return errorMessage;
|
return errorMessage;
|
||||||
}
|
}
|
||||||
else if(line.length != 0) {
|
else if(line.length != 0) {
|
||||||
if(line.length != templateFirstLine.length)
|
if(line.length != templateFirstLine.length) {
|
||||||
return "Mismatch in number of entries in row " + i;
|
String retval = "Mismatch in number of entries in row " + i + ": expected , " + templateFirstLine.length + ", found " + line.length + " ";
|
||||||
|
for(int j = 0; j < line.length; j++) {
|
||||||
|
retval += "\"" + line[j] + "\", ";
|
||||||
|
}
|
||||||
|
//return retval;
|
||||||
|
return "Mismatch in number of entries in row " + i + ": expected , " + templateFirstLine.length + ", found " + line.length;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -369,7 +571,7 @@ class CsvHarvestJob implements FileHarvestJob {
|
||||||
@Override
|
@Override
|
||||||
public String getScript()
|
public String getScript()
|
||||||
{
|
{
|
||||||
String path = ""; //todo: complete
|
String path = TestFileController.getHarvesterPath() + "scripts/" + "testCSVtoRDFgrant.sh"; //todo: complete
|
||||||
File scriptTemplate = new File(path);
|
File scriptTemplate = new File(path);
|
||||||
|
|
||||||
String scriptTemplateContents = readScriptTemplate(scriptTemplate);
|
String scriptTemplateContents = readScriptTemplate(scriptTemplate);
|
||||||
|
@ -380,6 +582,11 @@ class CsvHarvestJob implements FileHarvestJob {
|
||||||
|
|
||||||
private String performScriptTemplateReplacements(String scriptTemplateContents) {
|
private String performScriptTemplateReplacements(String scriptTemplateContents) {
|
||||||
String replacements = scriptTemplateContents;
|
String replacements = scriptTemplateContents;
|
||||||
|
|
||||||
|
String fileDirectory = TestFileController.getUploadPath(vreq);
|
||||||
|
|
||||||
|
replacements = replacements.replace("${UPLOADS_FOLDER}", fileDirectory);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* What needs to be replaced?
|
* What needs to be replaced?
|
||||||
*
|
*
|
||||||
|
@ -417,140 +624,10 @@ class CsvHarvestJob implements FileHarvestJob {
|
||||||
@Override
|
@Override
|
||||||
public void performHarvest(File directory) {
|
public void performHarvest(File directory) {
|
||||||
|
|
||||||
/* //COMMENTED OUT UNTIL HARVESTER INTEGRATION IS WORKING
|
|
||||||
String vivoconfig = "config/models/vivo.xml";
|
|
||||||
String scorebatchsize = "100";
|
|
||||||
String checkempty = "true";
|
|
||||||
String namespace = ""; //todo: get namespace
|
|
||||||
String h2model = "config/models/h2-sdb.xml";
|
|
||||||
String prevharvdburlbase = "jdbc:h2:harvested-data/prevHarvs/";
|
|
||||||
String tfrh = "config/recordhandlers/h2-jdbc.xml";
|
|
||||||
|
|
||||||
String harvesterTask = "csv";
|
|
||||||
|
|
||||||
String basedir = "harvested-data/" + harvesterTask;
|
|
||||||
|
|
||||||
String rawrhdir = basedir + "/rh-raw";
|
|
||||||
String rdfrhdir = basedir + "/rh-rdf";
|
|
||||||
String modeldir = basedir + "/model";
|
|
||||||
String scoredatadir = basedir + "/score-data";
|
|
||||||
|
|
||||||
String modeldburl = "jdbc:h2:" + modeldir + "/store";
|
|
||||||
String scoredatadburl = "jdbc:h2:" + scoredatadir + "/store";
|
|
||||||
|
|
||||||
String modelname = "csvTempTransfer";
|
|
||||||
String scoredataname = "csvScoreData";
|
|
||||||
|
|
||||||
String tempcopydir = basedir + "/temp-copy";
|
|
||||||
|
|
||||||
String[] scoreinput = Harvester.stringsToArray("-i", h2model, "-ImodelName=" + modelname, "-IdbUrl=" + modeldburl, "-IcheckEmpty=" + checkempty);
|
|
||||||
String[] scoredata = Harvester.stringsToArray("-s", h2model, "-SmodelName=" + scoredataname, "-SdbUrl=" + scoredatadburl, "-ScheckEmpty=" + checkempty);
|
|
||||||
String[] scoremodels = Harvester.stringsToArray(scoreinput, "-v", vivoconfig, "-VcheckEmpty=" + checkempty, scoredata, "-t", tempcopydir, "-b", scorebatchsize);
|
|
||||||
|
|
||||||
String[] cnflags = Harvester.stringsToArray(scoreinput, "-v", vivoconfig, "-n", namespace);
|
|
||||||
|
|
||||||
String eqtest = "org.vivoweb.harvester.score.algorithm.EqualityTest";
|
|
||||||
|
|
||||||
String grantidnum = "http://vivoweb.org/ontology/score#grantID";
|
|
||||||
String rdfslabel = "http://www.w3.org/2000/01/rdf-schema#label";
|
|
||||||
String personidnum = "http://vivoweb.org/ontology/score#personID";
|
|
||||||
String deptidnum = "http://vivoweb.org/ontology/score#deptID";
|
|
||||||
String rolein = "http://vivoweb.org/ontology/core#roleIn";
|
|
||||||
String piroleof = "http://vivoweb.org/ontology/core#principalInvestigatorRoleOf";
|
|
||||||
String copiroleof = "http://vivoweb.org/ontology/core#co-PrincipalInvestigatorRoleOf";
|
|
||||||
String datetime = "http://vivoweb.org/ontology/core#dateTime";
|
|
||||||
String baseuri = "http://vivoweb.org/harvest/csvfile/";
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//execute fetch
|
|
||||||
Harvester.runCSVtoRDF("-o", tfrh, "-O", "fileDir=" + rawrhdir, "-i", "filepath");
|
|
||||||
|
|
||||||
//execute translate
|
|
||||||
Harvester.runXSLTranslator("-i", tfrh, "-IfileDir=" + rawrhdir, "-o", tfrh, "-OfileDir=" + rdfrhdir, "-x", "config/datamaps/csv-grant-to-vivo.xsl");
|
|
||||||
|
|
||||||
//execute transfer to import from record handler into local temp model
|
|
||||||
Harvester.runTransfer("-o", h2model, "-OmodelName=" + modelname, "-OdbUrl=" + modeldburl, "-h", tfrh, "-HfileDir=" + rdfrhdir, "-n", namespace);
|
|
||||||
|
|
||||||
//smushes in-place(-r) on the Grant id THEN on the person ID then deptID
|
|
||||||
Harvester.runSmush(scoreinput, "-P", grantidnum, "-P", personidnum, "-P", deptidnum, "-P", datetime, "-n", baseuri, "-r");
|
|
||||||
|
|
||||||
//scoring of Grants on GrantNumber
|
|
||||||
Harvester.runScore(scoremodels, "-AGrantNumber=" + eqtest, "-WGrantNumber=1.0", "-FGrantNumber=" + grantidnum, "-PGrantNumber=" + grantidnum, "-n", baseuri + "grant/");
|
|
||||||
|
|
||||||
//scoring of people on PERSONIDNUM
|
|
||||||
Harvester.runScore(scoremodels, "-Aufid=" + eqtest, "-Wufid=1.0", "-Fufid=" + personidnum, "-Pufid=" + personidnum, "-n", baseuri + "person/");
|
|
||||||
|
|
||||||
Harvester.runSmush(scoreinput, "-P", deptidnum, "-n", baseuri + "org/", "-r");
|
|
||||||
|
|
||||||
//scoring of orgs on DeptID
|
|
||||||
Harvester.runScore(scoremodels, "-AdeptID=" + eqtest, "-WdeptID=1.0", "-FdeptID=" + deptidnum, "-PdeptID=" + deptidnum, "-n", baseuri + "org/");
|
|
||||||
|
|
||||||
|
|
||||||
Harvester.runSmush(scoreinput, "-P", rdfslabel, "-n", baseuri + "sponsor/", "-r");
|
|
||||||
|
|
||||||
//scoring sponsors by labels
|
|
||||||
Harvester.runScore(scoremodels, "-Alabel=" + eqtest, "-Wlabel=1.0", "-Flabel=" + rdfslabel, "-Plabel=" + rdfslabel, "-n", baseuri + "sponsor/");
|
|
||||||
|
|
||||||
//scoring of PI Roles
|
|
||||||
String[] piuri = Harvester.stringsToArray("-Aperson=" + eqtest, "-Wperson=0.5", "-Fperson=" + piroleof, "-Pperson=" + piroleof);
|
|
||||||
String[] granturi = Harvester.stringsToArray("-Agrant=" + eqtest, "-Wgrant=0.5", "-Fgrant=" + rolein, "-Pgrant=" + rolein);
|
|
||||||
Harvester.runScore(scoremodels, piuri, granturi, "-n", baseuri + "piRole/");
|
|
||||||
|
|
||||||
//scoring of coPI Roles
|
|
||||||
String[] copiuri = Harvester.stringsToArray("-Aperson=" + eqtest, "-Wperson=0.5", "-Fperson=" + copiroleof, "-Pperson=" + copiroleof);
|
|
||||||
Harvester.runScore(scoremodels, copiuri, granturi, "-n", baseuri + "coPiRole/");
|
|
||||||
|
|
||||||
//find matches using scores and rename nodes to matching uri
|
|
||||||
Harvester.runMatch(scoreinput, scoredata, "-b", scorebatchsize, "-t", "1.0", "-r");
|
|
||||||
|
|
||||||
//execute ChangeNamespace to get grants into current namespace
|
|
||||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "grant/");
|
|
||||||
|
|
||||||
//execute ChangeNamespace to get orgs into current namespace
|
|
||||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "org/");
|
|
||||||
|
|
||||||
//execute ChangeNamespace to get sponsors into current namespace
|
|
||||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "sponsor/");
|
|
||||||
|
|
||||||
//execute ChangeNamespace to get people into current namespace
|
|
||||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "person/");
|
|
||||||
|
|
||||||
//execute ChangeNamespace to get PI roles into current namespace
|
|
||||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "piRole/");
|
|
||||||
|
|
||||||
//execute ChangeNamespace to get co-PI roles into current namespace
|
|
||||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "coPiRole/");
|
|
||||||
|
|
||||||
//execute ChangeNamespace to get co-PI roles into current namespace
|
|
||||||
Harvester.runChangeNamespace(cnflags, "-u", baseuri + "timeInterval");
|
|
||||||
|
|
||||||
|
|
||||||
//todo: we probably don't want to do prev harvest stuff for this
|
|
||||||
String prevharvestmodel = "http://vivoweb.org/ingest/dsr";
|
|
||||||
String addfile = basedir + "/additions.rdf.xml";
|
|
||||||
String subfile = basedir + "/subtractions.rdf.xml";
|
|
||||||
|
|
||||||
//find Subtractions
|
|
||||||
Harvester.runDiff("-m", h2model, "-MdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-McheckEmpty=" + checkempty, "-MmodelName=" + prevharvestmodel, "-s", h2model, "-ScheckEmpty=" + checkempty, "-SdbUrl=" + modeldburl, "-SmodelName=" + modelname, "-d", subfile);
|
|
||||||
|
|
||||||
//find Additions
|
|
||||||
Harvester.runDiff("-m", h2model, "-McheckEmpty=" + checkempty, "-MdbUrl=" + modeldburl, "-MmodelName=" + modelname, "-s", h2model, "-ScheckEmpty=" + checkempty, "-SdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-SmodelName=" + prevharvestmodel, "-d", addfile);
|
|
||||||
|
|
||||||
//apply Subtractions to Previous model
|
|
||||||
Harvester.runTransfer("-o", h2model, "-OdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-OcheckEmpty=" + checkempty, "-OmodelName=" + prevharvestmodel, "-r", subfile, "-m");
|
|
||||||
|
|
||||||
//apply Additions to Previous model
|
|
||||||
Harvester.runTransfer("-o", h2model, "-OdbUrl=" + prevharvdburlbase + harvesterTask + "/store", "-OcheckEmpty=" + checkempty, "-OmodelName=" + prevharvestmodel, "-r", addfile);
|
|
||||||
|
|
||||||
//apply Subtractions to VIVO
|
|
||||||
Harvester.runTransfer("-o", vivoconfig, "-OcheckEmpty=" + checkempty, "-r", subfile, "-m");
|
|
||||||
|
|
||||||
//apply Additions to VIVO
|
|
||||||
Harvester.runTransfer("-o", vivoconfig, "-OcheckEmpty=" + checkempty, "-r", addfile);
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue