NIHVIVO-3130 NIHVIVO-3089 ingest tools fixes and improvements

This commit is contained in:
brianjlowe 2011-11-04 21:01:34 +00:00
parent 4d8206b018
commit f5e1661f9a
22 changed files with 331 additions and 309 deletions

View file

@ -5,6 +5,7 @@ package edu.cornell.mannlib.vitro.webapp.controller.jena;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@ -34,7 +35,6 @@ import edu.cornell.mannlib.vitro.webapp.utils.jena.JenaIngestUtils;
public class JenaCsv2RdfController extends BaseEditController{
private static final String CSV2RDF_JSP = "/jenaIngest/csv2rdf.jsp";
private static final String INGEST_MENU_JSP = "/jenaIngest/ingestMenu.jsp";
private static final String CSV2RDF_SELECT_URI_JSP = "/jenaIngest/csv2rdfSelectUri.jsp";
private static int maxFileSizeInBytes = 1024 * 1024 * 2000; //2000mb
@ -62,17 +62,20 @@ public class JenaCsv2RdfController extends BaseEditController{
if ("csv2rdf".equals(actionStr)) {
String csvUrl = request.getParameter("csvUrl");
if (!csvUrl.isEmpty() || !filePath.isEmpty()) {
String[] sourceModel = new String[1];
sourceModel[0] = doExecuteCsv2Rdf(request,fileStream,filePath);
Model model = ModelFactory.createDefaultModel();
String destinationModelNameStr = request.getParameter(
"destinationModelName");
Model csv2rdfResult = doExecuteCsv2Rdf(
request, fileStream, filePath);
ModelMaker maker = getVitroJenaModelMaker(request);
Boolean csv2rdf = true;
JenaIngestUtils utils = new JenaIngestUtils();
Map<String,LinkedList<String>> propertyMap = utils.generatePropertyMap(sourceModel, model, maker);
List<Model> resultList = new ArrayList<Model>();
resultList.add(csv2rdfResult);
Map<String,LinkedList<String>> propertyMap =
utils.generatePropertyMap(resultList, maker);
request.setAttribute("propertyMap",propertyMap);
getServletContext().setAttribute("sourceModel", sourceModel);
getServletContext().setAttribute("csv2rdf",csv2rdf);
request.setAttribute("destinationModelName", sourceModel[0]);
request.setAttribute("csv2rdf", csv2rdf);
request.setAttribute("destinationModelName", destinationModelNameStr);
request.setAttribute("title","URI Select");
request.setAttribute("bodyJsp", CSV2RDF_SELECT_URI_JSP);
} else {
@ -88,9 +91,7 @@ public class JenaCsv2RdfController extends BaseEditController{
try {
rd.forward(request, response);
} catch (Exception e) {
System.out.println(this.getClass().getName()+" could not forward to view.");
System.out.println(e.getMessage());
System.out.println(e.getStackTrace());
throw new RuntimeException(e);
}
}
@ -106,7 +107,7 @@ public class JenaCsv2RdfController extends BaseEditController{
return;
}
public String doExecuteCsv2Rdf(VitroRequest vreq,FileItem fileStream, String filePath) {
public Model doExecuteCsv2Rdf(VitroRequest vreq, FileItem fileStream, String filePath) {
char[] quoteChars = {'"'};
String namespace = "";
String tboxNamespace = vreq.getParameter("tboxNamespace");
@ -139,25 +140,26 @@ public class JenaCsv2RdfController extends BaseEditController{
is = fileStream.getInputStream();
} catch (IOException e) {
System.out.println("IOException opening URL "+csvUrl);
return null;
throw new RuntimeException("Unable to access URL " + csvUrl);
}
Model[] models = null;
try {
models = c2r.convertToRdf(is,vreq,destination);
models = c2r.convertToRdf(
is, vreq.getWebappDaoFactory(), destination);
} catch (IOException e) {
System.out.println("IOException converting "+csvUrl+" to RDF");
throw new RuntimeException(
"Unable to convert " + csvUrl + " to RDF");
}
if (destination != null) {
destination.add(models[0]);
}
// TODO: rework this
vreq.getSession().setAttribute("csv2rdfResult", models[0]);
if (tboxDestination != null) {
tboxDestination.add(models[1]);
}
return destinationModelNameStr;
return models[0];
}
private Model getModel(String name, HttpServletRequest request) {

View file

@ -11,6 +11,7 @@ import java.lang.reflect.Method;
import java.sql.SQLException;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
@ -68,7 +69,6 @@ import edu.cornell.mannlib.vitro.webapp.beans.Ontology;
import edu.cornell.mannlib.vitro.webapp.controller.Controllers;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
import edu.cornell.mannlib.vitro.webapp.dao.OntologyDao;
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
import edu.cornell.mannlib.vitro.webapp.dao.jena.JenaBaseDao;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.VitroJenaModelMaker;
@ -145,8 +145,6 @@ public class JenaIngestController extends BaseEditController {
return; // don't attempt to display a JSP
} else if("clearModel".equals(actionStr)) {
processClearModelRequest(vreq, maker, modelType);
} else if("setWriteLayer".equals(actionStr)) {
processSetWriteLayerRequest(vreq, maker, modelType);
} else if("attachModel".equals(actionStr)) {
processAttachModelRequest(vreq, maker, modelType);
} else if("detachModel".equals(actionStr)) {
@ -337,13 +335,13 @@ public class JenaIngestController extends BaseEditController {
} catch (com.hp.hpl.jena.shared.CannotEncodeCharacterException cece) {
// there's got to be a better way to do this
byte[] badCharBytes = String.valueOf(cece.getBadChar()).getBytes();
System.out.println("Cannot encode character with byte values: (decimal) ");
String errorMsg = "Cannot encode character with byte values: (decimal) ";
for (int i=0; i<badCharBytes.length; i++) {
System.out.println(badCharBytes[i]);
errorMsg += badCharBytes[i];
}
throw new RuntimeException(errorMsg, cece);
} catch (Exception e) {
// Well if we can't write out to the response I guess there ain't much we can do.
e.printStackTrace();
log.error(e, e);
} finally {
model.leaveCriticalSection();
}
@ -357,30 +355,6 @@ public class JenaIngestController extends BaseEditController {
vreq.setAttribute("bodyJsp",INGEST_MENU_JSP);
}
// Currently a no-op: every statement in the body is commented out, so calling
// this method changes nothing on the request, the session or the servlet context.
// NOTE(review): the commented-out code refers to `request` and to
// WebappDaoFactoryJena / SimpleOntModelSelector, none of which are parameters of
// this method; it would need to be adapted (e.g. to `vreq`) before being re-enabled.
private void processSetWriteLayerRequest(VitroRequest vreq, ModelMaker maker, String modelType) {
// String modelName = vreq.getParameter("modelName");
// if (modelName != null) {
// OntModel mainModel = (OntModel) getServletContext().getAttribute("jenaOntModel");
// WebappDaoFactoryJena existingDaoFactory = null;
// try {
// existingDaoFactory = (WebappDaoFactoryJena) getServletContext().getAttribute("webappDaoFactory");
// } catch (Exception e) {}
// Model writeModel = maker.getModel(modelName);
// Model dynamicUnion = ModelFactory.createUnion(writeModel,mainModel);
// OntModel ontModelForDaos = ModelFactory.createOntologyModel(ONT_MODEL_SPEC, dynamicUnion);
// WebappDaoFactory wadf = new WebappDaoFactoryJena(new SimpleOntModelSelector(ontModelForDaos), (existingDaoFactory != null) ? existingDaoFactory.getDefaultNamespace() : null, null, null);
// request.getSession().setAttribute("webappDaoFactory", wadf);
// request.getSession().setAttribute("jenaOntModel",ontModelForDaos);
// System.out.println("Setting jenaOntModel session attribute");
// Model baseModel = (OntModel) getServletContext().getAttribute("baseOntModel");
// OntModel ontModelForAssertions = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM,ModelFactory.createUnion(writeModel,baseModel));
// request.getSession().setAttribute("assertionsWebappDaoFactory", new WebappDaoFactoryJena(new SimpleOntModelSelector(ontModelForAssertions)));
// request.getSession().setAttribute("baseOntModel", ontModelForAssertions);
// }
// request.setAttribute("title","Ingest Menu");
// request.setAttribute("bodyJsp",INGEST_MENU_JSP);
}
private void processAttachModelRequest(VitroRequest vreq, ModelMaker maker, String modelType) {
String modelName = vreq.getParameter("modelName");
if (modelName != null) {
@ -403,12 +377,21 @@ public class JenaIngestController extends BaseEditController {
private void processRenameBNodesRequest(VitroRequest vreq, ModelMaker maker, String modelType) {
String[] sourceModel = vreq.getParameterValues("sourceModelName");
Model model = ModelFactory.createDefaultModel();
JenaIngestUtils utils = new JenaIngestUtils();
if(sourceModel!=null && sourceModel.length!=0){
Map<String,LinkedList<String>> propertyMap = utils.generatePropertyMap(sourceModel, model, maker);
getServletContext().setAttribute("sourceModel",sourceModel);
if(sourceModel != null && sourceModel.length != 0) {
List<Model> sourceModelList = new ArrayList<Model>();
for (int i = 0; i < sourceModel.length ; i++) {
Model m = maker.getModel(sourceModel[i]);
if (m != null) {
sourceModelList.add(m);
}
}
Map<String,LinkedList<String>> propertyMap =
utils.generatePropertyMap(sourceModelList, maker);
List<String> sourceModelNameList = Arrays.asList(sourceModel);
vreq.setAttribute("sourceModel",sourceModelNameList);
vreq.setAttribute("propertyMap", propertyMap);
vreq.setAttribute("enablePropertyPatternURIs", !propertyMap.isEmpty());
vreq.setAttribute("title","URI Select");
vreq.setAttribute("bodyJsp",RENAME_BNODES_URI_SELECT_JSP);
} else {
@ -421,12 +404,21 @@ public class JenaIngestController extends BaseEditController {
String namespaceEtcStr = vreq.getParameter("namespaceEtcStr");
String pattern = vreq.getParameter("pattern");
String concatenate = vreq.getParameter("concatenate");
String[] sourceModel = (String[])getServletContext().getAttribute("sourceModel");
if(namespaceEtcStr!=null && !namespaceEtcStr.isEmpty()){
if(concatenate.equals("integer")){
doRenameBNodes(vreq,namespaceEtcStr, false, null, sourceModel);
String[] sourceModel = (String[]) vreq.getParameterValues("sourceModelName");
if(namespaceEtcStr != null) {
if (namespaceEtcStr.isEmpty()) {
if ("true".equals(vreq.getParameter("csv2rdf"))) {
processCsv2rdfRequest(vreq, maker, modelType);
return;
} else {
vreq.setAttribute("errorMsg", "Please enter a value.");
processRenameBNodesRequest(vreq, maker, modelType);
return;
}
}
else{
if (concatenate.equals("integer")) {
doRenameBNodes(vreq,namespaceEtcStr, false, null, sourceModel);
} else {
pattern = pattern.trim();
doRenameBNodes(vreq,namespaceEtcStr, true, pattern, sourceModel);
}
@ -695,7 +687,7 @@ public class JenaIngestController extends BaseEditController {
* calling method that does the merge operation.
*/
String result = utils.doMerge(uri1,uri2,baseOntModel,ontModel,infOntModel,usePrimaryLabelOnly);
getServletContext().setAttribute("leftoverModel", utils.getLeftOverModel());
vreq.getSession().setAttribute("leftoverModel", utils.getLeftOverModel());
vreq.setAttribute("result",result);
vreq.setAttribute("title","Merge Resources");
vreq.setAttribute("bodyJsp",MERGE_RESULT);
@ -710,15 +702,32 @@ public class JenaIngestController extends BaseEditController {
HttpServletResponse response,
ModelMaker maker,
String modelType) {
String uri1 = vreq.getParameter("uri1");
String uri2 = vreq.getParameter("uri2");
if(uri1!=null){
String result = doRename(uri1,uri2,response);
vreq.setAttribute("result",result);
vreq.setAttribute("title","Rename Resources");
vreq.setAttribute("bodyJsp",RENAME_RESULT);
}
else{
String oldNamespace = vreq.getParameter("oldNamespace");
String newNamespace = vreq.getParameter("newNamespace");
String errorMsg = "";
if (oldNamespace != null) {
if (oldNamespace.isEmpty() && !newNamespace.isEmpty()) {
errorMsg = "Please enter the old namespace to be changed.";
} else if (!oldNamespace.isEmpty() && newNamespace.isEmpty()) {
errorMsg = "Please enter the new namespace.";
} else if (oldNamespace.isEmpty() && newNamespace.isEmpty()) {
errorMsg = "Please enter the namespaces.";
} else if (oldNamespace.equals(newNamespace)) {
errorMsg = "Please enter two different namespaces.";
}
if (!errorMsg.isEmpty()) {
vreq.setAttribute("errorMsg", errorMsg);
vreq.setAttribute("oldNamespace", oldNamespace);
vreq.setAttribute("newNamespace", newNamespace);
vreq.setAttribute("title","Rename Resource");
vreq.setAttribute("bodyJsp",RENAME_RESOURCE);
} else {
String result = doRename(oldNamespace, newNamespace, response);
vreq.setAttribute("result",result);
vreq.setAttribute("title","Rename Resources");
vreq.setAttribute("bodyJsp",RENAME_RESULT);
}
} else{
vreq.setAttribute("title","Rename Resource");
vreq.setAttribute("bodyJsp",RENAME_RESOURCE);
}
@ -728,8 +737,8 @@ public class JenaIngestController extends BaseEditController {
HttpServletResponse response,
ModelMaker maker,
String modelType) {
//Model lmodel = (Model)request.getSession().getAttribute("leftoverModel");
Model lmodel = (Model)getServletContext().getAttribute("leftoverModel");
Model lmodel = (Model) vreq.getSession().getAttribute("leftoverModel");
response.setContentType("RDF/XML-ABBREV");
try {
OutputStream outStream = response.getOutputStream();
@ -745,7 +754,7 @@ public class JenaIngestController extends BaseEditController {
private ModelMaker getVitroJenaModelMaker(HttpServletRequest request) {
ModelMaker myVjmm = (ModelMaker) request.getSession().getAttribute("vitroJenaModelMaker");
myVjmm = (myVjmm == null) ? (ModelMaker) getServletContext().getAttribute("vitroJenaModelMaker") : myVjmm;
myVjmm = (myVjmm == null) ? (ModelMaker) getServletContext().getAttribute("vitroJenaSDBModelMaker") : myVjmm;
return new VitroJenaSpecialModelMaker(myVjmm, request);
}
@ -783,7 +792,7 @@ public class JenaIngestController extends BaseEditController {
private void doRemoveModel(String modelName, ModelMaker modelMaker) {
//Try to detach first since it cause problems to remove an attached model.
doDetachModel(modelName, modelMaker);
System.out.println("Removing "+modelName+" from webapp");
log.debug("Removing " + modelName + " from webapp");
modelMaker.removeModel(modelName);
}
@ -861,13 +870,28 @@ public class JenaIngestController extends BaseEditController {
private void doRenameBNodes(VitroRequest vreq, String namespaceEtc, boolean patternBoolean, String pattern, String[] sourceModel) {
OntModel source = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM);
String property = vreq.getParameter("property");
Boolean csv2rdf = (Boolean)getServletContext().getAttribute("csv2rdf");
for (int i=0; i<sourceModel.length; i++) {
Model m = getModel(sourceModel[i],vreq);
source.addSubModel(m);
Boolean csv2rdf = false;
try {
csv2rdf = (Boolean) Boolean.parseBoolean(vreq.getParameter("csv2rdf"));
} catch (Exception e) {
log.error(e, e);
}
System.out.println(vreq.getParameter("destinationModelName"));
Model destination = getModel(vreq.getParameter("destinationModelName"),vreq);
if (csv2rdf) {
source.addSubModel(
(Model) vreq.getSession().getAttribute("csv2rdfResult"));
} else {
for (int i=0; i<sourceModel.length; i++) {
Model m = getModel(sourceModel[i],vreq);
source.addSubModel(m);
}
}
Model destination = (csv2rdf)
? ModelFactory.createDefaultModel()
: getModel(vreq.getParameter("destinationModelName"),vreq);
JenaIngestUtils utils = new JenaIngestUtils();
destination.enterCriticalSection(Lock.WRITE);
try {
@ -877,52 +901,10 @@ public class JenaIngestController extends BaseEditController {
else{
destination.add(utils.renameBNodesByPattern(source, namespaceEtc, vreq.getJenaOntModel(), pattern, property));
}
if(csv2rdf!=null){
if(csv2rdf && property!=null){
ClosableIterator closeIt = destination.listSubjects();
Property prop = ResourceFactory.createProperty(property);
try {
for (Iterator it = closeIt; it.hasNext();) {
Resource res = (Resource) it.next();
if (res.isAnon()) {
ClosableIterator closfIt = destination.listStatements(res,prop,(RDFNode)null);
Statement stmt = null;
try {
if (closfIt.hasNext()) {
stmt = (Statement) closfIt.next();
}
} finally {
closfIt.close();
}
if (stmt != null) {
Resource outRes = stmt.getSubject();
destination.removeAll(outRes,(Property)null,(RDFNode)null);
}
}
}
} finally {
closeIt.close();
}
csv2rdf = false;
getServletContext().setAttribute("csv2rdf", csv2rdf);
}
else if(csv2rdf && property == null){
ClosableIterator closeIt = destination.listSubjects();
try {
for (Iterator it = closeIt; it.hasNext();) {
Resource res = (Resource) it.next();
if (res.isAnon()) {
destination.removeAll(res,(Property)null,(RDFNode)null);
}
}
} finally {
closeIt.close();
}
csv2rdf = false;
getServletContext().setAttribute("csv2rdf", csv2rdf);
}
if (csv2rdf) {
Model ultimateDestination = getModel(vreq.getParameter("destinationModelName"),vreq);
ultimateDestination.add(destination);
}
} finally {
destination.leaveCriticalSection();
}
@ -965,21 +947,20 @@ public class JenaIngestController extends BaseEditController {
String savedQueryURIStr = vreq.getParameter("savedQuery");
String queryStr;
if (savedQueryURIStr.length()==0) {
System.out.println("Using entered query");
log.debug("Using entered query");
queryStr = sparqlQueryStr;
} else {
Property queryStrProp = ResourceFactory.createProperty(SPARQL_QUERYSTR_PROP);
jenaOntModel.enterCriticalSection(Lock.READ);
try {
Individual ind = jenaOntModel.getIndividual(savedQueryURIStr);
System.out.println("Using query "+savedQueryURIStr);
log.debug("Using query "+savedQueryURIStr);
queryStr = ( (Literal) ind.getPropertyValue(queryStrProp)).getLexicalForm();
queryStr = StringEscapeUtils.unescapeHtml(queryStr); // !!! We need to turn off automatic HTML-escaping for data property editing.
} finally {
jenaOntModel.leaveCriticalSection();
}
}
//System.out.println(queryStr);
Model tempModel = ModelFactory.createDefaultModel();
Query query = QueryFactory.create(queryStr, Syntax.syntaxARQ);
QueryExecution qexec = QueryExecutionFactory.create(query,source);
@ -1020,7 +1001,7 @@ public class JenaIngestController extends BaseEditController {
}
dbTypeObj = DatabaseType.fetch(dbType);
String driver = loadDriver(dbTypeObj);
System.out.println("Connecting to DB at "+jdbcUrl);
log.debug("Connecting to DB at "+jdbcUrl);
StoreDesc storeDesc = new StoreDesc(LayoutType.LayoutTripleNodesHash,dbTypeObj) ;
ServletContext ctx = vreq.getSession().getServletContext();
BasicDataSource bds = JenaDataSourceSetup.makeBasicDataSource(
@ -1124,7 +1105,7 @@ public class JenaIngestController extends BaseEditController {
try {
newLex = (String) meth.invoke(processor,args);
} catch (InvocationTargetException e) {
e.getTargetException().printStackTrace();
throw new RuntimeException(e);
}
if (!newLex.equals(lex)) {
retractionsModel.add(stmt);
@ -1156,7 +1137,7 @@ public class JenaIngestController extends BaseEditController {
destination.leaveCriticalSection();
}
} catch (Exception e) {
e.printStackTrace();
throw new RuntimeException(e);
}
}
@ -1181,8 +1162,8 @@ public class JenaIngestController extends BaseEditController {
cleanChars[cleanPos] = chars[i];
cleanPos++;
} else {
System.out.println("Bad char in "+lex);
System.out.println("Numeric value "+java.lang.Character.getNumericValue(chars[i]));
log.error("Bad char in " + lex);
log.error("Numeric value " + java.lang.Character.getNumericValue(chars[i]));
badChar = true;
}
}
@ -1206,7 +1187,7 @@ public class JenaIngestController extends BaseEditController {
}
model.remove(retractionsModel);
model.add(additionsModel);
System.out.println("Cleaned "+additionsModel.size()+" literals");
log.debug("Cleaned " + additionsModel.size() + " literals");
} finally {
model.leaveCriticalSection();
}
@ -1222,7 +1203,6 @@ public class JenaIngestController extends BaseEditController {
}
private String doRename(String oldNamespace,String newNamespace,HttpServletResponse response){
String userURI = null;
String uri = null;
String result = null;
Integer counter = 0;
@ -1233,8 +1213,6 @@ public class JenaIngestController extends BaseEditController {
getServletContext().getAttribute("jenaOntModel");
OntModel infOntModel = (OntModel)
getServletContext().getAttribute(JenaBaseDao.INFERENCE_ONT_MODEL_ATTRIBUTE_NAME);
WebappDaoFactory wdf =
(WebappDaoFactory)getServletContext().getAttribute("webappDaoFactory");
List<String> urisToChange = new LinkedList<String>();
ontModel.enterCriticalSection(Lock.READ);
try {
@ -1272,37 +1250,22 @@ public class JenaIngestController extends BaseEditController {
Matcher matcher = p.matcher(candidateString);
newURIStr = matcher.replaceFirst(newNamespace);
long time3 = System.currentTimeMillis();
log.info("time to get new uri: " +
log.debug("time to get new uri: " +
Long.toString(time3 - time2));
log.info("Renaming "+ oldURIStr + " to " + newURIStr);
log.debug("Renaming "+ oldURIStr + " to " + newURIStr);
ResourceUtils.renameResource(res,newURIStr);
ResourceUtils.renameResource(infRes,newURIStr);
long time4 = System.currentTimeMillis();
log.info(" time to rename : " + Long.toString( time4 - time3));
log.info(" time for one resource: " +
log.debug(" time to rename : " + Long.toString( time4 - time3));
log.debug(" time for one resource: " +
Long.toString( time4 -time1));
} finally {
infOntModel.leaveCriticalSection();
ontModel.leaveCriticalSection();
baseOntModel.leaveCriticalSection();
}
try {
Thread.currentThread().sleep(200);
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
counter++;
}
/*baseOntModel.enterCriticalSection(Lock.WRITE);
ontModel.enterCriticalSection(Lock.WRITE);
try{
baseOntModel.getBaseModel().notifyEvent(new EditEvent(null,true));
baseOntModel.getBaseModel().notifyEvent(new EditEvent(null,false));
} finally {
ontModel.leaveCriticalSection();
baseOntModel.leaveCriticalSection();
}*/
result = counter.toString() + " resources renamed";
return result;
}

View file

@ -60,7 +60,7 @@ public class Csv2Rdf {
return convertToRdf(fis, null, null);
}
public Model[] convertToRdf(InputStream fis,VitroRequest vreq, Model destination) throws IOException {
public Model[] convertToRdf(InputStream fis, WebappDaoFactory wadf, Model destination) throws IOException {
OntModel ontModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM);
OntModel tboxOntModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM);
ontModel.addSubModel(tboxOntModel);
@ -70,8 +70,8 @@ public class Csv2Rdf {
cReader.setSeperator(separatorChar);
cReader.setQuoteCharacters(quoteChars);
URIGenerator uriGen = (vreq != null && destination != null)
? new RandomURIGenerator(vreq, destination)
URIGenerator uriGen = (wadf != null && destination != null)
? new RandomURIGenerator(wadf, destination)
: new SequentialURIGenerator();
List<String[]> fileRows = cReader.parse(fis);
@ -115,12 +115,12 @@ public class Csv2Rdf {
private class RandomURIGenerator implements URIGenerator {
private VitroRequest vreq;
private WebappDaoFactory wadf;
private Model destination;
private Random random = new Random(System.currentTimeMillis());
public RandomURIGenerator(VitroRequest vreq, Model destination) {
this.vreq = vreq;
public RandomURIGenerator(WebappDaoFactory webappDaoFactory, Model destination) {
this.wadf = webappDaoFactory;
this.destination = destination;
}
@ -132,7 +132,7 @@ public class Csv2Rdf {
if(namespace!=null && !namespace.isEmpty()){
while( uriIsGood == false && attempts < 30 ){
uri = namespace+individualNameBase+random.nextInt( Math.min(Integer.MAX_VALUE,(int)Math.pow(2,attempts + 13)) );
String errMsg = vreq.getWebappDaoFactory().checkURI(uri);
String errMsg = wadf.checkURI(uri);
Resource res = ResourceFactory.createResource(uri);
inDestination = destination.contains(res, null);
if( errMsg != null && !inDestination)

View file

@ -173,7 +173,11 @@ public class JenaIngestUtils {
if (stmt != null) {
Resource outRes = stmt.getSubject();
if(stmt.getObject().isLiteral()){
ResourceUtils.renameResource(outRes,namespaceEtc+pattern+"_"+stmt.getObject().toString());
String value = ((Literal) stmt.getObject()).getLexicalForm();
String suffix = (pattern.contains("$$$"))
? pattern.replace("$$$", value)
: pattern + value;
ResourceUtils.renameResource(outRes, namespaceEtc + suffix);
}
doneSet.add(res.getId().toString());
}
@ -191,70 +195,58 @@ public class JenaIngestUtils {
}
public Map generatePropertyMap(String[] sourceModel, Model model, ModelMaker maker){
public Map<String, LinkedList<String>> generatePropertyMap(List<Model> sourceModels, ModelMaker maker){
Map<String,LinkedList<String>> propertyMap = Collections.synchronizedMap(new HashMap<String, LinkedList<String>>());
Set<String> doneList = new HashSet<String>();
if(sourceModel!=null && sourceModel.length!=0){
for(String modelName : sourceModel){
if(modelName != null){
model = maker.getModel(modelName);
ClosableIterator cItr = model.listSubjects();
while(cItr.hasNext()){
Resource res = (Resource) cItr.next();
if(res.isAnon() && !doneList.contains(res.getId())){
doneList.add(res.getId().toString());
StmtIterator stmtItr = model.listStatements(res, (Property)null, (RDFNode)null);
while(stmtItr.hasNext()){
Statement stmt = stmtItr.next();
if(!stmt.getObject().isResource()){
if(propertyMap.containsKey(stmt.getPredicate().getURI())){
LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI());
linkList.add(stmt.getObject().toString());
}
else{
propertyMap.put(stmt.getPredicate().getURI(), new LinkedList());
LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI());
linkList.add(stmt.getObject().toString());
}
}
for(Model model : sourceModels) {
ClosableIterator cItr = model.listSubjects();
while(cItr.hasNext()){
Resource res = (Resource) cItr.next();
if(res.isAnon() && !doneList.contains(res.getId())){
doneList.add(res.getId().toString());
StmtIterator stmtItr = model.listStatements(res, (Property)null, (RDFNode)null);
while(stmtItr.hasNext()){
Statement stmt = stmtItr.next();
if(!stmt.getObject().isResource()){
if(propertyMap.containsKey(stmt.getPredicate().getURI())){
LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI());
linkList.add(stmt.getObject().toString());
}
}
else{
propertyMap.put(stmt.getPredicate().getURI(), new LinkedList());
LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI());
linkList.add(stmt.getObject().toString());
}
}
}
cItr = model.listObjects();
while(cItr.hasNext()){
RDFNode rdfn = (RDFNode) cItr.next();
if(rdfn.isResource()){
Resource res = (Resource)rdfn;
if(res.isAnon() && !doneList.contains(res.getId())){
doneList.add(res.getId().toString());
StmtIterator stmtItr = model.listStatements(res, (Property)null, (RDFNode)null);
while(stmtItr.hasNext()){
Statement stmt = stmtItr.next();
if(!stmt.getObject().isResource()){
if(propertyMap.containsKey(stmt.getPredicate().getURI())){
LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI());
linkList.add(stmt.getObject().toString());
}
else{
propertyMap.put(stmt.getPredicate().getURI(), new LinkedList());
LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI());
linkList.add(stmt.getObject().toString());
}
}
}
}
}
}
cItr.close();
}
}
cItr = model.listObjects();
while(cItr.hasNext()){
RDFNode rdfn = (RDFNode) cItr.next();
if(rdfn.isResource()){
Resource res = (Resource)rdfn;
if(res.isAnon() && !doneList.contains(res.getId())){
doneList.add(res.getId().toString());
StmtIterator stmtItr = model.listStatements(res, (Property)null, (RDFNode)null);
while(stmtItr.hasNext()){
Statement stmt = stmtItr.next();
if(!stmt.getObject().isResource()){
if(propertyMap.containsKey(stmt.getPredicate().getURI())){
LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI());
linkList.add(stmt.getObject().toString());
}
else{
propertyMap.put(stmt.getPredicate().getURI(), new LinkedList());
LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI());
linkList.add(stmt.getObject().toString());
}
}
}
}
}
}
cItr.close();
}
return propertyMap;
}

View file

@ -6,6 +6,8 @@ import java.io.IOException;
import java.io.OutputStream;
import java.io.PipedInputStream;
import java.io.PipedOutputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -18,7 +20,16 @@ import org.openrdf.repository.http.HTTPRepository;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFParseException;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.vocabulary.OWL;
public class SesameSyncUtils {
@ -42,11 +53,16 @@ public class SesameSyncUtils {
} else {
myConn.clear();
}
PipedInputStream in = new PipedInputStream();
PipedOutputStream out = new PipedOutputStream(in);
try {
new Thread(new JenaOutputter(jenaModel, out), "SesameSyncUtilities.JenaOutputter").start();
new Thread(new JenaOutputter(jenaModel, out, myConn), "SesameSyncUtilities.JenaOutputter").start();
if (contextRes != null) {
myConn.add(in,"http://example.org/base/", RDFFormat.NTRIPLES, contextRes);
} else {
@ -69,19 +85,62 @@ public class SesameSyncUtils {
}
/**
 * Runs a SPARQL SELECT DISTINCT query against the given model for every
 * subject that has rdf:type owl:Thing and returns the subjects' URIs.
 * Subjects for which getURI() returns null are left out of the result.
 *
 * @param model the Jena model to query
 * @return the URIs found, in the order the result set yields them;
 *         empty when the query matches nothing
 */
private List<String> getIndividualURIs(Model model) {
    List<String> uris = new ArrayList<String>();
    String thingQuery = "SELECT DISTINCT ?s WHERE { \n" +
            " ?s a <" + OWL.Thing.getURI() + "> \n" +
            "}";
    QueryExecution execution = QueryExecutionFactory.create(
            QueryFactory.create(thingQuery), model);
    try {
        for (ResultSet results = execution.execSelect(); results.hasNext(); ) {
            QuerySolution row = results.nextSolution();
            String uri = row.getResource("s").getURI();
            if (uri == null) {
                // no URI for this subject; nothing to record
                continue;
            }
            uris.add(uri);
        }
    } finally {
        // always release the query execution, even if iteration fails
        execution.close();
    }
    return uris;
}
private class JenaOutputter implements Runnable {
private Model model;
private OutputStream out;
private RepositoryConnection rconn;
public JenaOutputter(Model model, OutputStream out) {
public JenaOutputter(Model model, OutputStream out, RepositoryConnection rconn) {
this.model = model;
this.out = out;
this.rconn = rconn;
}
public void run() {
Model t = ModelFactory.createDefaultModel();
try {
model.write(out, "N-TRIPLE");
List<String> individualURIs = getIndividualURIs(model);
log.info(individualURIs.size() + " individuals to send to Sesame");
int i = 0;
for (String individualURI : individualURIs) {
t.removeAll();
t.add(model.listStatements(
model.getResource(
individualURI), null, (RDFNode) null));
t.write(out, "N-TRIPLE");
i++;
if (i % 100 == 0) {
try {
rconn.commit();
} catch (Throwable e) {
log.error(e, e);
}
log.info(i + " individuals sent to Sesame");
}
}
} finally {
try {
out.flush();