diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/jena/JenaIngestController.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/jena/JenaIngestController.java index ba1cb7a84..bcc8f0004 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/jena/JenaIngestController.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/controller/jena/JenaIngestController.java @@ -80,6 +80,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.event.EditEvent; import edu.cornell.mannlib.vitro.webapp.servlet.setup.JenaDataSourceSetup; import edu.cornell.mannlib.vitro.webapp.utils.SparqlQueryUtils; import edu.cornell.mannlib.vitro.webapp.utils.jena.JenaIngestUtils; +import edu.cornell.mannlib.vitro.webapp.utils.jena.JenaIngestUtils.MergeResult; import edu.cornell.mannlib.vitro.webapp.utils.jena.JenaIngestWorkflowProcessor; import edu.cornell.mannlib.vitro.webapp.utils.jena.JenaOutputUtils; import edu.cornell.mannlib.vitro.webapp.utils.jena.WorkflowOntology; @@ -621,23 +622,25 @@ public class JenaIngestController extends BaseEditController { if(uri1!=null){ JenaIngestUtils utils = new JenaIngestUtils(); /* - * get baseOnt, Ont and infOnt models + * get baseOnt and infOnt models */ - OntModel baseOntModel = (OntModel) getServletContext().getAttribute("baseOntModel"); - OntModel ontModel = (OntModel) - getServletContext().getAttribute("jenaOntModel"); - OntModel infOntModel = (OntModel) - getServletContext().getAttribute(JenaBaseDao.INFERENCE_ONT_MODEL_ATTRIBUTE_NAME); + OntModel baseOntModel = ModelContext.getBaseOntModel( + getServletContext()); + OntModel tboxOntModel = ModelContext.getUnionOntModelSelector( + getServletContext()).getTBoxModel(); + /* * calling method that does the merge operation. 
*/ - String result = utils.doMerge(uri1,uri2,baseOntModel,ontModel,infOntModel,usePrimaryLabelOnly); - vreq.getSession().setAttribute("leftoverModel", utils.getLeftOverModel()); - vreq.setAttribute("result",result); - vreq.setAttribute("title","Merge Resources"); - vreq.setAttribute("bodyJsp",MERGE_RESULT); - } - else{ + MergeResult result = utils.doMerge( + uri1, uri2, baseOntModel, tboxOntModel, usePrimaryLabelOnly); + + vreq.getSession().setAttribute( + "leftoverModel", result.getLeftoverModel()); + vreq.setAttribute("result", result); + vreq.setAttribute("title", "Merge Resources"); + vreq.setAttribute("bodyJsp", MERGE_RESULT); + } else{ vreq.setAttribute("title","Merge Resources"); vreq.setAttribute("bodyJsp",MERGE_RESOURCES); } @@ -1192,7 +1195,7 @@ public class JenaIngestController extends BaseEditController { ontModel.leaveCriticalSection(); } if(!namespacePresent){ - result = "0 resource renamed"; + result = "no resources renamed"; return result; } for( String oldURIStr : urisToChange){ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/jena/JenaIngestUtils.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/jena/JenaIngestUtils.java index b2d049f67..71589eb66 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/jena/JenaIngestUtils.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/utils/jena/JenaIngestUtils.java @@ -2,7 +2,6 @@ package edu.cornell.mannlib.vitro.webapp.utils.jena; -import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Collections; @@ -16,15 +15,12 @@ import java.util.Random; import java.util.Set; import java.util.regex.Pattern; -import javax.servlet.http.HttpServletRequest; - import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.openrdf.model.vocabulary.RDFS; +import com.hp.hpl.jena.ontology.FunctionalProperty; import com.hp.hpl.jena.ontology.OntModel; import 
com.hp.hpl.jena.ontology.OntModelSpec; -import com.hp.hpl.jena.ontology.OntProperty; import com.hp.hpl.jena.rdf.model.Literal; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.ModelFactory; @@ -41,785 +37,803 @@ import com.hp.hpl.jena.util.ResourceUtils; import com.hp.hpl.jena.util.iterator.ClosableIterator; import com.hp.hpl.jena.vocabulary.OWL; import com.hp.hpl.jena.vocabulary.RDF; +import com.hp.hpl.jena.vocabulary.RDFS; import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest; import edu.cornell.mannlib.vitro.webapp.dao.InsertException; import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; -import edu.cornell.mannlib.vitro.webapp.dao.jena.JenaBaseDao; public class JenaIngestUtils { - + private static final Log log = LogFactory.getLog(JenaIngestUtils.class.getName()); - private Random random = new Random(System.currentTimeMillis()); + private Random random = new Random(System.currentTimeMillis()); - /** - * Returns a new copy of the input model with blank nodes renamed with namespaceEtc plus a random int. - * @param namespaceEtc - * @return - */ - public Model renameBNodes(Model inModel, String namespaceEtc) { - return renameBNodes(inModel, namespaceEtc, null); - } - - /** - * Returns a new copy of the input model with blank nodes renamed with namespaceEtc plus a random int. 
- * Will prevent URI collisions with supplied dedupModel - * @param namespaceEtc - * @return - */ - public Model renameBNodes(Model inModel, String namespaceEtc, Model dedupModel) { - Model outModel = ModelFactory.createDefaultModel(); - OntModel dedupUnionModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM); // we're not using OWL here, just the OntModel submodel infrastructure - dedupUnionModel.addSubModel(outModel); - if (dedupModel != null) { - dedupUnionModel.addSubModel(dedupModel); - } - // the dedupUnionModel is so we can guard against reusing a URI in an - // existing model, as well as in the course of running this process - inModel.enterCriticalSection(Lock.READ); - Set doneSet = new HashSet(); - try { - outModel.add(inModel); - ClosableIterator closeIt = inModel.listSubjects(); - try { - for (Iterator it = closeIt; it.hasNext();) { - Resource res = (Resource) it.next(); - if (res.isAnon() && !(doneSet.contains(res.getId()))) { - // now we do something hacky to get the same resource in the outModel, since there's no getResourceById(); - ClosableIterator closfIt = outModel.listStatements(res,(Property)null,(RDFNode)null); - Statement stmt = null; - try { - if (closfIt.hasNext()) { - stmt = (Statement) closfIt.next(); - } - } finally { - closfIt.close(); - } - if (stmt != null) { - Resource outRes = stmt.getSubject(); - ResourceUtils.renameResource(outRes,getNextURI(namespaceEtc,dedupUnionModel)); - doneSet.add(res.getId().toString()); - } - } - } - } finally { - closeIt.close(); - } - closeIt = inModel.listObjects(); - try { - for (Iterator it = closeIt; it.hasNext();) { - RDFNode rdfn = (RDFNode) it.next(); - if (rdfn.isResource()) { - Resource res = (Resource) rdfn; - if (res.isAnon() && !(doneSet.contains(res.getId()))) { - // now we do something hacky to get the same resource in the outModel, since there's no getResourceById(); - ClosableIterator closfIt = outModel.listStatements((Resource)null,(Property)null,res); - Statement stmt = null; - 
try { - if (closfIt.hasNext()) { - stmt = (Statement) closfIt.next(); - } - } finally { - closfIt.close(); - } - if (stmt != null) { - Resource outRes = stmt.getSubject(); - ResourceUtils.renameResource(outRes,getNextURI(namespaceEtc, dedupUnionModel)); - doneSet.add(res.getId().toString()); - } - } - } - } - } finally { - closeIt.close(); - } - } finally { - inModel.leaveCriticalSection(); - } - return outModel; - } - - public Model renameBNodesByPattern(Model inModel, String namespaceEtc, Model dedupModel, String pattern, String property){ - Model outModel = ModelFactory.createDefaultModel(); - Property propertyRes = ResourceFactory.createProperty(property); - OntModel dedupUnionModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM); // we're not using OWL here, just the OntModel submodel infrastructure - dedupUnionModel.addSubModel(outModel); - if (dedupModel != null) { - dedupUnionModel.addSubModel(dedupModel); - } - // the dedupUnionModel is so we can guard against reusing a URI in an - // existing model, as well as in the course of running this process - inModel.enterCriticalSection(Lock.READ); - Set doneSet = new HashSet(); - - try { - outModel.add(inModel); - ClosableIterator closeIt = inModel.listSubjects(); - try { - for (Iterator it = closeIt; it.hasNext();) { - Resource res = (Resource) it.next(); - if (res.isAnon() && !(doneSet.contains(res.getId()))) { - // now we do something hacky to get the same resource in the outModel, since there's no getResourceById(); - ClosableIterator closfIt = outModel.listStatements(res,propertyRes,(RDFNode)null); - Statement stmt = null; - try { - if (closfIt.hasNext()) { - stmt = (Statement) closfIt.next(); - } - } finally { - closfIt.close(); - } - if (stmt != null) { - Resource outRes = stmt.getSubject(); - if(stmt.getObject().isLiteral()){ - String value = ((Literal) stmt.getObject()).getLexicalForm(); - String suffix = (pattern.contains("$$$")) - ? 
pattern.replace("$$$", value) - : pattern + value; - ResourceUtils.renameResource(outRes, namespaceEtc + suffix); - } - doneSet.add(res.getId().toString()); - } - } - } - } finally { - closeIt.close(); - } - } finally { - inModel.leaveCriticalSection(); - } - - - return outModel; - - } - - public Map> generatePropertyMap(List sourceModels, ModelMaker maker){ - Map> propertyMap = Collections.synchronizedMap(new HashMap>()); - Set doneList = new HashSet(); - for(Model model : sourceModels) { - ClosableIterator cItr = model.listSubjects(); - while(cItr.hasNext()){ - Resource res = (Resource) cItr.next(); - if(res.isAnon() && !doneList.contains(res.getId())){ - doneList.add(res.getId().toString()); - StmtIterator stmtItr = model.listStatements(res, (Property)null, (RDFNode)null); - while(stmtItr.hasNext()){ - Statement stmt = stmtItr.next(); - if(!stmt.getObject().isResource()){ - if(propertyMap.containsKey(stmt.getPredicate().getURI())){ - LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI()); - linkList.add(stmt.getObject().toString()); - } - else{ - propertyMap.put(stmt.getPredicate().getURI(), new LinkedList()); - LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI()); - linkList.add(stmt.getObject().toString()); - } - } - } - } - } - cItr = model.listObjects(); - while(cItr.hasNext()){ - RDFNode rdfn = (RDFNode) cItr.next(); - if(rdfn.isResource()){ - Resource res = (Resource)rdfn; - if(res.isAnon() && !doneList.contains(res.getId())){ - doneList.add(res.getId().toString()); - StmtIterator stmtItr = model.listStatements(res, (Property)null, (RDFNode)null); - while(stmtItr.hasNext()){ - Statement stmt = stmtItr.next(); - if(!stmt.getObject().isResource()){ - if(propertyMap.containsKey(stmt.getPredicate().getURI())){ - LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI()); - linkList.add(stmt.getObject().toString()); - } - else{ - propertyMap.put(stmt.getPredicate().getURI(), new LinkedList()); - LinkedList linkList = 
propertyMap.get(stmt.getPredicate().getURI()); - linkList.add(stmt.getObject().toString()); - } - } - } - } - } - } - cItr.close(); - } - return propertyMap; - } - - private String getNextURI(String namespaceEtc, Model model) { - String nextURI = null; - boolean duplicate = true; - while (duplicate) { - nextURI = namespaceEtc+random.nextInt(9999999); - Resource res = ResourceFactory.createResource(nextURI); - duplicate = false; - ClosableIterator closeIt = model.listStatements(res, (Property)null, (RDFNode)null); - try { - if (closeIt.hasNext()) { - duplicate = true; - } - } finally { - closeIt.close(); - } - if (duplicate == false) { - closeIt = model.listStatements((Resource)null, (Property)null, res); - try { - if (closeIt.hasNext()) { - duplicate = true; - } - } finally { - closeIt.close(); - } - } - } - return nextURI; - } - - public void processPropertyValueStrings(Model source, Model destination, Model additions, Model retractions, - String processorClass, String processorMethod, String originalPropertyURI, String newPropertyURI) { - Model additionsModel = ModelFactory.createDefaultModel(); - Model retractionsModel = ModelFactory.createDefaultModel(); - Class stringProcessorClass = null; - Object processor = null; - Class[] methArgs = {String.class}; - Method meth = null; - try { - stringProcessorClass = Class.forName(processorClass); - processor = stringProcessorClass.newInstance(); - meth = stringProcessorClass.getMethod(processorMethod,methArgs); - } catch (Exception e) { - log.error(e, e); - return; - } - Property prop = ResourceFactory.createProperty(originalPropertyURI); - Property newProp = ResourceFactory.createProperty(newPropertyURI); - source.enterCriticalSection(Lock.READ); - try { - ClosableIterator closeIt = source.listStatements((Resource)null,prop,(RDFNode)null); - for (Iterator stmtIt = closeIt; stmtIt.hasNext(); ) { - Statement stmt = (Statement) stmtIt.next(); - if (stmt.getObject().isLiteral()) { - Literal lit = (Literal) 
stmt.getObject(); - String lex = lit.getLexicalForm(); - Object[] args = {lex}; - String newLex = null; - try { - if (log.isDebugEnabled()) { - log.debug("invoking string processor method on ["+lex.substring(0,lex.length()>50 ? 50 : lex.length())+"..."); - } - newLex = (String) meth.invoke(processor,args); - } catch (Exception e) { - log.error(e, e); - return; - } - if (!newLex.equals(lex)) { - retractionsModel.add(stmt); - Literal newLit = null; - if (lit.getLanguage()!=null && lit.getLanguage().length()>0) { - newLit = additionsModel.createLiteral(newLex,lit.getLanguage()); - } else if (lit.getDatatype() != null) { - newLit = additionsModel.createTypedLiteral(newLex,lit.getDatatype()); - } else { - newLit = additionsModel.createLiteral(newLex); - } - additionsModel.add(stmt.getSubject(),newProp,newLit); - } - } - } - if (destination != null) { - destination.enterCriticalSection(Lock.WRITE); - try { - destination.add(additionsModel); - destination.remove(retractionsModel); - } finally { - destination.leaveCriticalSection(); - } - } - if (additions != null) { - additions.enterCriticalSection(Lock.WRITE); - try { - additions.add(additionsModel); - } finally { - additions.leaveCriticalSection(); - } - } - if (retractions != null) { - retractions.enterCriticalSection(Lock.WRITE); - try { - retractions.add(retractionsModel); - } finally { - retractions.leaveCriticalSection(); - } - } - } finally { - source.leaveCriticalSection(); - } - } - - /** - * Splits values for a given data property URI on a supplied regex and - * asserts each value using newPropertyURI. New statements returned in - * a Jena Model. Split values may be optionally trim()ed. 
- * @param inModel - * @param propertyURI - * @param splitRegex - * @param newPropertyURI - * @param trim - * @return outModel - */ - public Model splitPropertyValues(Model inModel, String propertyURI, String splitRegex, String newPropertyURI, boolean trim) { - Model outModel = ModelFactory.createDefaultModel(); - Pattern delimiterPattern = Pattern.compile(splitRegex); - Property theProp = ResourceFactory.createProperty(propertyURI); - Property newProp = ResourceFactory.createProperty(newPropertyURI); - inModel.enterCriticalSection(Lock.READ); - try { - StmtIterator stmtIt = inModel.listStatements( (Resource)null, theProp, (RDFNode)null ); - try { - while(stmtIt.hasNext()) { - Statement stmt = stmtIt.nextStatement(); - Resource subj = stmt.getSubject(); - RDFNode obj = stmt.getObject(); - if (obj.isLiteral()) { - Literal lit = (Literal) obj; - String unsplitStr = lit.getLexicalForm(); - String[] splitPieces = delimiterPattern.split(unsplitStr); - for (int i=0; i 0) { - Literal newLiteral = null; - if (lit.getDatatype() != null) { - newLiteral = outModel.createTypedLiteral(newLexicalForm, lit.getDatatype()); - } else { - if (lit.getLanguage() != null) { - newLiteral = outModel.createLiteral(newLexicalForm, lit.getLanguage()); - } else { - newLiteral = outModel.createLiteral(newLexicalForm); - } - } - outModel.add(subj,newProp,newLiteral); - } - } - } - } - } finally { - stmtIt.close(); - } - } finally { - inModel.leaveCriticalSection(); - } - return outModel; - } - - /** - * A simple resource smusher based on a supplied inverse-functional property. - * A new model containing only resources about the smushed statements is returned. 
- * @param inModel - * @param prop - * @return - */ - public Model smushResources(Model inModel, Property prop) { - Model outModel = ModelFactory.createDefaultModel(); - outModel.add(inModel); - inModel.enterCriticalSection(Lock.READ); - try { - ClosableIterator closeIt = inModel.listObjectsOfProperty(prop); - try { - for (Iterator objIt = closeIt; objIt.hasNext();) { - RDFNode rdfn = (RDFNode) objIt.next(); - ClosableIterator closfIt = inModel.listSubjectsWithProperty(prop, rdfn); - try { - boolean first = true; - Resource smushToThisResource = null; - for (Iterator subjIt = closfIt; closfIt.hasNext();) { - Resource subj = (Resource) subjIt.next(); - if (first) { - smushToThisResource = subj; - first = false; - continue; - } - - ClosableIterator closgIt = inModel.listStatements(subj,(Property)null,(RDFNode)null); - try { - for (Iterator stmtIt = closgIt; stmtIt.hasNext();) { - Statement stmt = (Statement) stmtIt.next(); - outModel.remove(stmt.getSubject(), stmt.getPredicate(), stmt.getObject()); - outModel.add(smushToThisResource, stmt.getPredicate(), stmt.getObject()); - } - } finally { - closgIt.close(); - } - closgIt = inModel.listStatements((Resource) null, (Property)null, subj); - try { - for (Iterator stmtIt = closgIt; stmtIt.hasNext();) { - Statement stmt = (Statement) stmtIt.next(); - outModel.remove(stmt.getSubject(), stmt.getPredicate(), stmt.getObject()); - outModel.add(stmt.getSubject(), stmt.getPredicate(), smushToThisResource); - } - } finally { - closgIt.close(); - } - } - } finally { - closfIt.close(); - } - } - } finally { - closeIt.close(); - } - } finally { - inModel.leaveCriticalSection(); - } - return outModel; - } - - /** - * Returns a model where redundant individuals that are sameAs one another are smushed - * using URIs in preferred namespaces where possible. 
- * @param model - * @param preferredIndividualNamespace - * @return - */ - public Model dedupAndExtract( Model model, String preferredNamespace ) { - Model extractsModel = ModelFactory.createDefaultModel(); - - HashMap rewriteURIUsing = new HashMap(); - - Iterator haveSameAsIt = model.listSubjectsWithProperty(OWL.sameAs); - while (haveSameAsIt.hasNext()) { - String preferredURI = null; - Resource hasSameAs = (Resource) haveSameAsIt.next(); - List sameAsList = hasSameAs.listProperties(OWL.sameAs).toList(); - if (sameAsList.size()>1) { // if sameAs something other than the same URI (we assume reasoning model) - List sameAsURIs = new LinkedList(); - Iterator sameAsStmtIt = sameAsList.iterator(); - for (int i=0; i0 && functionalPresent) - result = "merged " + counter + " statements. Some statements could not be merged."; - else if(counter>0 && !functionalPresent) - result = "merged " + counter + " statements."; - else if(counter==0) - result = "No statements merged"; - return result; - - } - private Model leftoverModel = ModelFactory.createDefaultModel(); - - public void setLeftOverModel(Model leftoverModel){ - this.leftoverModel = leftoverModel; - } - public Model getLeftOverModel(){ - return this.leftoverModel; - } - - public void doPermanentURI(String oldModel,String newModel,String oldNamespace, - String newNamespace,String dNamespace,ModelMaker maker,VitroRequest vreq){ - - - WebappDaoFactory wdf = vreq.getFullWebappDaoFactory(); - Model m = maker.getModel(oldModel); - Model saveModel = maker.getModel(newModel); - Model tempModel = ModelFactory.createDefaultModel(); - ResIterator rsItr = null; - ArrayList urlCheck = new ArrayList(); - String changeNamespace = null; - boolean urlFound = false; - if(!oldModel.equals(newModel)){ - StmtIterator stmtItr = m.listStatements(); - while(stmtItr.hasNext()){ - Statement stmt = stmtItr.nextStatement(); - tempModel.add(stmt); - } - rsItr = tempModel.listResourcesWithProperty((Property)null); - } - else{ - rsItr = 
m.listResourcesWithProperty((Property)null); - } - - String uri = null; - while(rsItr.hasNext()){ - Resource res = rsItr.next(); - if(oldNamespace.equals(res.getNameSpace())){ - if(!newNamespace.equals("")){ - do{ - uri = getUnusedURI(newNamespace,wdf); - if(!urlCheck.contains(uri)){ - urlCheck.add(uri); - urlFound = true; - } - }while(!urlFound); - urlFound = false; - } - else if(dNamespace.equals(vreq.getFullWebappDaoFactory().getDefaultNamespace())){ - try{ - do{ - uri = wdf.getIndividualDao().getUnusedURI(null); - if(!urlCheck.contains(uri)){ - urlCheck.add(uri); - urlFound = true; - } - }while(!urlFound); - urlFound = false; - }catch(InsertException ex){ - log.error("could not create uri"); - } - } - ResourceUtils.renameResource(res, uri); - } - - } - boolean statementDone = false; - if(!newNamespace.equals("")){ - changeNamespace = newNamespace; - } - else if(dNamespace.equals(vreq.getFullWebappDaoFactory().getDefaultNamespace())){ - changeNamespace = dNamespace; - } - if(!oldModel.equals(newModel)){ - StmtIterator stmtItr = tempModel.listStatements(); - while(stmtItr.hasNext()){ - statementDone = false; - Statement stmt = stmtItr.nextStatement(); - Resource sRes = stmt.getSubject(); - Resource oRes = null; - if(sRes.getNameSpace().equals(changeNamespace)){ - saveModel.add(stmt); - statementDone = true; - } - try{ - oRes = (Resource)stmt.getObject(); - if(oRes.getNameSpace().equals(changeNamespace) && !statementDone){ - saveModel.add(stmt); - statementDone = true; - } - } - catch(Exception e){ - continue; - } - } - - } - } - public String getUnusedURI(String newNamespace,WebappDaoFactory wdf){ - String uri = null; - String errMsg = null; - Random random = new Random(); - boolean uriIsGood = false; + /** + * Returns a new copy of the input model with blank nodes renamed with namespaceEtc plus a random int. 
+ * @param namespaceEtc + * @return + */ + public Model renameBNodes(Model inModel, String namespaceEtc) { + return renameBNodes(inModel, namespaceEtc, null); + } + + /** + * Returns a new copy of the input model with blank nodes renamed with namespaceEtc plus a random int. + * Will prevent URI collisions with supplied dedupModel + * @param namespaceEtc + * @return + */ + public Model renameBNodes(Model inModel, String namespaceEtc, Model dedupModel) { + Model outModel = ModelFactory.createDefaultModel(); + OntModel dedupUnionModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM); // we're not using OWL here, just the OntModel submodel infrastructure + dedupUnionModel.addSubModel(outModel); + if (dedupModel != null) { + dedupUnionModel.addSubModel(dedupModel); + } + // the dedupUnionModel is so we can guard against reusing a URI in an + // existing model, as well as in the course of running this process + inModel.enterCriticalSection(Lock.READ); + Set doneSet = new HashSet(); + try { + outModel.add(inModel); + ClosableIterator closeIt = inModel.listSubjects(); + try { + for (Iterator it = closeIt; it.hasNext();) { + Resource res = (Resource) it.next(); + if (res.isAnon() && !(doneSet.contains(res.getId()))) { + // now we do something hacky to get the same resource in the outModel, since there's no getResourceById(); + ClosableIterator closfIt = outModel.listStatements(res,(Property)null,(RDFNode)null); + Statement stmt = null; + try { + if (closfIt.hasNext()) { + stmt = (Statement) closfIt.next(); + } + } finally { + closfIt.close(); + } + if (stmt != null) { + Resource outRes = stmt.getSubject(); + ResourceUtils.renameResource(outRes,getNextURI(namespaceEtc,dedupUnionModel)); + doneSet.add(res.getId().toString()); + } + } + } + } finally { + closeIt.close(); + } + closeIt = inModel.listObjects(); + try { + for (Iterator it = closeIt; it.hasNext();) { + RDFNode rdfn = (RDFNode) it.next(); + if (rdfn.isResource()) { + Resource res = (Resource) rdfn; + if 
(res.isAnon() && !(doneSet.contains(res.getId()))) { + // now we do something hacky to get the same resource in the outModel, since there's no getResourceById(); + ClosableIterator closfIt = outModel.listStatements((Resource)null,(Property)null,res); + Statement stmt = null; + try { + if (closfIt.hasNext()) { + stmt = (Statement) closfIt.next(); + } + } finally { + closfIt.close(); + } + if (stmt != null) { + Resource outRes = stmt.getSubject(); + ResourceUtils.renameResource(outRes,getNextURI(namespaceEtc, dedupUnionModel)); + doneSet.add(res.getId().toString()); + } + } + } + } + } finally { + closeIt.close(); + } + } finally { + inModel.leaveCriticalSection(); + } + return outModel; + } + + public Model renameBNodesByPattern(Model inModel, String namespaceEtc, Model dedupModel, String pattern, String property){ + Model outModel = ModelFactory.createDefaultModel(); + Property propertyRes = ResourceFactory.createProperty(property); + OntModel dedupUnionModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM); // we're not using OWL here, just the OntModel submodel infrastructure + dedupUnionModel.addSubModel(outModel); + if (dedupModel != null) { + dedupUnionModel.addSubModel(dedupModel); + } + // the dedupUnionModel is so we can guard against reusing a URI in an + // existing model, as well as in the course of running this process + inModel.enterCriticalSection(Lock.READ); + Set doneSet = new HashSet(); + + try { + outModel.add(inModel); + ClosableIterator closeIt = inModel.listSubjects(); + try { + for (Iterator it = closeIt; it.hasNext();) { + Resource res = (Resource) it.next(); + if (res.isAnon() && !(doneSet.contains(res.getId()))) { + // now we do something hacky to get the same resource in the outModel, since there's no getResourceById(); + ClosableIterator closfIt = outModel.listStatements(res,propertyRes,(RDFNode)null); + Statement stmt = null; + try { + if (closfIt.hasNext()) { + stmt = (Statement) closfIt.next(); + } + } finally { + 
closfIt.close(); + } + if (stmt != null) { + Resource outRes = stmt.getSubject(); + if(stmt.getObject().isLiteral()){ + String value = ((Literal) stmt.getObject()).getLexicalForm(); + String suffix = (pattern.contains("$$$")) + ? pattern.replace("$$$", value) + : pattern + value; + ResourceUtils.renameResource(outRes, namespaceEtc + suffix); + } + doneSet.add(res.getId().toString()); + } + } + } + } finally { + closeIt.close(); + } + } finally { + inModel.leaveCriticalSection(); + } + + + return outModel; + + } + + public Map> generatePropertyMap(List sourceModels, ModelMaker maker){ + Map> propertyMap = Collections.synchronizedMap(new HashMap>()); + Set doneList = new HashSet(); + for(Model model : sourceModels) { + ClosableIterator cItr = model.listSubjects(); + while(cItr.hasNext()){ + Resource res = (Resource) cItr.next(); + if(res.isAnon() && !doneList.contains(res.getId())){ + doneList.add(res.getId().toString()); + StmtIterator stmtItr = model.listStatements(res, (Property)null, (RDFNode)null); + while(stmtItr.hasNext()){ + Statement stmt = stmtItr.next(); + if(!stmt.getObject().isResource()){ + if(propertyMap.containsKey(stmt.getPredicate().getURI())){ + LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI()); + linkList.add(stmt.getObject().toString()); + } + else{ + propertyMap.put(stmt.getPredicate().getURI(), new LinkedList()); + LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI()); + linkList.add(stmt.getObject().toString()); + } + } + } + } + } + cItr = model.listObjects(); + while(cItr.hasNext()){ + RDFNode rdfn = (RDFNode) cItr.next(); + if(rdfn.isResource()){ + Resource res = (Resource)rdfn; + if(res.isAnon() && !doneList.contains(res.getId())){ + doneList.add(res.getId().toString()); + StmtIterator stmtItr = model.listStatements(res, (Property)null, (RDFNode)null); + while(stmtItr.hasNext()){ + Statement stmt = stmtItr.next(); + if(!stmt.getObject().isResource()){ + 
if(propertyMap.containsKey(stmt.getPredicate().getURI())){ + LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI()); + linkList.add(stmt.getObject().toString()); + } + else{ + propertyMap.put(stmt.getPredicate().getURI(), new LinkedList()); + LinkedList linkList = propertyMap.get(stmt.getPredicate().getURI()); + linkList.add(stmt.getObject().toString()); + } + } + } + } + } + } + cItr.close(); + } + return propertyMap; + } + + private String getNextURI(String namespaceEtc, Model model) { + String nextURI = null; + boolean duplicate = true; + while (duplicate) { + nextURI = namespaceEtc+random.nextInt(9999999); + Resource res = ResourceFactory.createResource(nextURI); + duplicate = false; + ClosableIterator closeIt = model.listStatements(res, (Property)null, (RDFNode)null); + try { + if (closeIt.hasNext()) { + duplicate = true; + } + } finally { + closeIt.close(); + } + if (duplicate == false) { + closeIt = model.listStatements((Resource)null, (Property)null, res); + try { + if (closeIt.hasNext()) { + duplicate = true; + } + } finally { + closeIt.close(); + } + } + } + return nextURI; + } + + public void processPropertyValueStrings(Model source, Model destination, Model additions, Model retractions, + String processorClass, String processorMethod, String originalPropertyURI, String newPropertyURI) { + Model additionsModel = ModelFactory.createDefaultModel(); + Model retractionsModel = ModelFactory.createDefaultModel(); + Class stringProcessorClass = null; + Object processor = null; + Class[] methArgs = {String.class}; + Method meth = null; + try { + stringProcessorClass = Class.forName(processorClass); + processor = stringProcessorClass.newInstance(); + meth = stringProcessorClass.getMethod(processorMethod,methArgs); + } catch (Exception e) { + log.error(e, e); + return; + } + Property prop = ResourceFactory.createProperty(originalPropertyURI); + Property newProp = ResourceFactory.createProperty(newPropertyURI); + source.enterCriticalSection(Lock.READ); 
+ try { + ClosableIterator closeIt = source.listStatements((Resource)null,prop,(RDFNode)null); + for (Iterator stmtIt = closeIt; stmtIt.hasNext(); ) { + Statement stmt = (Statement) stmtIt.next(); + if (stmt.getObject().isLiteral()) { + Literal lit = (Literal) stmt.getObject(); + String lex = lit.getLexicalForm(); + Object[] args = {lex}; + String newLex = null; + try { + if (log.isDebugEnabled()) { + log.debug("invoking string processor method on ["+lex.substring(0,lex.length()>50 ? 50 : lex.length())+"..."); + } + newLex = (String) meth.invoke(processor,args); + } catch (Exception e) { + log.error(e, e); + return; + } + if (!newLex.equals(lex)) { + retractionsModel.add(stmt); + Literal newLit = null; + if (lit.getLanguage()!=null && lit.getLanguage().length()>0) { + newLit = additionsModel.createLiteral(newLex,lit.getLanguage()); + } else if (lit.getDatatype() != null) { + newLit = additionsModel.createTypedLiteral(newLex,lit.getDatatype()); + } else { + newLit = additionsModel.createLiteral(newLex); + } + additionsModel.add(stmt.getSubject(),newProp,newLit); + } + } + } + if (destination != null) { + destination.enterCriticalSection(Lock.WRITE); + try { + destination.add(additionsModel); + destination.remove(retractionsModel); + } finally { + destination.leaveCriticalSection(); + } + } + if (additions != null) { + additions.enterCriticalSection(Lock.WRITE); + try { + additions.add(additionsModel); + } finally { + additions.leaveCriticalSection(); + } + } + if (retractions != null) { + retractions.enterCriticalSection(Lock.WRITE); + try { + retractions.add(retractionsModel); + } finally { + retractions.leaveCriticalSection(); + } + } + } finally { + source.leaveCriticalSection(); + } + } + + /** + * Splits values for a given data property URI on a supplied regex and + * asserts each value using newPropertyURI. New statements returned in + * a Jena Model. Split values may be optionally trim()ed. 
+ * @param inModel + * @param propertyURI + * @param splitRegex + * @param newPropertyURI + * @param trim + * @return outModel + */ + public Model splitPropertyValues(Model inModel, String propertyURI, String splitRegex, String newPropertyURI, boolean trim) { + Model outModel = ModelFactory.createDefaultModel(); + Pattern delimiterPattern = Pattern.compile(splitRegex); + Property theProp = ResourceFactory.createProperty(propertyURI); + Property newProp = ResourceFactory.createProperty(newPropertyURI); + inModel.enterCriticalSection(Lock.READ); + try { + StmtIterator stmtIt = inModel.listStatements( (Resource)null, theProp, (RDFNode)null ); + try { + while(stmtIt.hasNext()) { + Statement stmt = stmtIt.nextStatement(); + Resource subj = stmt.getSubject(); + RDFNode obj = stmt.getObject(); + if (obj.isLiteral()) { + Literal lit = (Literal) obj; + String unsplitStr = lit.getLexicalForm(); + String[] splitPieces = delimiterPattern.split(unsplitStr); + for (int i=0; i 0) { + Literal newLiteral = null; + if (lit.getDatatype() != null) { + newLiteral = outModel.createTypedLiteral(newLexicalForm, lit.getDatatype()); + } else { + if (lit.getLanguage() != null) { + newLiteral = outModel.createLiteral(newLexicalForm, lit.getLanguage()); + } else { + newLiteral = outModel.createLiteral(newLexicalForm); + } + } + outModel.add(subj,newProp,newLiteral); + } + } + } + } + } finally { + stmtIt.close(); + } + } finally { + inModel.leaveCriticalSection(); + } + return outModel; + } + + /** + * A simple resource smusher based on a supplied inverse-functional property. + * A new model containing only resources about the smushed statements is returned. 
+ * @param inModel + * @param prop + * @return + */ + public Model smushResources(Model inModel, Property prop) { + Model outModel = ModelFactory.createDefaultModel(); + outModel.add(inModel); + inModel.enterCriticalSection(Lock.READ); + try { + ClosableIterator closeIt = inModel.listObjectsOfProperty(prop); + try { + for (Iterator objIt = closeIt; objIt.hasNext();) { + RDFNode rdfn = (RDFNode) objIt.next(); + ClosableIterator closfIt = inModel.listSubjectsWithProperty(prop, rdfn); + try { + boolean first = true; + Resource smushToThisResource = null; + for (Iterator subjIt = closfIt; closfIt.hasNext();) { + Resource subj = (Resource) subjIt.next(); + if (first) { + smushToThisResource = subj; + first = false; + continue; + } + + ClosableIterator closgIt = inModel.listStatements(subj,(Property)null,(RDFNode)null); + try { + for (Iterator stmtIt = closgIt; stmtIt.hasNext();) { + Statement stmt = (Statement) stmtIt.next(); + outModel.remove(stmt.getSubject(), stmt.getPredicate(), stmt.getObject()); + outModel.add(smushToThisResource, stmt.getPredicate(), stmt.getObject()); + } + } finally { + closgIt.close(); + } + closgIt = inModel.listStatements((Resource) null, (Property)null, subj); + try { + for (Iterator stmtIt = closgIt; stmtIt.hasNext();) { + Statement stmt = (Statement) stmtIt.next(); + outModel.remove(stmt.getSubject(), stmt.getPredicate(), stmt.getObject()); + outModel.add(stmt.getSubject(), stmt.getPredicate(), smushToThisResource); + } + } finally { + closgIt.close(); + } + } + } finally { + closfIt.close(); + } + } + } finally { + closeIt.close(); + } + } finally { + inModel.leaveCriticalSection(); + } + return outModel; + } + + /** + * Returns a model where redundant individuals that are sameAs one another are smushed + * using URIs in preferred namespaces where possible. 
+ * @param model + * @param preferredIndividualNamespace + * @return + */ + public Model dedupAndExtract( Model model, String preferredNamespace ) { + Model extractsModel = ModelFactory.createDefaultModel(); + + HashMap rewriteURIUsing = new HashMap(); + + Iterator haveSameAsIt = model.listSubjectsWithProperty(OWL.sameAs); + while (haveSameAsIt.hasNext()) { + String preferredURI = null; + Resource hasSameAs = (Resource) haveSameAsIt.next(); + List sameAsList = hasSameAs.listProperties(OWL.sameAs).toList(); + if (sameAsList.size()>1) { // if sameAs something other than the same URI (we assume reasoning model) + List sameAsURIs = new LinkedList(); + Iterator sameAsStmtIt = sameAsList.iterator(); + for (int i=0; i 0 && functionalPresent) { + result.setResultText("merged " + counter + + " statements. Some statements could not be merged."); + } else if(counter>0 && !functionalPresent) { + result.setResultText("merged " + counter + " statements."); + } else if (counter==0) { + result.setResultText("No statements merged"); + } + return result; + + } + /** + * Returns true if tboxOntModel contains the statement (property, RDF.type, OWL.FunctionalProperty). + * The lookup is performed inside a READ critical section on tboxOntModel. + */ + private boolean isFunctional(Property property, OntModel tboxOntModel) { + tboxOntModel.enterCriticalSection(Lock.READ); + try { + return (tboxOntModel.contains( + property, RDF.type, OWL.FunctionalProperty)); + } finally { + tboxOntModel.leaveCriticalSection(); + } + } + /** + * Gives new URIs to the resources of the model named oldModel whose namespace equals oldNamespace. + * When oldModel and newModel are different names, the statements are first copied into a temporary model and the renaming is done there; afterwards every statement whose subject, or whose resource-valued object, has a namespace equal to changeNamespace is added to the model named newModel. When the names are equal, the resources of the old model are renamed directly and nothing is added to another model. + * changeNamespace is newNamespace when that is not empty, otherwise dNamespace when it equals the default namespace of the request's WebappDaoFactory. + * New URIs come from getUnusedURI(newNamespace, wdf) when newNamespace is not empty, otherwise (under the same default-namespace condition) from the IndividualDao. A URI already issued during this call is not issued again. + * For literal objects the cast to Resource fails and the exception is caught, so only the subject test applies to those statements. + * NOTE(review): if neither URI source applies, uri keeps its previous value (initially null) and renameResource is still called -- confirm that callers always supply one of the two namespaces. + * @param oldModel name of the model to read + * @param newModel name of the model that receives the renamed statements + * @param oldNamespace namespace of the resources to rename + * @param newNamespace namespace for the new URIs, or an empty string + * @param dNamespace namespace compared with the default namespace when newNamespace is empty + * @param maker supplies both models by name + * @param vreq request used to obtain the WebappDaoFactory + */ + public void doPermanentURI(String oldModel, String newModel, String oldNamespace, + String newNamespace, String dNamespace, ModelMaker maker, + VitroRequest vreq) { + + WebappDaoFactory wdf = vreq.getFullWebappDaoFactory(); + Model m = maker.getModel(oldModel); + Model saveModel = maker.getModel(newModel); + Model tempModel = ModelFactory.createDefaultModel(); + ResIterator rsItr = null; + ArrayList urlCheck = new ArrayList(); + String changeNamespace = null; + boolean urlFound = false; + if(!oldModel.equals(newModel)){ + StmtIterator stmtItr = m.listStatements(); + while(stmtItr.hasNext()){ + Statement stmt = stmtItr.nextStatement(); + 
tempModel.add(stmt); + } + rsItr = tempModel.listResourcesWithProperty((Property)null); + } else{ + rsItr = m.listResourcesWithProperty((Property)null); + } + + String uri = null; + while(rsItr.hasNext()){ + Resource res = rsItr.next(); + if(oldNamespace.equals(res.getNameSpace())){ + if(!newNamespace.equals("")){ + do{ + uri = getUnusedURI(newNamespace,wdf); + if(!urlCheck.contains(uri)){ + urlCheck.add(uri); + urlFound = true; + } + }while(!urlFound); + urlFound = false; + } + else if(dNamespace.equals(vreq.getFullWebappDaoFactory().getDefaultNamespace())){ + try{ + do{ + uri = wdf.getIndividualDao().getUnusedURI(null); + if(!urlCheck.contains(uri)){ + urlCheck.add(uri); + urlFound = true; + } + }while(!urlFound); + urlFound = false; + }catch(InsertException ex){ + log.error("could not create uri"); + } + } + ResourceUtils.renameResource(res, uri); + } + + } + boolean statementDone = false; + if(!newNamespace.equals("")){ + changeNamespace = newNamespace; + } + else if(dNamespace.equals(vreq.getFullWebappDaoFactory().getDefaultNamespace())){ + changeNamespace = dNamespace; + } + if(!oldModel.equals(newModel)){ + StmtIterator stmtItr = tempModel.listStatements(); + while(stmtItr.hasNext()){ + statementDone = false; + Statement stmt = stmtItr.nextStatement(); + Resource sRes = stmt.getSubject(); + Resource oRes = null; + if(sRes.getNameSpace().equals(changeNamespace)){ + saveModel.add(stmt); + statementDone = true; + } + try{ + oRes = (Resource)stmt.getObject(); + if(oRes.getNameSpace().equals(changeNamespace) && !statementDone){ + saveModel.add(stmt); + statementDone = true; + } + } + catch(Exception e){ + continue; + } + } + } + } + /** + * Builds a candidate URI from newNamespace, the letter n and a random non-negative integer, and returns the first candidate for which wdf.checkURI(uri) returns null. + * The bound of the random integer is min(Integer.MAX_VALUE, (int) 2^(attempts + 13)), so it grows with every attempt; the search stops after 30 attempts. + * @param newNamespace prefix of the generated URI + * @param wdf used to check each candidate + * @return the accepted URI, or null if every attempt was rejected + */ + public String getUnusedURI(String newNamespace,WebappDaoFactory wdf){ + String uri = null; + String errMsg = null; + Random random = new Random(); + boolean uriIsGood = false; int attempts = 0; - - while( uriIsGood == false && attempts < 30 ){ - uri = newNamespace + "n" + random.nextInt( 
Math.min(Integer.MAX_VALUE,(int)Math.pow(2,attempts + 13)) ); - errMsg = wdf.checkURI(uri); - if( errMsg != null) - uri = null; - else - uriIsGood = true; - attempts++; - } - - return uri; - } - + + while( uriIsGood == false && attempts < 30 ){ + uri = newNamespace + "n" + random.nextInt( Math.min(Integer.MAX_VALUE,(int)Math.pow(2,attempts + 13)) ); + errMsg = wdf.checkURI(uri); + if( errMsg != null) + uri = null; + else + uriIsGood = true; + attempts++; + } + + return uri; + } + } diff --git a/webapp/web/jenaIngest/merge_result.jsp b/webapp/web/jenaIngest/merge_result.jsp index fb85d4b0d..bc15bb33f 100644 --- a/webapp/web/jenaIngest/merge_result.jsp +++ b/webapp/web/jenaIngest/merge_result.jsp @@ -1,5 +1,6 @@ <%-- $This file is distributed under the terms of the license in /doc/license.txt$ --%> +<%@page import="edu.cornell.mannlib.vitro.webapp.utils.jena.JenaIngestUtils.MergeResult"%> <%@ taglib uri="http://java.sun.com/jstl/core" prefix="c"%> <%@taglib prefix="vitro" uri="/WEB-INF/tlds/VitroUtils.tld" %> @@ -11,8 +12,13 @@

Ingest Menu > Merge Individuals

-<%String result = (String) request.getAttribute("result");%> +<% + MergeResult resultObj = (MergeResult) request.getAttribute("result"); + String result = resultObj.getResultText(); +%> +

<%=result%>

+ <%if(!result.equals("resource 1 not present") && !result.equals("resource 2 not present")){ if(!result.equals("No statements merged") && !result.endsWith("statements.")){%>

Download non-mergeable statements.