diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/BaseObjectPropertyDataPostProcessor.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/BaseObjectPropertyDataPostProcessor.java index 9b829fb63..100bf6d86 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/BaseObjectPropertyDataPostProcessor.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/BaseObjectPropertyDataPostProcessor.java @@ -35,7 +35,7 @@ public abstract class BaseObjectPropertyDataPostProcessor implements return; } - removeDuplicates(data); + objectPropertyTemplateModel.removeDuplicates(data); for (Map map : data) { process(map); @@ -44,34 +44,6 @@ public abstract class BaseObjectPropertyDataPostProcessor implements protected abstract void process(Map map); - /** The SPARQL query results may contain duplicate rows for a single object, if there are multiple solutions - * to the entire query. Remove duplicates here by arbitrarily selecting only the first row returned. - * Note that in the case of a collated query, the query has filtered out inferred subclasses, but if there - * are multiple asserted subclasses, all will be returned. This method will arbitrarily remove all but the - * first one returned. 
- * @param List> data - */ - protected void removeDuplicates(List> data) { - String objectVariableName = objectPropertyTemplateModel.getObjectKey(); - if (objectVariableName == null) { - log.error("Cannot remove duplicate statements for property " + objectPropertyTemplateModel.getName() + " because no object found to dedupe."); - return; - } - List foundObjects = new ArrayList(); - log.debug("Removing duplicates from property: " + objectPropertyTemplateModel.getUri()); - Iterator> dataIterator = data.iterator(); - while (dataIterator.hasNext()) { - Map map = dataIterator.next(); - String objectValue = map.get(objectVariableName); - // We arbitrarily remove all but the first. Not sure what selection criteria could be brought to bear on this. - if (foundObjects.contains(objectValue)) { - dataIterator.remove(); - } else { - foundObjects.add(objectValue); - } - } - } - /* Postprocessor methods callable from any postprocessor */ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/CollatedObjectPropertyTemplateModel.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/CollatedObjectPropertyTemplateModel.java index a8905174b..93c1d7b28 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/CollatedObjectPropertyTemplateModel.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/CollatedObjectPropertyTemplateModel.java @@ -3,10 +3,14 @@ package edu.cornell.mannlib.vitro.webapp.web.templatemodels.individual; import java.util.ArrayList; +import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.regex.Matcher; @@ -27,15 +31,18 @@ import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; public class 
CollatedObjectPropertyTemplateModel extends ObjectPropertyTemplateModel { private static final Log log = LogFactory.getLog(CollatedObjectPropertyTemplateModel.class); + private static final String SUBCLASS_VARIABLE_NAME = "subclass"; + private static final Pattern SELECT_SUBCLASS_PATTERN = // SELECT ?subclass - Pattern.compile("SELECT[^{]*\\?subclass\\b", Pattern.CASE_INSENSITIVE); + Pattern.compile("SELECT[^{]*\\?" + SUBCLASS_VARIABLE_NAME + "\\b", Pattern.CASE_INSENSITIVE); // ORDER BY ?subclass // ORDER BY DESC(?subclass) private static final Pattern ORDER_BY_SUBCLASS_PATTERN = - Pattern.compile("ORDER\\s+BY\\s+(DESC\\s*\\(\\s*)?\\?subclass", Pattern.CASE_INSENSITIVE); + Pattern.compile("ORDER\\s+BY\\s+(DESC\\s*\\(\\s*)?\\?" + SUBCLASS_VARIABLE_NAME, Pattern.CASE_INSENSITIVE); private SortedMap> subclasses; + private WebappDaoFactory wdf; CollatedObjectPropertyTemplateModel(ObjectProperty op, Individual subject, VitroRequest vreq, EditingPolicyHelper policyHelper) @@ -44,7 +51,7 @@ public class CollatedObjectPropertyTemplateModel extends ObjectPropertyTemplateM super(op, subject, vreq, policyHelper); /* Get the data */ - WebappDaoFactory wdf = vreq.getWebappDaoFactory(); + wdf = vreq.getWebappDaoFactory(); ObjectPropertyStatementDao opDao = wdf.getObjectPropertyStatementDao(); String subjectUri = subject.getURI(); String propertyUri = op.getURI(); @@ -59,12 +66,7 @@ public class CollatedObjectPropertyTemplateModel extends ObjectPropertyTemplateM collate(subjectUri, propertyUri, statementData, vreq, policyHelper); /* Sort by subclass name */ - Comparator comparer = new Comparator() { - @Override - public int compare(String o1, String o2) { - return o1.compareTo(o2); - }}; - subclasses = new TreeMap>(comparer); + subclasses = new TreeMap>(); subclasses.putAll(unsortedSubclasses); for (List list : subclasses.values()) { @@ -92,6 +94,144 @@ public class CollatedObjectPropertyTemplateModel extends ObjectPropertyTemplateM return null; } + protected void 
removeDuplicates(List> data) { + filterSubclasses(data); + } + + /* + * The query returns subclasses of a specific superclass that the object belongs to; for example, + * in the case of authorInAuthorship, subclasses of core:InformationResource. Here we remove all but + * the most specific subclass for the object. This takes the place of ObjectPropertyTemplateModel.removeDuplicates(), + * since that would arbitrarily remove all but the first result for a given object. + * RY Implementation alternative: roll this filtering into the removeDuplicates() method to reduce the number of times + * we need to iterate through the results; but at the cost of conceptual clarity. + */ + private void filterSubclasses(List> statementData) { + String objectVariableName = getObjectKey(); + if (objectVariableName == null) { + log.error("Cannot remove duplicate statements for property " + getUri() + " because no object found to dedupe."); + return; + } + + if (log.isDebugEnabled()) { + log.debug("Data before subclass filtering"); + logData(statementData); + } + + List> filteredList = new ArrayList>(); + Set processedObjects = new HashSet(); + for (Map outerMap : statementData) { + String objectUri = outerMap.get(objectVariableName); + if (processedObjects.contains(objectUri)) { + continue; + } + processedObjects.add(objectUri); + String subclassUri = outerMap.get(SUBCLASS_VARIABLE_NAME); + if (subclassUri == null) { + continue; + } + List> dataForThisObject = new ArrayList>(); + for (Map innerMap : statementData) { + if ( objectUri == null ? innerMap.get(objectVariableName) == null : objectUri.equals(innerMap.get(objectVariableName)) ) { + dataForThisObject.add(innerMap); + } + } + // Sort the data for this object from most to least specific subclass, with nulls at end + Collections.sort(dataForThisObject, new SubclassComparator(wdf)); + filteredList.add(dataForThisObject.get(0)); + } + + statementData.clear(); + statementData.addAll(filteredList); + + if (log.isDebugEnabled()) { + log.debug("Data after subclass filtering"); + logData(statementData); 
+ } + +// List> filteredList = new ArrayList>(); +// Set processedObjects = new HashSet(); +// Iterator> iOuter = statementData.iterator(); +// while (iOuter.hasNext()) { +//// for (Map map : statementData) { +// Map outerMap = (Map) iOuter.next(); +// String outerObjectUri = outerMap.get(objectVariableName); +// if (processedObjects.contains(outerObjectUri)) { +// continue; +// } +// processedObjects.add(outerObjectUri); +// String outerSubclass = outerMap.get(SUBCLASS_VARIABLE_NAME); +// if (outerSubclass == null) { +// continue; +// } +// List superclassUris = wdf.getVClassDao().getAllSuperClassURIs(outerSubclass); +//// List> dataForThisObject = new ArrayList>(); +// Iterator> iInner = statementData.iterator(); +// while (iInner.hasNext()) { +// Map innerMap = iInner.next(); +// if (innerMap == outerMap || innerMap.get(objectVariableName) != outerObjectUri) { +// continue; +// } +// String innerSubclass = innerMap.get(SUBCLASS_VARIABLE_NAME); +// if (superclassUris.contains(innerSubclass)) { +// +// } +// +// } +// +// } + + + } + + + // Collections.sort(mergedPropertyList,new PropertyRanker(vreq)); + private class SubclassComparator implements Comparator> { + + private VClassDao vclassDao; + + SubclassComparator(WebappDaoFactory wdf) { + this.vclassDao = wdf.getVClassDao(); + } + + @Override + public int compare(Map map1, Map map2) { + + String subclass1 = map1.get(SUBCLASS_VARIABLE_NAME); + String subclass2 = map2.get(SUBCLASS_VARIABLE_NAME); + + if (subclass1 == null) { + if (subclass2 == null) { + return 0; + } else { + return 1; // nulls rank highest + } + } + + if (subclass2 == null) { + return -1; // nulls rank highest + } + + if (subclass1.equals(subclass2)) { + return 0; + } + + List superclasses = vclassDao.getAllSuperClassURIs(subclass1); + if (superclasses.contains(subclass2)) { + return -1; + } + + superclasses = vclassDao.getAllSuperClassURIs(subclass2); + if (superclasses.contains(subclass1)) { + return 1; + } + + return 0; + + } + + } + private 
Map> collate(String subjectUri, String propertyUri, List> statementData, VitroRequest vreq, EditingPolicyHelper policyHelper) { @@ -101,7 +241,7 @@ public class CollatedObjectPropertyTemplateModel extends ObjectPropertyTemplateM List currentList = null; String objectKey = getObjectKey(); for (Map map : statementData) { - String subclassUri = map.get("subclass"); + String subclassUri = map.get(SUBCLASS_VARIABLE_NAME); // Rows with no subclass are put into a subclass map with an empty name. if (subclassUri == null) { subclassUri = ""; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/ObjectPropertyTemplateModel.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/ObjectPropertyTemplateModel.java index a90d735c9..f017362f7 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/ObjectPropertyTemplateModel.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/web/templatemodels/individual/ObjectPropertyTemplateModel.java @@ -215,7 +215,7 @@ public abstract class ObjectPropertyTemplateModel extends PropertyTemplateModel } } - private void logData(List> data) { + protected void logData(List> data) { if (log.isDebugEnabled()) { int count = 1; @@ -229,6 +229,32 @@ public abstract class ObjectPropertyTemplateModel extends PropertyTemplateModel } } + + /** The SPARQL query results may contain duplicate rows for a single object, if there are multiple solutions + * to the entire query. Remove duplicates here by arbitrarily selecting only the first row returned. 
+ * @param List> data + */ + protected void removeDuplicates(List> data) { + String objectVariableName = getObjectKey(); + if (objectVariableName == null) { + log.error("Cannot remove duplicate statements for property " + getUri() + " because no object found to dedupe."); + return; + } + List foundObjects = new ArrayList(); + log.debug("Removing duplicates from property: " + getUri()); + Iterator> dataIterator = data.iterator(); + while (dataIterator.hasNext()) { + Map map = dataIterator.next(); + String objectValue = map.get(objectVariableName); + // We arbitrarily remove all but the first. Not sure what selection criteria could be brought to bear on this. + if (foundObjects.contains(objectValue)) { + dataIterator.remove(); + } else { + foundObjects.add(objectValue); + } + } + } + /* Post-processing that must occur after collation, because it does reordering on collated subclass * lists rather than on the entire list. This should ideally be configurable in the config file * like the pre-collation post-processing, but for now due to time constraints it applies to all views. diff --git a/webapp/web/config/listViewConfig-default.xml b/webapp/web/config/listViewConfig-default.xml index 3cc2bb71c..73cb92fd7 100644 --- a/webapp/web/config/listViewConfig-default.xml +++ b/webapp/web/config/listViewConfig-default.xml @@ -24,15 +24,9 @@ SELECT ?subclass ?object ?name ?moniker { GRAPH ?g1 { ?subject ?property ?object OPTIONAL { ?object a ?subclass } - # TODO: the original intent was to allow the - # type triple to be in a different graph, - # but SDB makes an extremely inefficient - # query with this additional graph variable. } OPTIONAL { GRAPH ?g2 { ?object rdfs:label ?name } } - OPTIONAL { GRAPH ?g3 { ?object vitro:moniker ?moniker } } - FILTER ( ?g1 != <http://vitro.mannlib.cornell.edu/default/inferred-tbox> && - ?g1 != <http://vitro.mannlib.cornell.edu/default/vitro-kb-inf> ) + OPTIONAL { GRAPH ?g3 { ?object vitro:moniker ?moniker } } } ORDER BY ?subclass