VIVO-662 Remove duplicate rows from the SPARQL query results.

Two rows are considered to be duplicates if they have the same authorshipURI or the same authorURI.
This commit is contained in:
j2blake 2014-01-09 17:27:14 -05:00
parent 88fbafc98b
commit 0280cda8e3
2 changed files with 152 additions and 2 deletions

View file

@ -2,10 +2,14 @@
package edu.cornell.mannlib.vitro.webapp.dao.jena;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.commons.lang.ObjectUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -56,6 +60,39 @@ public class QueryUtils {
return map;
}
/**
 * Filters a list of maps, dropping every map that is a "duplicate" of one
 * that has already been kept.
 *
 * A map is a duplicate of an earlier, retained map when, for at least one of
 * the given keys, it holds a non-null value that equals the retained map's
 * value for that same key. Earlier entries take precedence: the first map of
 * such a group is the one that survives. The input list is only read, never
 * modified.
 *
 * @param rawList
 *            the maps to filter
 * @param keys
 *            the keys whose values are compared; with no keys, nothing is
 *            ever considered a duplicate
 * @return a new list holding the retained maps, in their original order
 */
public static List<Map<String, String>> removeDuplicatesMapsFromList(
        List<Map<String, String>> rawList, String... keys) {
    List<Map<String, String>> kept = new ArrayList<>();
    for (Map<String, String> candidate : rawList) {
        Map<String, String> match = findKeptMapSharingValue(kept,
                candidate, keys);
        if (match == null) {
            kept.add(candidate);
        } else if (log.isDebugEnabled()) {
            logDuplicateRows(candidate, match, keys);
        }
    }
    return kept;
}

/**
 * Returns the first already-kept map that shares a non-null value with the
 * candidate for any of the keys, or null if there is no such map.
 */
private static Map<String, String> findKeptMapSharingValue(
        List<Map<String, String>> kept, Map<String, String> candidate,
        String... keys) {
    for (Map<String, String> earlier : kept) {
        for (String key : keys) {
            String value = candidate.get(key);
            // A missing (null) value never counts as a match.
            if (value != null && value.equals(earlier.get(key))) {
                return earlier;
            }
        }
    }
    return null;
}
/**
 * Writes a debug message naming the keys that were compared, the map that
 * is being kept and the map that is being discarded.
 */
private static void logDuplicateRows(Map<String, String> rawMap,
        Map<String, String> filteredMap, String... keys) {
    StringBuilder message = new StringBuilder(
            "Found duplicate rows, by at least one of these keys: ");
    message.append(Arrays.toString(keys));
    message.append(". Keeping ").append(filteredMap);
    message.append(". Discarding ").append(rawMap).append(".");
    log.debug(message.toString());
}
public static Object nodeToObject( RDFNode node ){
if( node == null ){
return "";

View file

@ -3,6 +3,14 @@
package edu.cornell.mannlib.vitro.webapp.dao.jena;
import static org.junit.Assert.*;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.junit.Ignore;
import org.junit.Test;
@ -18,6 +26,11 @@ import edu.cornell.mannlib.vitro.testing.AbstractTestClass;
* Unit tests for QueryUtils: bindVariables and removeDuplicatesMapsFromList.
*/
public class QueryUtilsTest extends AbstractTestClass {
// ----------------------------------------------------------------------
// Test bindVariables
// ----------------------------------------------------------------------
// Variable bindings handed to QueryUtils.bindVariables() by
// assertBoundQueryEquals(); starts out empty.
private QuerySolutionMap bindings = new QuerySolutionMap();
@Test
@ -54,12 +67,112 @@ public class QueryUtilsTest extends AbstractTestClass {
fail("bindAnon not implemented");
}
// ----------------------------------------------------------------------
// Helper methods
// ----------------------------------------------------------------------
/**
 * Binds the current bindings into the template and asserts that the
 * resulting query string equals the expected one.
 */
private void assertBoundQueryEquals(String template, String expected) {
    assertEquals("bounding results", expected,
            QueryUtils.bindVariables(template, bindings));
}
// ----------------------------------------------------------------------
// Test removeDuplicatesMapsFromList
// ----------------------------------------------------------------------
// Fixture: two rows that share the same "size" but differ in "id" and
// "color"; only the second row has a "parity" entry.
private List<Map<String, String>> theList = list(
map(pair("id", "1"), pair("color", "blue"), pair("size", "large")),
map(pair("id", "2"), pair("color", "red"), pair("size", "large"),
pair("parity", "odd")));
// Result of the most recent removeDuplicatesMapsFromList() call; assigned
// in assertExpectedIDs().
private List<Map<String, String>> filteredList;
/** With no keys to compare on, nothing can match, so both rows survive. */
@Test
public void noKeys() {
    String[] expectedIds = ids("1", "2");
    String[] compareOn = keys();
    assertExpectedIDs(expectedIds, compareOn);
}
/** Filtering an empty list yields an empty list. */
@Test
public void emptyList() {
    theList = list();
    String[] expectedIds = ids();
    String[] compareOn = keys("color");
    assertExpectedIDs(expectedIds, compareOn);
}
/** A key that no row contains gives null values, which never match. */
@Test
public void unrecognizedKey() {
    String[] expectedIds = ids("1", "2");
    String[] compareOn = keys("bogus");
    assertExpectedIDs(expectedIds, compareOn);
}
/** A key present in only one row cannot produce a match. */
@Test
public void unmatchedKey() {
    String[] expectedIds = ids("1", "2");
    String[] compareOn = keys("parity");
    assertExpectedIDs(expectedIds, compareOn);
}
/** Both rows have the same "size", so only the first one is kept. */
@Test
public void foundDuplicate() {
    String[] expectedIds = ids("1");
    String[] compareOn = keys("size");
    assertExpectedIDs(expectedIds, compareOn);
}
/** The rows differ in "color", so neither is discarded. */
@Test
public void noDuplicates() {
    String[] expectedIds = ids("1", "2");
    String[] compareOn = keys("color");
    assertExpectedIDs(expectedIds, compareOn);
}
/** A match on any single key ("size" here) is enough to be a duplicate. */
@Test
public void matchOneKeyOfMany() {
    String[] expectedIds = ids("1");
    String[] compareOn = keys("color", "size");
    assertExpectedIDs(expectedIds, compareOn);
}
/**
 * A third row that also shares "size" with the first row is discarded as
 * well, leaving only the first row.
 */
@Test
public void multipleDuplicatesOfASingleRecord() {
    Map<String, String> thirdRow = map(pair("id", "3"),
            pair("size", "large"));
    theList.add(thirdRow);

    String[] expectedIds = ids("1");
    String[] compareOn = keys("color", "size");
    assertExpectedIDs(expectedIds, compareOn);
}
// ----------------------------------------------------------------------
/**
 * Filters the fixture list on the given keys, stores the result in
 * filteredList, and asserts that the surviving rows have exactly the
 * expected ids, in order.
 */
private void assertExpectedIDs(String[] ids, String[] keys) {
    filteredList = QueryUtils.removeDuplicatesMapsFromList(theList, keys);

    List<String> expected = Arrays.asList(ids);
    List<String> actual = idsInFilteredList();
    assertEquals("ids", expected, actual);
}
/**
 * Collects the "id" value of every row in filteredList, in order. Fails the
 * test if any row has no id.
 */
private List<String> idsInFilteredList() {
    List<String> found = new ArrayList<>();
    for (Map<String, String> row : filteredList) {
        String id = row.get("id");
        if (id == null) {
            // fail() always throws, so a null id is never added below.
            fail("ID was null");
        }
        found.add(id);
    }
    return found;
}
/** Builds a new, modifiable list holding the given maps in order. */
@SafeVarargs
private final List<Map<String, String>> list(Map<String, String>... maps) {
    List<Map<String, String>> result = new ArrayList<>(maps.length);
    for (Map<String, String> each : maps) {
        result.add(each);
    }
    return result;
}
/**
 * Builds a map from the given pairs, where element 0 of each pair is the
 * key and element 1 is the value.
 */
private Map<String, String> map(String[]... pairs) {
    Map<String, String> result = new HashMap<>();
    for (int i = 0; i < pairs.length; i++) {
        String key = pairs[i][0];
        String value = pairs[i][1];
        result.put(key, value);
    }
    return result;
}
/**
 * Returns its arguments as an array; used to write a key/value pair for
 * map() as pair("key", "value").
 */
private String[] pair(String... s) {
return s;
}
/**
 * Returns its arguments as an array; names the keys that a test wants the
 * duplicate check to compare on.
 */
private String[] keys(String... keys) {
return keys;
}
/**
 * Returns its arguments as an array; names the ids that a test expects to
 * remain after filtering.
 */
private String[] ids(String... ids) {
return ids;
}
// ----------------------------------------------------------------------
// Helper methods
// ----------------------------------------------------------------------
}