VIVO-662 Remove duplicate rows from the SPARQL query results.
Two rows are considered to be duplicates if they have the same authorshipURI or the same authorURI.
This commit is contained in:
parent
88fbafc98b
commit
0280cda8e3
2 changed files with 152 additions and 2 deletions
|
@ -2,10 +2,14 @@
|
|||
|
||||
package edu.cornell.mannlib.vitro.webapp.dao.jena;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.lang.ObjectUtils;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
@ -56,6 +60,39 @@ public class QueryUtils {
|
|||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* If any pair of maps in the list has the same (non-null) value for any of
|
||||
* these keys, call the maps duplicates and keep only the first of them.
|
||||
*/
|
||||
public static List<Map<String, String>> removeDuplicatesMapsFromList(
|
||||
List<Map<String, String>> rawList, String... keys) {
|
||||
List<Map<String, String>> filteredList = new ArrayList<>();
|
||||
outerLoop: for (Map<String, String> rawMap : rawList) {
|
||||
for (Map<String, String> filteredMap : filteredList) {
|
||||
for (String key : keys) {
|
||||
String rawValue = rawMap.get(key);
|
||||
if (rawValue != null) {
|
||||
if (rawValue.equals(filteredMap.get(key))) {
|
||||
if (log.isDebugEnabled()) {
|
||||
logDuplicateRows(rawMap, filteredMap, keys);
|
||||
}
|
||||
continue outerLoop;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
filteredList.add(rawMap);
|
||||
}
|
||||
return filteredList;
|
||||
}
|
||||
|
||||
private static void logDuplicateRows(Map<String, String> rawMap,
|
||||
Map<String, String> filteredMap, String... keys) {
|
||||
log.debug("Found duplicate rows, by at least one of these keys: "
|
||||
+ Arrays.toString(keys) + ". Keeping " + filteredMap
|
||||
+ ". Discarding " + rawMap + ".");
|
||||
}
|
||||
|
||||
public static Object nodeToObject( RDFNode node ){
|
||||
if( node == null ){
|
||||
return "";
|
||||
|
|
|
@ -3,6 +3,14 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.dao.jena;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
@ -18,6 +26,11 @@ import edu.cornell.mannlib.vitro.testing.AbstractTestClass;
|
|||
* TODO
|
||||
*/
|
||||
public class QueryUtilsTest extends AbstractTestClass {
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Test bindVariables
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private QuerySolutionMap bindings = new QuerySolutionMap();
|
||||
|
||||
@Test
|
||||
|
@ -54,12 +67,112 @@ public class QueryUtilsTest extends AbstractTestClass {
|
|||
fail("bindAnon not implemented");
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper methods
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private void assertBoundQueryEquals(String template, String expected) {
|
||||
String actual = QueryUtils.bindVariables(template, bindings);
|
||||
assertEquals("bounding results", expected, actual);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Test removeDuplicatesMapsFromList
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private List<Map<String, String>> theList = list(
|
||||
map(pair("id", "1"), pair("color", "blue"), pair("size", "large")),
|
||||
map(pair("id", "2"), pair("color", "red"), pair("size", "large"),
|
||||
pair("parity", "odd")));
|
||||
private List<Map<String, String>> filteredList;
|
||||
|
||||
@Test
|
||||
public void noKeys() {
|
||||
assertExpectedIDs(ids("1", "2"), keys());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void emptyList() {
|
||||
theList = new ArrayList<>();
|
||||
assertExpectedIDs(ids(), keys("color"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void unrecognizedKey() {
|
||||
assertExpectedIDs(ids("1", "2"), keys("bogus"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void unmatchedKey() {
|
||||
assertExpectedIDs(ids("1", "2"), keys("parity"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void foundDuplicate() {
|
||||
assertExpectedIDs(ids("1"), keys("size"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void noDuplicates() {
|
||||
assertExpectedIDs(ids("1", "2"), keys("color"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void matchOneKeyOfMany() {
|
||||
assertExpectedIDs(ids("1"), keys("color", "size"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void multipleDuplicatesOfASingleRecord() {
|
||||
theList.add(map(pair("id", "3"), pair("size", "large")));
|
||||
assertExpectedIDs(ids("1"), keys("color", "size"));
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private void assertExpectedIDs(String[] ids, String[] keys) {
|
||||
filteredList = QueryUtils.removeDuplicatesMapsFromList(theList, keys);
|
||||
assertEquals("ids", Arrays.asList(ids), idsInFilteredList());
|
||||
}
|
||||
|
||||
private List<String> idsInFilteredList() {
|
||||
List<String> ids = new ArrayList<>();
|
||||
for (Map<String, String> map : filteredList) {
|
||||
String id = map.get("id");
|
||||
if (id == null) {
|
||||
fail("ID was null");
|
||||
} else {
|
||||
ids.add(id);
|
||||
}
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
|
||||
@SafeVarargs
|
||||
private final List<Map<String, String>> list(Map<String, String>... maps) {
|
||||
return new ArrayList<>(Arrays.asList(maps));
|
||||
}
|
||||
|
||||
private Map<String, String> map(String[]... pairs) {
|
||||
Map<String, String> map = new HashMap<>();
|
||||
for (String[] pair : pairs) {
|
||||
map.put(pair[0], pair[1]);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private String[] pair(String... s) {
|
||||
return s;
|
||||
}
|
||||
|
||||
private String[] keys(String... keys) {
|
||||
return keys;
|
||||
}
|
||||
|
||||
private String[] ids(String... ids) {
|
||||
return ids;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper methods
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue