diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFService.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFService.java index 725cd1ffe..617e725b5 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFService.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFService.java @@ -11,7 +11,6 @@ import java.util.Comparator; import java.util.Iterator; import java.util.List; -import org.apache.commons.lang.StringUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -42,26 +41,20 @@ public class LanguageFilteringRDFService implements RDFService { this.langs = normalizeLangs(langs); } - private List normalizeLangs(List langs) { - List normalizedLangs = new ArrayList(); - String currentBaseLang = null; - for (String lang : langs) { - String normalizedLang = StringUtils.lowerCase(lang); - String baseLang = normalizedLang.split("-")[0]; - if (currentBaseLang == null) { - currentBaseLang = baseLang; - } else if (!currentBaseLang.equals(baseLang)) { - if (!normalizedLangs.contains(currentBaseLang)) { - normalizedLangs.add(currentBaseLang); - } - currentBaseLang = baseLang; - } - } - if (currentBaseLang != null && !normalizedLangs.contains(currentBaseLang)) { - normalizedLangs.add(currentBaseLang); - } - return normalizedLangs; - } + private List normalizeLangs(List langs) { + log.debug("Preferred languages:" + langs); + + List normalizedLangs = new ArrayList(langs); + for (String lang : langs) { + String baseLang = lang.split("-")[0]; + if (!normalizedLangs.contains(baseLang)) { + normalizedLangs.add(baseLang); + } + } + + log.debug("Normalized languages:" + normalizedLangs); + return normalizedLangs; + } @Override public boolean changeSetUpdate(ChangeSet changeSet) @@ -106,6 +99,7 @@ public class LanguageFilteringRDFService implements RDFService { } private Model 
filterModel(Model m) { + log.debug("filterModel"); List retractions = new ArrayList(); StmtIterator stmtIt = m.listStatements(); while (stmtIt.hasNext()) { @@ -117,6 +111,7 @@ public class LanguageFilteringRDFService implements RDFService { continue; } Collections.sort(candidatesForRemoval, new StatementSortByLang()); + log.debug("sorted statements: " + showSortedStatements(candidatesForRemoval)); Iterator candIt = candidatesForRemoval.iterator(); String langRegister = null; boolean chuckRemaining = false; @@ -142,9 +137,27 @@ public class LanguageFilteringRDFService implements RDFService { return m; } - @Override + private String showSortedStatements(List candidatesForRemoval) { + List langStrings = new ArrayList(); + for (Statement stmt: candidatesForRemoval) { + if (stmt == null) { + langStrings.add("null stmt"); + } else { + RDFNode node = stmt.getObject(); + if (!node.isLiteral()) { + langStrings.add("not literal"); + } else { + langStrings.add(node.asLiteral().getLanguage()); + } + } + } + return langStrings.toString(); + } + + @Override public InputStream sparqlSelectQuery(String query, ResultFormat resultFormat) throws RDFServiceException { + log.debug("sparqlSelectQuery: " + query.replaceAll("\\s+", " ")); ResultSet resultSet = ResultSetFactory.fromJSON( s.sparqlSelectQuery(query, RDFService.ResultFormat.JSON)); List solnList = getSolutionList(resultSet); @@ -178,6 +191,7 @@ public class LanguageFilteringRDFService implements RDFService { continue; } Collections.sort(candidatesForRemoval, new RowIndexedLiteralSortByLang()); + log.debug("sorted RowIndexedLiterals: " + showSortedRILs(candidatesForRemoval)); Iterator candIt = candidatesForRemoval.iterator(); String langRegister = null; boolean chuckRemaining = false; @@ -223,7 +237,15 @@ public class LanguageFilteringRDFService implements RDFService { return new ByteArrayInputStream(outputStream.toByteArray()); } - private class RowIndexedLiteral { + private String showSortedRILs(List candidatesForRemoval) { 
+ List langstrings = new ArrayList(); + for (RowIndexedLiteral ril: candidatesForRemoval) { + langstrings.add(ril.getLiteral().getLanguage()); + } + return langstrings.toString(); + } + + private class RowIndexedLiteral { private Literal literal; private int index; @@ -324,37 +346,44 @@ public class LanguageFilteringRDFService implements RDFService { } private class LangSort { + // any inexact match is worse than any exact match + private int inexactMatchPenalty = langs.size(); + // no language is worse than any inexact match (unless it is one of the preferred languages) + private int noLanguage = 2 * inexactMatchPenalty; + // no match is worse than no language. + private int noMatch = noLanguage+1; protected int compareLangs(String t1lang, String t2lang) { - t1lang = StringUtils.lowerCase(t1lang); - t2lang = StringUtils.lowerCase(t2lang); - if ( t1lang == null && t2lang == null) { - return 0; - } else if (t1lang == null) { - return 1; - } else if (t2lang == null) { - return -1; - } else { - int t1langPref = langs.indexOf(t1lang); - int t2langPref = langs.indexOf(t2lang); - if (t1langPref == -1 && t2langPref == -1) { - if ("".equals(t1lang) && "".equals(t2lang)) { - return 0; - } else if ("".equals(t1lang) && !("".equals(t2lang))) { - return -1; - } else { - return 1; - } - } else if (t1langPref > -1 && t2langPref == -1) { - return -1; - } else if (t1langPref == -1 && t2langPref > -1) { - return 1; - } else { - return t1langPref - t2langPref; - } - } + return languageIndex(t1lang) - languageIndex(t2lang); } + /** + * Return index of exact match, or index of partial match, a + * language-free, or no match. 
+ */ + private int languageIndex(String lang) { + if (lang == null) { + lang = ""; + } + + int index = langs.indexOf(lang); + if (index >= 0) { + return index; + } + + if (lang.length() > 2) { + index = langs.indexOf(lang.substring(0, 2)); + if (index >= 0) { + return index + inexactMatchPenalty; + } + } + + if (lang.isEmpty()) { + return noLanguage; + } + + return noMatch; + } } private class RowIndexedLiteralSortByLang extends LangSort implements Comparator { diff --git a/webapp/test/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFServiceTest.java b/webapp/test/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFServiceTest.java new file mode 100644 index 000000000..5535c22ec --- /dev/null +++ b/webapp/test/edu/cornell/mannlib/vitro/webapp/rdfservice/filter/LanguageFilteringRDFServiceTest.java @@ -0,0 +1,314 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.rdfservice.filter; + +import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; + +import java.lang.reflect.Constructor; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.log4j.Level; +import org.junit.Before; +import org.junit.Test; + +import stubs.com.hp.hpl.jena.rdf.model.LiteralStub; + +import com.hp.hpl.jena.rdf.model.Literal; + +import edu.cornell.mannlib.vitro.testing.AbstractTestClass; + +/** + * This is the matching order we expect to see: + *
+ * exact match to preferred, by order.
+ * partial match to preferred, by order.
+ * no language tag: vanilla (empty string) or null
+ * no match
+ * 
+ */ +public class LanguageFilteringRDFServiceTest extends AbstractTestClass { + private static final Log log = LogFactory + .getLog(LanguageFilteringRDFServiceTest.class); + + private static final String COLLATOR_CLASSNAME = "edu.cornell.mannlib.vitro.webapp.rdfservice.filter.LanguageFilteringRDFService$RowIndexedLiteralSortByLang"; + private static final String RIL_CLASSNAME = "edu.cornell.mannlib.vitro.webapp.rdfservice.filter.LanguageFilteringRDFService$RowIndexedLiteral"; + + private LanguageFilteringRDFService filteringRDFService; + private List listOfRowIndexedLiterals; + private int literalIndex; + + private List preferredLanguages; + private List availableLanguages; + private List expectedSortOrders; + + @Before + public void setup() { + setLoggerLevel(this.getClass(), Level.DEBUG); + setLoggerLevel(LanguageFilteringRDFService.class, Level.DEBUG); + } + + // ---------------------------------------------------------------------- + // The tests + // ---------------------------------------------------------------------- + + @Test + public void singleMatch() { + preferredLanguages = list("en-US"); + availableLanguages = list("en-US"); + expectedSortOrders = list("en-US"); + testArbitraryOrder(); + } + + @Test + public void singleNoMatch() { + preferredLanguages = list("en-US"); + availableLanguages = list("es-MX"); + expectedSortOrders = list("es-MX"); + testArbitraryOrder(); + } + + @Test + public void doubleMatch() { + preferredLanguages = list("en-US", "es-MX"); + availableLanguages = list("en-US", "es-MX"); + expectedSortOrders = list("en-US", "es-MX"); + testBothWays(); + } + + @Test + public void noMatches() { + preferredLanguages = list("es-MX"); + availableLanguages = list("en-US", "fr-FR"); + expectedSortOrders = list("en-US", "fr-FR"); + testArbitraryOrder(); + } + + @Test + public void partialMatches() { + preferredLanguages = list("en", "es"); + availableLanguages = list("en-US", "es-MX"); + expectedSortOrders = list("en-US", "es-MX"); + 
testBothWays(); + } + + @Test + public void matchIsBetterThanNoMatch() { + preferredLanguages = list("en-US", "es-MX"); + availableLanguages = list("en-US", "fr-FR"); + expectedSortOrders = list("en-US", "fr-FR"); + testBothWays(); + } + + @Test + public void matchIsBetterThanPartialMatch() { + preferredLanguages = list("es-ES", "en-US"); + availableLanguages = list("en-US", "es-MX"); + expectedSortOrders = list("en-US", "es-MX"); + testBothWays(); + } + + @Test + public void exactMatchIsBetterThanPartialMatch() { + preferredLanguages = list("es"); + availableLanguages = list("es","es-MX"); + expectedSortOrders = list("es", "es-MX"); + testBothWays(); + } + + @Test + public void matchIsBetterThanVanilla() { + preferredLanguages = list("en-US"); + availableLanguages = list("en-US", ""); + expectedSortOrders = list("en-US", ""); + testBothWays(); + } + + @Test + public void partialMatchIsBetterThanVanilla() { + preferredLanguages = list("es-MX"); + availableLanguages = list("es-ES", ""); + expectedSortOrders = list("es-ES", ""); + testBothWays(); + } + + @Test + public void vanillaIsBetterThanNoMatch() { + preferredLanguages = list("es-MX"); + availableLanguages = list("en-US", ""); + expectedSortOrders = list("", "en-US"); + testBothWays(); + } + + @Test + public void omnibus() { + preferredLanguages = list("es-MX", "es", "en-UK", "es-PE", "fr"); + availableLanguages = list("es-MX", "es", "fr", "es-ES", "fr-FR", "", "de-DE"); + expectedSortOrders = list("es-MX", "es", "fr", "es-ES", "fr-FR", "", "de-DE"); + testBothWays(); + } + + /** + * TODO test plan + * + *
+	 * single match
+	 * single no match
+	 * double match both ways
+	 * double, one match, both ways
+	 * double, no match
+	 * double, both match on base language only, both ways
+	 * double, one exact match, one base-language match, both ways
+	 * double, one vanilla, one match, both ways
+	 * double, one vanilla, one no match, both ways
+	 * 
+ */ + + // ---------------------------------------------------------------------- + // Helper methods + // ---------------------------------------------------------------------- + + private void testBothWays() { + createLanguageFilter(); + + buildListOfLiterals(); + sortListOfLiterals(); + assertLanguageOrder("sort literals"); + + buildReversedListOfLiterals(); + sortListOfLiterals(); + assertLanguageOrder("sort reversed literals"); + } + + private void testArbitraryOrder() { + createLanguageFilter(); + + buildListOfLiterals(); + sortListOfLiterals(); + assertLanguages("sort literals"); + + buildReversedListOfLiterals(); + sortListOfLiterals(); + assertLanguages("sort reversed literals"); + + } + + private List list(String... strings) { + return new ArrayList(Arrays.asList(strings)); + } + + private void createLanguageFilter() { + filteringRDFService = new LanguageFilteringRDFService(null, + preferredLanguages); + } + + private void buildListOfLiterals() { + List list = new ArrayList(); + for (String language : availableLanguages) { + list.add(buildRowIndexedLiteral(language)); + } + listOfRowIndexedLiterals = list; + } + + private void buildReversedListOfLiterals() { + List list = new ArrayList(); + for (String language : availableLanguages) { + list.add(0, buildRowIndexedLiteral(language)); + } + listOfRowIndexedLiterals = list; + } + + private void sortListOfLiterals() { + log.debug("before sorting: " + + languagesFromLiterals(listOfRowIndexedLiterals)); + Comparator comparator = buildRowIndexedLiteralSortByLang(); + Collections.sort(listOfRowIndexedLiterals, comparator); + } + + private void assertLanguageOrder(String message) { + List expectedLanguages = expectedSortOrders; + log.debug("expected order: " + expectedLanguages); + + List actualLanguages = languagesFromLiterals(listOfRowIndexedLiterals); + log.debug("actual order: " + actualLanguages); + + assertEquals(message, expectedLanguages, actualLanguages); + } + + private void assertLanguages(String 
message) { + Set expectedLanguages = new HashSet(expectedSortOrders); + log.debug("expected languages: " + expectedLanguages); + + Set actualLanguages = new HashSet( + languagesFromLiterals(listOfRowIndexedLiterals)); + log.debug("actual languages: " + actualLanguages); + + assertEquals(message, expectedLanguages, actualLanguages); + } + + private List languagesFromLiterals(List literals) { + List actualLanguages = new ArrayList(); + for (Object ril : literals) { + actualLanguages.add(getLanguageFromRowIndexedLiteral(ril)); + } + return actualLanguages; + } + + // ---------------------------------------------------------------------- + // Reflection methods to get around "private" declarations. + // ---------------------------------------------------------------------- + + private Object buildRowIndexedLiteral(String language) { + try { + Class clazz = Class.forName(RIL_CLASSNAME); + Class[] argTypes = { LanguageFilteringRDFService.class, + Literal.class, Integer.TYPE }; + Constructor constructor = clazz.getDeclaredConstructor(argTypes); + constructor.setAccessible(true); + + Literal l = new LiteralStub(language); + int i = literalIndex++; + return constructor.newInstance(filteringRDFService, l, i); + } catch (Exception e) { + throw new RuntimeException( + "Could not create a row-indexed literal", e); + } + } + + @SuppressWarnings("unchecked") + private Comparator buildRowIndexedLiteralSortByLang() { + try { + Class clazz = Class.forName(COLLATOR_CLASSNAME); + Class[] argTypes = { LanguageFilteringRDFService.class }; + Constructor constructor = clazz.getDeclaredConstructor(argTypes); + constructor.setAccessible(true); + + return (Comparator) constructor + .newInstance(filteringRDFService); + } catch (Exception e) { + throw new RuntimeException("Could not create a collator", e); + } + } + + private String getLanguageFromRowIndexedLiteral(Object ril) { + try { + Method m = ril.getClass().getDeclaredMethod("getLiteral"); + m.setAccessible(true); + Literal l = 
(Literal) m.invoke(ril); + return l.getLanguage(); + } catch (Exception e) { + throw new RuntimeException( + "Could not get the Literal from a RowIndexedLiteral", e); + } + } + +} diff --git a/webapp/test/stubs/com/hp/hpl/jena/rdf/model/LiteralStub.java b/webapp/test/stubs/com/hp/hpl/jena/rdf/model/LiteralStub.java new file mode 100644 index 000000000..5bcbd71c3 --- /dev/null +++ b/webapp/test/stubs/com/hp/hpl/jena/rdf/model/LiteralStub.java @@ -0,0 +1,177 @@ +package stubs.com.hp.hpl.jena.rdf.model; + +import com.hp.hpl.jena.datatypes.RDFDatatype; +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.RDFVisitor; +import com.hp.hpl.jena.rdf.model.Resource; + +/** + * Only implemented what I needed so far. The rest is left as an exercise for + * the student. + */ +public class LiteralStub implements Literal { + // ---------------------------------------------------------------------- + // Stub infrastructure + // ---------------------------------------------------------------------- + + final String language; + + public LiteralStub(String language) { + this.language = language; + } + + // ---------------------------------------------------------------------- + // Stub methods + // ---------------------------------------------------------------------- + + @Override + public boolean isLiteral() { + return true; + } + + @Override + public boolean isAnon() { + return false; + } + + @Override + public boolean isResource() { + return false; + } + + @Override + public boolean isURIResource() { + return false; + } + + @Override + public Literal asLiteral() { + return this; + } + + @Override + public Resource asResource() { + throw new ClassCastException(); + } + + @Override + public String getLanguage() { + return language; + } + + // ---------------------------------------------------------------------- + // Un-implemented methods + 
// ---------------------------------------------------------------------- + + @Override + public T as(Class view) { + throw new RuntimeException("LiteralStub.as() not implemented."); + } + + @Override + public boolean canAs(Class arg0) { + throw new RuntimeException("LiteralStub.canAs() not implemented."); + } + + @Override + public Model getModel() { + throw new RuntimeException("LiteralStub.getModel() not implemented."); + } + + @Override + public Object visitWith(RDFVisitor arg0) { + throw new RuntimeException("LiteralStub.visitWith() not implemented."); + } + + @Override + public Node asNode() { + throw new RuntimeException("LiteralStub.asNode() not implemented."); + } + + @Override + public boolean getBoolean() { + throw new RuntimeException("LiteralStub.getBoolean() not implemented."); + } + + @Override + public byte getByte() { + throw new RuntimeException("LiteralStub.getByte() not implemented."); + } + + @Override + public char getChar() { + throw new RuntimeException("LiteralStub.getChar() not implemented."); + } + + @Override + public RDFDatatype getDatatype() { + throw new RuntimeException("LiteralStub.getDatatype() not implemented."); + } + + @Override + public String getDatatypeURI() { + throw new RuntimeException( + "LiteralStub.getDatatypeURI() not implemented."); + } + + @Override + public double getDouble() { + throw new RuntimeException("LiteralStub.getDouble() not implemented."); + } + + @Override + public float getFloat() { + throw new RuntimeException("LiteralStub.getFloat() not implemented."); + } + + @Override + public int getInt() { + throw new RuntimeException("LiteralStub.getInt() not implemented."); + } + + @Override + public String getLexicalForm() { + throw new RuntimeException( + "LiteralStub.getLexicalForm() not implemented."); + } + + @Override + public long getLong() { + throw new RuntimeException("LiteralStub.getLong() not implemented."); + } + + @Override + public short getShort() { + throw new 
RuntimeException("LiteralStub.getShort() not implemented."); + } + + @Override + public String getString() { + throw new RuntimeException("LiteralStub.getString() not implemented."); + } + + @Override + public Object getValue() { + throw new RuntimeException("LiteralStub.getValue() not implemented."); + } + + @Override + public Literal inModel(Model arg0) { + throw new RuntimeException("LiteralStub.inModel() not implemented."); + } + + @Override + public boolean isWellFormedXML() { + throw new RuntimeException( + "LiteralStub.isWellFormedXML() not implemented."); + } + + @Override + public boolean sameValueAs(Literal arg0) { + throw new RuntimeException("LiteralStub.sameValueAs() not implemented."); + } + +}