NIHVIVO-3664 language filter

This commit is contained in:
brianjlowe 2012-06-22 18:48:36 +00:00
parent 9ca30d634a
commit bf9c72daa5
9 changed files with 433 additions and 30 deletions

View file

@ -105,6 +105,21 @@ VitroConnection.DataSource.dbtype = MySQL
VitroConnection.DataSource.driver = com.mysql.jdbc.Driver
VitroConnection.DataSource.validationQuery = SELECT 1
#
# Optional URI of a SPARQL endpoint from which VIVO should display data.
# If set, VIVO will use this endpoint as its triple store instead of the
# SDB database.
#
#VitroConnection.DataSource.endpointURI =
#
# Optional URI to use for modifying the above endpoint via SPARQL UPDATE.
# This setting is only necessary if the endpoint does not support updates via
# its main URI. (This may be done for access control purposes.)
# If the endpointURI above is not set, this setting has no effect.
#
#VitroConnection.DataSource.updateEndpointURI =
#
# The email address of the root user for the VIVO application. The password
# for this user is initially set to "rootPassword", but you will be asked to
@ -134,3 +149,8 @@ selfEditing.idMatchingProperty = http://vitro.mydomain.edu/ns#networkId
# If this is omitted, defaults to http://www.w3.org/2002/07/owl#Thing
proxy.eligibleTypeList = http://www.w3.org/2002/07/owl#Thing
#
# Show only the most appropriate data values based on the Accept-Language
# header supplied by the browser. Default is true if not set.
#
RDFService.languageFilter = true

View file

@ -49,13 +49,15 @@ public class IndividualDaoSDB extends IndividualDaoJena {
private DatasetWrapperFactory dwf;
private SDBDatasetMode datasetMode;
private WebappDaoFactorySDB wadf;
public IndividualDaoSDB(DatasetWrapperFactory dwf,
SDBDatasetMode datasetMode,
WebappDaoFactoryJena wadf) {
WebappDaoFactorySDB wadf) {
super(wadf);
this.dwf = dwf;
this.datasetMode = datasetMode;
this.wadf = wadf;
}
protected DatasetWrapper getDatasetWrapper() {
@ -67,7 +69,7 @@ public class IndividualDaoSDB extends IndividualDaoJena {
return new IndividualSDB(individualURI,
this.dwf,
datasetMode,
getWebappDaoFactory());
wadf);
} catch (IndividualNotFoundException e) {
// If the individual does not exist, return null.
return null;
@ -255,7 +257,7 @@ public class IndividualDaoSDB extends IndividualDaoJena {
private Individual makeIndividual(String uri, String label) throws IndividualNotFoundException {
Individual ent = new IndividualSDB(uri,
this.dwf, datasetMode, getWebappDaoFactory(),
this.dwf, datasetMode, wadf,
SKIP_INITIALIZATION);
ent.setName(label);
return ent;

View file

@ -53,13 +53,16 @@ import edu.cornell.mannlib.vitro.webapp.dao.VClassDao;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactorySDB.SDBDatasetMode;
import edu.cornell.mannlib.vitro.webapp.filestorage.model.ImageInfo;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
public class IndividualSDB extends IndividualImpl implements Individual {
private static final Log log = LogFactory.getLog(
IndividualSDB.class.getName());
private OntResource ind = null;
private WebappDaoFactoryJena webappDaoFactory = null;
private WebappDaoFactorySDB webappDaoFactory = null;
private Float _searchBoostJena = null;
private boolean retreivedNullRdfsLabel = false;
private DatasetWrapperFactory dwf = null;
@ -72,7 +75,7 @@ public class IndividualSDB extends IndividualImpl implements Individual {
public IndividualSDB(String individualURI,
DatasetWrapperFactory datasetWrapperFactory,
SDBDatasetMode datasetMode,
WebappDaoFactoryJena wadf,
WebappDaoFactorySDB wadf,
Model initModel) {
this.individualURI = individualURI;
this.dwf = datasetWrapperFactory;
@ -107,7 +110,7 @@ public class IndividualSDB extends IndividualImpl implements Individual {
public IndividualSDB(String individualURI,
DatasetWrapperFactory datasetWrapperFactory,
SDBDatasetMode datasetMode,
WebappDaoFactoryJena wadf,
WebappDaoFactorySDB wadf,
boolean skipInitialization) throws IndividualNotFoundException {
this.individualURI = individualURI;
this.datasetMode = datasetMode;
@ -181,7 +184,7 @@ public class IndividualSDB extends IndividualImpl implements Individual {
public IndividualSDB(String individualURI,
DatasetWrapperFactory datasetWrapperFactory,
SDBDatasetMode datasetMode,
WebappDaoFactoryJena wadf) throws IndividualNotFoundException {
WebappDaoFactorySDB wadf) throws IndividualNotFoundException {
this(individualURI,
datasetWrapperFactory,
datasetMode,
@ -758,16 +761,15 @@ public class IndividualSDB extends IndividualImpl implements Individual {
? WebappDaoFactorySDB.SDBDatasetMode
.ASSERTIONS_ONLY
: datasetMode))
+ "} \n";
DatasetWrapper w = getDatasetWrapper();
Dataset dataset = w.getDataset();
dataset.getLock().enterCriticalSection(Lock.READ);
+ "} \n";
RDFService service = webappDaoFactory.getRDFService();
try {
tempModel = QueryExecutionFactory.create(
QueryFactory.create(getTypes), dataset).execConstruct();
} finally {
dataset.getLock().leaveCriticalSection();
w.close();
tempModel = RDFServiceUtils.parseModel(
service.sparqlConstructQuery(
getTypes, RDFService.ModelSerializationFormat.N3),
RDFService.ModelSerializationFormat.N3);
} catch (RDFServiceException e) {
throw new RuntimeException(e);
}
}
StmtIterator stmtItr = tempModel.listStatements(

View file

@ -40,15 +40,17 @@ public class ObjectPropertyStatementDaoSDB extends
private DatasetWrapperFactory dwf;
private SDBDatasetMode datasetMode;
private WebappDaoFactorySDB wadf;
public ObjectPropertyStatementDaoSDB(
RDFService rdfService,
DatasetWrapperFactory dwf,
SDBDatasetMode datasetMode,
WebappDaoFactoryJena wadf) {
WebappDaoFactorySDB wadf) {
super (rdfService, dwf, wadf);
this.dwf = dwf;
this.datasetMode = datasetMode;
this.wadf = wadf;
}
@Override
@ -129,7 +131,7 @@ public class ObjectPropertyStatementDaoSDB extends
objPropertyStmt.getObjectURI(),
this.dwf,
datasetMode,
getWebappDaoFactory());
wadf);
objPropertyStmt.setObject(objInd);
}

View file

@ -56,6 +56,7 @@ public class WebappDaoFactorySDB extends WebappDaoFactoryJena {
this.config = base.config;
this.userURI = userURI;
this.dwf = base.dwf;
this.rdfService = base.rdfService;
}
@Override
@ -97,6 +98,10 @@ public class WebappDaoFactorySDB extends WebappDaoFactoryJena {
return new WebappDaoFactorySDB(this, userURI);
}
public RDFService getRDFService() {
return this.rdfService;
}
public enum SDBDatasetMode {
ASSERTIONS_ONLY, INFERENCES_ONLY, ASSERTIONS_AND_INFERENCES
}

View file

@ -19,17 +19,12 @@ import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import org.apache.commons.dbcp.BasicDataSource;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.graph.Graph;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.OntModelSpec;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.sdb.StoreDesc;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.controller.VitroRequest;
@ -38,15 +33,12 @@ import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactoryConfig;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.OntModelSelector;
import edu.cornell.mannlib.vitro.webapp.dao.jena.RDFServiceDataset;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SparqlDataset;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SparqlDatasetGraph;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SparqlGraphMultilingual;
import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactorySDB;
import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactorySDB.SDBDatasetMode;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.rdfservice.filter.LanguageFilteringRDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
import edu.cornell.mannlib.vitro.webapp.servlet.setup.JenaDataSourceSetupBase;
public class WebappDaoFactorySDBPrep implements Filter {
@ -111,6 +103,13 @@ public class WebappDaoFactorySDBPrep implements Filter {
RDFServiceFactory factory = RDFServiceUtils.getRDFServiceFactory(_ctx);
RDFService rdfService = factory.getRDFService();
if (!"false".equals(
ConfigurationProperties.getBean(vreq).getProperty(
"RDFService.languageFilter", "true"))) {
rdfService = new LanguageFilteringRDFService(rdfService, langs);
}
Dataset dataset = new RDFServiceDataset(rdfService);
wadf = new WebappDaoFactorySDB(rdfService, oms, config);
WebappDaoFactory assertions = new WebappDaoFactorySDB(

View file

@ -12,9 +12,9 @@ import com.hp.hpl.jena.sparql.engine.binding.Binding;
public class FilteredResultSet implements ResultSet {
private Iterator<QuerySolution> solutIt;
private ResultSet originalResultSet;
private int rowNum = -1;
protected Iterator<QuerySolution> solutIt;
protected ResultSet originalResultSet;
protected int rowNum = -1;
public FilteredResultSet (List<QuerySolution> solutions, ResultSet originalResultSet) {
this.solutIt = solutions.iterator();

View file

@ -0,0 +1,371 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.rdfservice.filter;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.ResultSetFactory;
import com.hp.hpl.jena.query.ResultSetFormatter;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeListener;
import edu.cornell.mannlib.vitro.webapp.rdfservice.ChangeSet;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
/**
 * An {@link RDFService} wrapper that post-processes query results so that,
 * where several literal values differ only in language, only the values in
 * the most preferred language are returned.
 *
 * <p>CONSTRUCT and DESCRIBE results are filtered per subject/predicate pair;
 * SELECT results are filtered per variable among rows that agree on every
 * other variable. ASK queries, updates and all other operations are passed
 * straight through to the wrapped service.
 *
 * <p>Language tags are compared exactly as given (case-sensitively).
 * NOTE(review): language tags are normally treated as case-insensitive;
 * confirm that callers normalize the preference list and the stored tags.
 */
public class LanguageFilteringRDFService implements RDFService {

    private static final Log log = LogFactory.getLog(LanguageFilteringRDFService.class);

    /** The wrapped service that actually executes queries and updates. */
    private final RDFService s;

    /** Preferred language tags, most preferred first. Never null. */
    private final List<String> langs;

    /**
     * @param service the service to delegate to
     * @param langs   preferred language tags, most preferred first; a null
     *                list is treated as "no preferences"
     */
    public LanguageFilteringRDFService(RDFService service, List<String> langs) {
        this.s = service;
        this.langs = (langs != null) ? langs : Collections.<String>emptyList();
    }

    /** Delegates to the wrapped service without filtering. */
    @Override
    public boolean changeSetUpdate(ChangeSet changeSet)
            throws RDFServiceException {
        return s.changeSetUpdate(changeSet);
    }

    /** Delegates to the wrapped service without filtering. */
    @Override
    public void newIndividual(String individualURI, String individualTypeURI)
            throws RDFServiceException {
        s.newIndividual(individualURI, individualTypeURI);
    }

    /** Delegates to the wrapped service without filtering. */
    @Override
    public void newIndividual(String individualURI,
            String individualTypeURI, String graphURI)
            throws RDFServiceException {
        s.newIndividual(individualURI, individualTypeURI, graphURI);
    }

    /**
     * Runs the CONSTRUCT query on the wrapped service, language-filters the
     * resulting model and re-serializes it in the requested format.
     */
    @Override
    public InputStream sparqlConstructQuery(String query,
            ModelSerializationFormat resultFormat)
            throws RDFServiceException {
        Model m = RDFServiceUtils.parseModel(
                s.sparqlConstructQuery(query, resultFormat), resultFormat);
        return outputModel(filterModel(m), resultFormat);
    }

    /**
     * Runs the DESCRIBE query on the wrapped service, language-filters the
     * resulting model and re-serializes it in the requested format.
     */
    @Override
    public InputStream sparqlDescribeQuery(String query,
            ModelSerializationFormat resultFormat)
            throws RDFServiceException {
        Model m = RDFServiceUtils.parseModel(
                s.sparqlDescribeQuery(query, resultFormat), resultFormat);
        return outputModel(filterModel(m), resultFormat);
    }

    /** Serializes the model into an in-memory stream in the given format. */
    private InputStream outputModel(Model m, ModelSerializationFormat resultFormat) {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        m.write(out, RDFServiceUtils.getSerializationFormatString(resultFormat));
        return new ByteArrayInputStream(out.toByteArray());
    }

    /**
     * Removes, for every subject/predicate pair that has more than one
     * literal value, all literal statements that are not in the best-ranked
     * language for that pair. Statements with resource objects are never
     * removed. The model is modified in place and returned.
     */
    private Model filterModel(Model m) {
        List<Statement> retractions = new ArrayList<Statement>();
        Comparator<Statement> byLanguage = new StatementSortByLang();
        StmtIterator stmtIt = m.listStatements();
        while (stmtIt.hasNext()) {
            Statement stmt = stmtIt.nextStatement();
            if (!stmt.getObject().isLiteral()) {
                continue;
            }
            // Only literal values compete with each other; resource-valued
            // statements for the same property are left out of the ranking.
            List<Statement> candidatesForRemoval = new ArrayList<Statement>();
            for (Statement sibling : m.listStatements(
                    stmt.getSubject(), stmt.getPredicate(), (RDFNode) null).toList()) {
                if (sibling.getObject().isLiteral()) {
                    candidatesForRemoval.add(sibling);
                }
            }
            if (candidatesForRemoval.size() <= 1) {
                continue;
            }
            Collections.sort(candidatesForRemoval, byLanguage);
            // After sorting, the first candidate carries the winning language;
            // everything in a different language is dropped.
            String keptLang = languageOf(candidatesForRemoval.get(0));
            for (Statement candidate : candidatesForRemoval.subList(
                    1, candidatesForRemoval.size())) {
                if (!sameLanguage(keptLang, languageOf(candidate))) {
                    retractions.add(candidate);
                }
            }
        }
        // Retract only after iteration so the model is not changed while
        // its statements are being listed.
        m.remove(retractions);
        return m;
    }

    /**
     * Runs the SELECT query on the wrapped service (always requesting JSON),
     * drops rows that merely repeat another row in a less preferred language,
     * and serializes the surviving rows in the requested format.
     *
     * @throws RDFServiceException if the wrapped service fails or the result
     *                             format is not recognized
     */
    @Override
    public InputStream sparqlSelectQuery(String query,
            ResultFormat resultFormat) throws RDFServiceException {
        ResultSet resultSet = ResultSetFactory.fromJSON(
                s.sparqlSelectQuery(query, RDFService.ResultFormat.JSON));
        List<QuerySolution> solnList = getSolutionList(resultSet);
        List<String> vars = resultSet.getResultVars();
        for (String var : vars) {
            filterSolutionsOnVariable(solnList, var, vars);
        }
        // Rejected rows were replaced by null placeholders; squeeze them out.
        List<QuerySolution> compactedList = new ArrayList<QuerySolution>();
        for (QuerySolution soln : solnList) {
            if (soln != null) {
                compactedList.add(soln);
            }
        }
        ResultSet filtered = new FilteredResultSet(compactedList, resultSet);
        return serializeResultSet(filtered, resultFormat);
    }

    /**
     * For one variable, finds groups of rows that are identical except for
     * the literal bound to that variable and replaces every row of a group
     * that is not in the group's best-ranked language with null.
     */
    private void filterSolutionsOnVariable(List<QuerySolution> solnList,
            String var, List<String> vars) {
        Comparator<RowIndexedLiteral> byLanguage = new RowIndexedLiteralSortByLang();
        for (int i = 0; i < solnList.size(); i++) {
            QuerySolution row = solnList.get(i);
            if (row == null) {
                continue;
            }
            RDFNode node = row.get(var);
            if (node == null || !node.isLiteral()) {
                continue;
            }
            List<RowIndexedLiteral> candidatesForRemoval =
                    new ArrayList<RowIndexedLiteral>();
            candidatesForRemoval.add(new RowIndexedLiteral(node.asLiteral(), i));
            for (int j = i + 1; j < solnList.size(); j++) {
                QuerySolution other = solnList.get(j);
                if (other != null && matchesExceptForVar(row, other, var, vars)) {
                    candidatesForRemoval.add(
                            new RowIndexedLiteral(other.getLiteral(var), j));
                }
            }
            if (candidatesForRemoval.size() == 1) {
                continue;
            }
            Collections.sort(candidatesForRemoval, byLanguage);
            String keptLang = candidatesForRemoval.get(0).getLiteral().getLanguage();
            for (RowIndexedLiteral candidate : candidatesForRemoval.subList(
                    1, candidatesForRemoval.size())) {
                if (!sameLanguage(keptLang, candidate.getLiteral().getLanguage())) {
                    solnList.set(candidate.getIndex(), null);
                }
            }
        }
    }

    /** Writes the result set to an in-memory stream in the given format. */
    private InputStream serializeResultSet(ResultSet filtered,
            ResultFormat resultFormat) throws RDFServiceException {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        switch (resultFormat) {
            case CSV:
                ResultSetFormatter.outputAsCSV(outputStream, filtered);
                break;
            case TEXT:
                ResultSetFormatter.out(outputStream, filtered);
                break;
            case JSON:
                ResultSetFormatter.outputAsJSON(outputStream, filtered);
                break;
            case XML:
                ResultSetFormatter.outputAsXML(outputStream, filtered);
                break;
            default:
                throw new RDFServiceException("unrecognized result format");
        }
        return new ByteArrayInputStream(outputStream.toByteArray());
    }

    /** A literal together with the index of the result row it came from. */
    private static final class RowIndexedLiteral {

        private final Literal literal;
        private final int index;

        public RowIndexedLiteral(Literal literal, int index) {
            this.literal = literal;
            this.index = index;
        }

        public Literal getLiteral() {
            return this.literal;
        }

        public int getIndex() {
            return index;
        }
    }

    /**
     * Tells whether two rows both bind a literal to {@code varName} and bind
     * equal nodes (or nothing) to every other variable in {@code varList}.
     *
     * @throws IllegalArgumentException if {@code varName} is null
     */
    private boolean matchesExceptForVar(QuerySolution a, QuerySolution b,
            String varName, List<String> varList) {
        if (varName == null) {
            throw new IllegalArgumentException("expected non-null variable name");
        }
        for (String var : varList) {
            RDFNode nodea = a.get(var);
            RDFNode nodeb = b.get(var);
            if (var.equals(varName)) {
                if (nodea == null || !nodea.isLiteral()
                        || nodeb == null || !nodeb.isLiteral()) {
                    return false;
                }
            } else if (nodea == null || nodeb == null) {
                // Unbound matches only unbound.
                if (nodea != null || nodeb != null) {
                    return false;
                }
            } else if (!nodea.equals(nodeb)) {
                return false;
            }
        }
        return true;
    }

    /** Drains the result set into a list, preserving row order. */
    private List<QuerySolution> getSolutionList(ResultSet resultSet) {
        List<QuerySolution> solnList = new ArrayList<QuerySolution>();
        while (resultSet.hasNext()) {
            solnList.add(resultSet.nextSolution());
        }
        return solnList;
    }

    /** Delegates to the wrapped service without filtering. */
    @Override
    public boolean sparqlAskQuery(String query) throws RDFServiceException {
        return s.sparqlAskQuery(query);
    }

    /** Delegates to the wrapped service. */
    @Override
    public List<String> getGraphURIs() throws RDFServiceException {
        return s.getGraphURIs();
    }

    /** Delegates to the wrapped service. */
    @Override
    public void getGraphMetadata() throws RDFServiceException {
        s.getGraphMetadata();
    }

    /** Delegates to the wrapped service. */
    @Override
    public String getDefaultWriteGraphURI() throws RDFServiceException {
        return s.getDefaultWriteGraphURI();
    }

    /** Delegates to the wrapped service. */
    @Override
    public void registerListener(ChangeListener changeListener)
            throws RDFServiceException {
        s.registerListener(changeListener);
    }

    /** Delegates to the wrapped service. */
    @Override
    public void unregisterListener(ChangeListener changeListener)
            throws RDFServiceException {
        s.unregisterListener(changeListener);
    }

    /** Delegates to the wrapped service. */
    @Override
    public ChangeSet manufactureChangeSet() {
        return s.manufactureChangeSet();
    }

    /** Closes the wrapped service. */
    @Override
    public void close() {
        s.close();
    }

    /** Returns the language tag of a statement whose object is a literal. */
    private static String languageOf(Statement stmt) {
        return stmt.getObject().asLiteral().getLanguage();
    }

    /** Null-safe equality of two language tags. */
    private static boolean sameLanguage(String lang1, String lang2) {
        return (lang1 == null) ? (lang2 == null) : lang1.equals(lang2);
    }

    /**
     * Position of the tag in the preference list, or
     * {@code Integer.MAX_VALUE} when the tag is not listed.
     */
    private int preferenceRank(String lang) {
        int index = langs.indexOf(lang);
        return (index == -1) ? Integer.MAX_VALUE : index;
    }

    /**
     * Orders language tags from most to least preferred: listed tags in list
     * order, then unlisted tags, then null. Unlisted tags are ordered among
     * themselves by plain string comparison, so that equal tags always end up
     * adjacent and the outcome does not depend on input order; this also puts
     * the empty tag ahead of any unlisted non-empty tag. The result is zero
     * only when both tags are equal.
     */
    private int compareLanguages(String lang1, String lang2) {
        if (lang1 == null) {
            return (lang2 == null) ? 0 : 1;
        }
        if (lang2 == null) {
            return -1;
        }
        int rank1 = preferenceRank(lang1);
        int rank2 = preferenceRank(lang2);
        if (rank1 != rank2) {
            return (rank1 < rank2) ? -1 : 1;
        }
        return lang1.compareTo(lang2);
    }

    /** Sorts row-indexed literals by language preference, best first. */
    private class RowIndexedLiteralSortByLang implements Comparator<RowIndexedLiteral> {
        public int compare(RowIndexedLiteral rilit1, RowIndexedLiteral rilit2) {
            return compareLanguages(
                    rilit1.getLiteral().getLanguage(),
                    rilit2.getLiteral().getLanguage());
        }
    }

    /**
     * Sorts statements by the language preference of their literal objects,
     * best first. Must only be applied to statements whose object is a
     * literal.
     */
    private class StatementSortByLang implements Comparator<Statement> {
        public int compare(Statement s1, Statement s2) {
            return compareLanguages(languageOf(s1), languageOf(s2));
        }
    }
}

View file

@ -22,6 +22,8 @@ public class RDFServiceUtils {
private static final String RDFSERVICEFACTORY_ATTR =
RDFServiceUtils.class.getName() + ".RDFServiceFactory";
private static final String RDFSERVICEFACTORY_FILTERING_ATTR =
RDFServiceUtils.class.getName() + ".RDFServiceFactory.Filtering";
public static RDFServiceFactory getRDFServiceFactory(ServletContext context) {
Object o = context.getAttribute(RDFSERVICEFACTORY_ATTR);