VIVO-870 Move the DocumentModifiers into the configuration.

This commit is contained in:
Jim Blake 2015-01-16 16:52:52 -05:00
parent 4fac4b7576
commit 8c3e06fc78
7 changed files with 100 additions and 268 deletions

View file

@ -60,13 +60,26 @@
# ------------------------------------
:documentModifier_nameFields
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.NameFields> ,
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> .
:documentModifier_nameBoost
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.NameBoost> ,
:documentModifier_AllNames
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.SimpleSparqlQueryDocumentModifier> ,
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
rdfs:label "All labels are added to name fields." ;
:hasTargetField "nameRaw" ;
:hasSparqlQuery """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT ?label
WHERE {
?uri rdfs:label ?label .
}
""" .
:documentModifier_NameFieldBooster
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.FieldBooster> ,
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
:hasTargetField "nameRaw" ;
:hasTargetField "nameLowercase" ;
:hasTargetField "nameUnstemmed" ;
:hasTargetField "nameStemmed" ;
:hasBoost "1.2"^^xsd:float .
:documentModifier_thumbnailImageUrl

View file

@ -0,0 +1,75 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
/**
 * A DocumentModifier that increases the boost of a configurable set of
 * search document fields by a configurable amount.
 *
 * The target field names and the boost value are supplied through the
 * annotated setter methods; {@link #validate()} rejects an instance that has
 * no boost, no target field, or repeated target field names.
 */
public class FieldBooster implements DocumentModifier {
	/** Names of the fields to boost, in the order they were added. */
	private final List<String> fieldNames = new ArrayList<>();

	/** Amount added to each target field's boost; null until it is set. */
	private volatile Float boost;

	/**
	 * Registers one more field whose boost should be increased.
	 *
	 * @param fieldName
	 *            name of a field in the search document
	 */
	@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasTargetField")
	public void addTargetField(String fieldName) {
		fieldNames.add(fieldName);
	}

	/**
	 * Sets the amount that is added to the boost of every target field.
	 *
	 * @param boost
	 *            the increment to apply
	 */
	@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBoost")
	public void setBoost(float boost) {
		this.boost = boost;
	}

	/**
	 * Checks that the instance has been completely and consistently
	 * configured.
	 *
	 * @throws IllegalStateException
	 *             if no boost was set, if no target field was added, or if the
	 *             same target field name was added more than once
	 */
	@Validation
	public void validate() {
		if (boost == null) {
			throw new IllegalStateException(
					"Configuration did not include a boost value.");
		}
		if (fieldNames.isEmpty()) {
			throw new IllegalStateException(
					"Configuration did not include a target field.");
		}

		List<String> repeatedNames = findRepeatedFieldNames();
		if (!repeatedNames.isEmpty()) {
			throw new IllegalStateException(
					"Configuration contains duplicate names for target fields: "
							+ repeatedNames);
		}
	}

	/**
	 * Collects every occurrence of a field name after its first one, keeping
	 * the order in which the names were added.
	 */
	private List<String> findRepeatedFieldNames() {
		Set<String> alreadySeen = new HashSet<>();
		List<String> repeated = new ArrayList<>();
		for (String name : fieldNames) {
			// Set.add() returns false when the name was already present.
			if (!alreadySeen.add(name)) {
				repeated.add(name);
			}
		}
		return repeated;
	}

	/**
	 * Adds the configured boost to the current boost of each target field
	 * that exists in the document. Target fields that the document does not
	 * contain are skipped.
	 *
	 * @param individual
	 *            not used by this modifier
	 * @param doc
	 *            the search document whose fields are boosted in place
	 */
	@Override
	public void modifyDocument(Individual individual, SearchInputDocument doc) {
		for (String targetName : fieldNames) {
			SearchInputField target = doc.getField(targetName);
			if (target == null) {
				continue;
			}
			target.setBoost(target.getBoost() + boost);
		}
	}

	/** Nothing to release. */
	@Override
	public void shutdown() {
		// do nothing.
	}

	@Override
	public String toString() {
		String description = "FieldBooster[fieldNames=" + fieldNames;
		description += ", boost=" + boost + "]";
		return description;
	}
}

View file

@ -1,66 +0,0 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_LOWERCASE;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_STEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_UNSTEMMED;
import java.util.Arrays;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
/**
 * A DocumentModifier that increases the boost of the name-related fields of
 * a search document by a configured amount.
 */
public class NameBoost implements DocumentModifier {
	/**
	 * The fields in the search document that are related to the name. If the
	 * schema is modified, consider whether this list of name fields to boost
	 * needs to change as well.
	 */
	private final String[] fieldsToBoost = { NAME_RAW, NAME_LOWERCASE,
			NAME_UNSTEMMED, NAME_STEMMED };

	/** Amount added to each name field's boost; null until it is set. */
	private volatile Float boost;

	/**
	 * Sets the amount that is added to the boost of every name field.
	 *
	 * @param boost
	 *            the increment to apply
	 */
	@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBoost")
	public void setBoost(float boost) {
		this.boost = boost;
	}

	/**
	 * Checks that a boost value has been provided.
	 *
	 * @throws IllegalStateException
	 *             if no boost was set
	 */
	@Validation
	public void validate() {
		if (boost != null) {
			return;
		}
		throw new IllegalStateException(
				"Configuration did not include a boost value.");
	}

	/**
	 * Adds the configured boost to the current boost of each name field that
	 * exists in the document. Name fields that the document does not contain
	 * are skipped.
	 *
	 * @param individual
	 *            not used by this modifier
	 * @param doc
	 *            the search document whose fields are boosted in place
	 */
	@Override
	public void modifyDocument(Individual individual, SearchInputDocument doc) {
		for (int i = 0; i < fieldsToBoost.length; i++) {
			SearchInputField nameField = doc.getField(fieldsToBoost[i]);
			if (nameField == null) {
				continue;
			}
			nameField.setBoost(nameField.getBoost() + boost);
		}
	}

	/** Nothing to release. */
	@Override
	public void shutdown() {
		// do nothing.
	}

	@Override
	public String toString() {
		String names = Arrays.toString(fieldsToBoost);
		return "NameBoost[fieldsToBoost=" + names + ", boost=" + boost + "]";
	}
}

View file

@ -1,79 +0,0 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser;
/**
 * Adds all labels to name fields, not just the one returned by
 * Individual.getName().
 *
 * The labels are obtained by running a SPARQL SELECT for the rdfs:label
 * values of the individual against the RDFService taken from the context
 * models, and are added to the NAME_RAW field as a single string in which
 * every label is followed by one space.
 */
public class NameFields implements DocumentModifier, ContextModelsUser {
	public static final Log log = LogFactory.getLog(NameFields.class.getName());

	/** Service used to run the label query; assigned by setContextModels(). */
	private volatile RDFService rdfService;

	/**
	 * Stores the RDFService of the supplied models for later queries.
	 *
	 * @param models
	 *            source of the RDFService
	 */
	@Override
	public void setContextModels(ContextModelAccess models) {
		this.rdfService = models.getRDFService();
	}

	/**
	 * Adds the rdfs:label values of the individual to the NAME_RAW field of
	 * the document. Does nothing if the individual or its URI is null. If the
	 * query or the reading of its result fails, the error is logged and the
	 * document is left unchanged.
	 *
	 * @param ind
	 *            the individual whose labels are looked up; may be null
	 * @param doc
	 *            the search document that receives the NAME_RAW field
	 */
	@Override
	public void modifyDocument(Individual ind, SearchInputDocument doc) {
		if (ind == null || ind.getURI() == null) {
			return;
		}

		// also run SPARQL query to get rdfs:label values
		String query = "SELECT ?label WHERE { "
				+ "<" + ind.getURI() + "> "
				+ "<http://www.w3.org/2000/01/rdf-schema#label> ?label }";

		// try-with-resources closes the reader, and with it the result
		// stream, on every path. The charset is explicit so that non-ASCII
		// labels do not depend on the platform default.
		// NOTE(review): assumes the CSV result is UTF-8 encoded, which is the
		// default for SPARQL CSV results -- confirm for this RDFService.
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(
				rdfService.sparqlSelectQuery(query, ResultFormat.CSV),
				java.nio.charset.StandardCharsets.UTF_8))) {
			StringBuilder labels = new StringBuilder();

			// throw out first line since it is just a header
			reader.readLine();

			String line;
			while ((line = reader.readLine()) != null) {
				labels.append(line).append(' ');
			}

			String nameValue = labels.toString();
			if (log.isDebugEnabled()) {
				log.debug("Adding labels for " + ind.getURI() + " \""
						+ nameValue + "\"");
			}
			// The field is added even when no label was found (empty string).
			doc.addField(NAME_RAW, nameValue);
		} catch (RDFServiceException e) {
			log.error("could not get the rdfs:label for " + ind.getURI(), e);
		} catch (IOException e) {
			log.error("could not get the rdfs:label for " + ind.getURI(), e);
		}
	}

	@Override
	public String toString() {
		return this.getClass().getSimpleName();
	}

	/** Nothing to release. */
	@Override
	public void shutdown() { /* nothing to do */ }
}

View file

@ -33,7 +33,6 @@ public class ExcludeBasedOnType implements SearchIndexExcluder {
}
List<VClass> vclasses = new ArrayList<>();
vclasses.add(ind.getVClass());
addToList(vclasses, ind.getVClasses());
for (VClass vclz : vclasses) {

View file

@ -1,109 +0,0 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService.CONTENT;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
import static org.junit.Assert.assertEquals;
import org.junit.Before;
import org.junit.Test;
import stubs.edu.cornell.mannlib.vitro.webapp.beans.IndividualStub;
import stubs.edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccessStub;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Statement;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.model.RDFServiceModel;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument;
/**
 * TODO NameFields should add the values as separate objects.
 *
 * Exercises NameFields.modifyDocument() against an in-memory model: each test
 * builds the expected document from a copy of the empty document, runs the
 * modifier, and compares the two.
 */
public class NameFieldsTest {
	private static final String INDIVIDUAL_URI = "http://mydomain.edu/individual/n3012";
	private static final String LABEL_PROPERTY_URI = "http://www.w3.org/2000/01/rdf-schema#label";

	private Model baseModel;
	private NameFields nameFields;
	private BaseSearchInputDocument doc;

	@Before
	public void setup() {
		baseModel = ModelFactory.createDefaultModel();
		doc = new BaseSearchInputDocument();

		// Route CONTENT queries to the in-memory model.
		ContextModelAccessStub modelAccess = new ContextModelAccessStub();
		modelAccess.setRDFService(CONTENT, new RDFServiceModel(baseModel));

		nameFields = new NameFields();
		nameFields.setContextModels(modelAccess);
	}

	@Test
	public void nullIndividual() {
		modifyAndCompare(null, copyOfDocument());
	}

	@Test
	public void nullUri() {
		modifyAndCompare(new IndividualStub(null), copyOfDocument());
	}

	@Test
	public void foundNoLabels() {
		SearchInputDocument expected = copyOfDocument();
		expected.addField(NAME_RAW, "");

		modifyAndCompare(new IndividualStub(INDIVIDUAL_URI), expected);
	}

	@Test
	public void foundOneLabel() {
		addLabel("label1");

		SearchInputDocument expected = copyOfDocument();
		expected.addField(NAME_RAW, "label1 ");

		modifyAndCompare(new IndividualStub(INDIVIDUAL_URI), expected);
	}

	@Test
	public void foundTwoLabels() {
		addLabel("label1");
		addLabel("label2");

		SearchInputDocument expected = copyOfDocument();
		expected.addField(NAME_RAW, "label2 label1 ");

		modifyAndCompare(new IndividualStub(INDIVIDUAL_URI), expected);
	}

	// ----------------------------------------------------------------------
	// Helper methods
	// ----------------------------------------------------------------------

	/** Adds an rdfs:label statement for the test individual to the model. */
	private void addLabel(String text) {
		Statement labelStatement = baseModel.createStatement(
				baseModel.createResource(INDIVIDUAL_URI),
				baseModel.createProperty(LABEL_PROPERTY_URI),
				baseModel.createLiteral(text));
		baseModel.add(labelStatement);
	}

	/** Copies the document under test; call before the modifier runs. */
	private SearchInputDocument copyOfDocument() {
		return new BaseSearchInputDocument(doc);
	}

	/** Runs the modifier on the document and compares it to the expectation. */
	private void modifyAndCompare(Individual ind, SearchInputDocument expected) {
		nameFields.modifyDocument(ind, doc);
		assertEquals(expected, doc);
	}
}

View file

@ -5,6 +5,7 @@ package edu.cornell.mannlib.vitro.webapp.searchindex.exclusions;
import static org.junit.Assert.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.junit.Test;
@ -22,8 +23,9 @@ public class ExcludeBasedOnTypeTest {
IndividualImpl ind = new IndividualImpl();
ind.setURI("http://example.com/n2343");
VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Person");
ind.setVClass(personClass);
ind.setVClasses(Collections.singletonList(personClass), false);
String excludeResult = ebot.checkForExclusion(ind);
assertNotNull( excludeResult );
@ -37,10 +39,9 @@ public class ExcludeBasedOnTypeTest {
IndividualImpl ind = new IndividualImpl();
ind.setURI("http://example.com/n2343");
VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Agent");
ind.setVClass(personClass);
List<VClass> vClassList = new ArrayList<VClass>();
vClassList.add( new VClass("http://xmlns.com/foaf/0.1/Agent"));
vClassList.add( new VClass("http://example.com/Robot"));
vClassList.add( new VClass("http://example.com/KillerRobot"));
vClassList.add( new VClass("http://example.com/Droid"));
@ -52,14 +53,13 @@ public class ExcludeBasedOnTypeTest {
@Test
public void testCheckForNonExclusion() {
ExcludeBasedOnType ebot = new ExcludeBasedOnType();
ebot.addTypeToExclude("http://xmlns.com/foaf/0.1/Person");
IndividualImpl ind = new IndividualImpl();
ind.setURI("http://example.com/n2343");
VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Robot");
ind.setVClass(personClass);
ind.setVClasses(Collections.singletonList(personClass), false);
String excludeResult = ebot.checkForExclusion(ind);
assertNull( excludeResult );
@ -72,10 +72,9 @@ public class ExcludeBasedOnTypeTest {
IndividualImpl ind = new IndividualImpl();
ind.setURI("http://example.com/n2343");
VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Agent");
ind.setVClass(personClass);
List<VClass> vClassList = new ArrayList<VClass>();
vClassList.add( new VClass("http://xmlns.com/foaf/0.1/Agent"));
vClassList.add( new VClass("http://example.com/Robot"));
vClassList.add( new VClass("http://example.com/KillerRobot"));
vClassList.add( new VClass("http://example.com/Droid"));