VIVO-870 Move the DocumentModifiers into the configuration.
This commit is contained in:
parent
4fac4b7576
commit
8c3e06fc78
7 changed files with 100 additions and 268 deletions
|
@ -60,13 +60,26 @@
|
|||
|
||||
# ------------------------------------
|
||||
|
||||
:documentModifier_nameFields
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.NameFields> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> .
|
||||
|
||||
:documentModifier_nameBoost
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.NameBoost> ,
|
||||
:documentModifier_AllNames
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.SimpleSparqlQueryDocumentModifier> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
|
||||
rdfs:label "All labels are added to name fields." ;
|
||||
:hasTargetField "nameRaw" ;
|
||||
:hasSparqlQuery """
|
||||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
||||
SELECT ?label
|
||||
WHERE {
|
||||
?uri rdfs:label ?label .
|
||||
}
|
||||
""" .
|
||||
|
||||
:documentModifier_NameFieldBooster
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.FieldBooster> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
|
||||
:hasTargetField "nameRaw" ;
|
||||
:hasTargetField "nameLowercase" ;
|
||||
:hasTargetField "nameUnstemmed" ;
|
||||
:hasTargetField "nameStemmed" ;
|
||||
:hasBoost "1.2"^^xsd:float .
|
||||
|
||||
:documentModifier_thumbnailImageUrl
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
|
||||
|
||||
public class FieldBooster implements DocumentModifier {
|
||||
private final List<String> fieldNames = new ArrayList<>();
|
||||
private volatile Float boost;
|
||||
|
||||
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasTargetField")
|
||||
public void addTargetField(String fieldName) {
|
||||
fieldNames.add(fieldName);
|
||||
}
|
||||
|
||||
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBoost")
|
||||
public void setBoost(float boost) {
|
||||
this.boost = boost;
|
||||
}
|
||||
|
||||
@Validation
|
||||
public void validate() {
|
||||
if (boost == null) {
|
||||
throw new IllegalStateException(
|
||||
"Configuration did not include a boost value.");
|
||||
}
|
||||
if (fieldNames.isEmpty()) {
|
||||
throw new IllegalStateException(
|
||||
"Configuration did not include a target field.");
|
||||
}
|
||||
|
||||
Set<String> uniqueFieldNames = new HashSet<>(fieldNames);
|
||||
List<String> duplicateFieldNames = new ArrayList<>(fieldNames);
|
||||
for (String fn : uniqueFieldNames) {
|
||||
duplicateFieldNames.remove(fn);
|
||||
}
|
||||
if (!duplicateFieldNames.isEmpty()) {
|
||||
throw new IllegalStateException(
|
||||
"Configuration contains duplicate names for target fields: "
|
||||
+ duplicateFieldNames);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void modifyDocument(Individual individual, SearchInputDocument doc) {
|
||||
|
||||
for (String fieldName : fieldNames) {
|
||||
SearchInputField field = doc.getField(fieldName);
|
||||
if (field != null) {
|
||||
field.setBoost(field.getBoost() + boost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
// do nothing.
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "FieldBooster[fieldNames=" + fieldNames + ", boost=" + boost
|
||||
+ "]";
|
||||
}
|
||||
|
||||
}
|
|
@ -1,66 +0,0 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
|
||||
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_LOWERCASE;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_STEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_UNSTEMMED;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
|
||||
|
||||
public class NameBoost implements DocumentModifier {
|
||||
|
||||
/**
|
||||
* These are the fields in the search Document that
|
||||
* are related to the name. If you modify the schema,
|
||||
* please consider if you need to change this list
|
||||
* of name fields to boost.
|
||||
*/
|
||||
private final String[] fieldsToBoost = {NAME_RAW,NAME_LOWERCASE,NAME_UNSTEMMED,NAME_STEMMED};
|
||||
|
||||
private volatile Float boost;
|
||||
|
||||
@Property(uri="http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBoost")
|
||||
public void setBoost(float boost) {
|
||||
this.boost = boost;
|
||||
}
|
||||
|
||||
@Validation
|
||||
public void validate() {
|
||||
if (boost == null) {
|
||||
throw new IllegalStateException(
|
||||
"Configuration did not include a boost value.");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void modifyDocument(Individual individual, SearchInputDocument doc) {
|
||||
|
||||
for( String fieldName : fieldsToBoost){
|
||||
SearchInputField field = doc.getField(fieldName);
|
||||
if( field != null ){
|
||||
field.setBoost(field.getBoost() + boost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() {
|
||||
// do nothing.
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "NameBoost[fieldsToBoost=" + Arrays.toString(fieldsToBoost)
|
||||
+ ", boost=" + boost + "]";
|
||||
}
|
||||
|
||||
}
|
|
@ -1,79 +0,0 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
|
||||
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
|
||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat;
|
||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser;
|
||||
|
||||
/**
|
||||
* Adds all labels to name fields, not just the one returned by Individual.getName().
|
||||
*/
|
||||
public class NameFields implements DocumentModifier, ContextModelsUser {
|
||||
public static final Log log = LogFactory.getLog(NameFields.class.getName());
|
||||
|
||||
private volatile RDFService rdfService;
|
||||
|
||||
@Override
|
||||
public void setContextModels(ContextModelAccess models) {
|
||||
this.rdfService = models.getRDFService();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void modifyDocument(Individual ind, SearchInputDocument doc) {
|
||||
if( ind == null || ind.getURI() == null ){
|
||||
return;
|
||||
}
|
||||
|
||||
//also run SPARQL query to get rdfs:label values
|
||||
String query =
|
||||
"SELECT ?label WHERE { " +
|
||||
"<" + ind.getURI() + "> " +
|
||||
"<http://www.w3.org/2000/01/rdf-schema#label> ?label }";
|
||||
|
||||
try {
|
||||
BufferedReader stream =
|
||||
new BufferedReader(new InputStreamReader(rdfService.sparqlSelectQuery(query, ResultFormat.CSV)));
|
||||
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
String line;
|
||||
|
||||
//throw out first line since it is just a header
|
||||
stream.readLine();
|
||||
|
||||
while( (line = stream.readLine()) != null ){
|
||||
buffer.append(line).append(' ');
|
||||
}
|
||||
|
||||
log.debug("Adding labels for " + ind.getURI() + " \"" + buffer.toString() + "\"");
|
||||
doc.addField(NAME_RAW, buffer.toString());
|
||||
|
||||
} catch (RDFServiceException e) {
|
||||
log.error("could not get the rdfs:label for " + ind.getURI(), e);
|
||||
} catch (IOException e) {
|
||||
log.error("could not get the rdfs:label for " + ind.getURI(), e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return this.getClass().getSimpleName();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown() { /*nothing to do */ }
|
||||
}
|
|
@ -33,7 +33,6 @@ public class ExcludeBasedOnType implements SearchIndexExcluder {
|
|||
}
|
||||
|
||||
List<VClass> vclasses = new ArrayList<>();
|
||||
vclasses.add(ind.getVClass());
|
||||
addToList(vclasses, ind.getVClasses());
|
||||
|
||||
for (VClass vclz : vclasses) {
|
||||
|
|
|
@ -1,109 +0,0 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
|
||||
|
||||
import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService.CONTENT;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Test;
|
||||
|
||||
import stubs.edu.cornell.mannlib.vitro.webapp.beans.IndividualStub;
|
||||
import stubs.edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccessStub;
|
||||
|
||||
import com.hp.hpl.jena.rdf.model.Model;
|
||||
import com.hp.hpl.jena.rdf.model.ModelFactory;
|
||||
import com.hp.hpl.jena.rdf.model.Statement;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.model.RDFServiceModel;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument;
|
||||
|
||||
/**
|
||||
* TODO NameFields should add the values as separate objects.
|
||||
*/
|
||||
public class NameFieldsTest {
|
||||
private static final String INDIVIDUAL_URI = "http://mydomain.edu/individual/n3012";
|
||||
private static final String LABEL_PROPERTY_URI = "http://www.w3.org/2000/01/rdf-schema#label";
|
||||
private Model baseModel;
|
||||
private NameFields nameFields;
|
||||
private BaseSearchInputDocument doc;
|
||||
|
||||
@Before
|
||||
public void setup() {
|
||||
baseModel = ModelFactory.createDefaultModel();
|
||||
|
||||
doc = new BaseSearchInputDocument();
|
||||
|
||||
RDFServiceModel rdfService = new RDFServiceModel(baseModel);
|
||||
ContextModelAccessStub models = new ContextModelAccessStub();
|
||||
models.setRDFService(CONTENT, rdfService);
|
||||
|
||||
nameFields = new NameFields();
|
||||
nameFields.setContextModels(models);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void nullIndividual() {
|
||||
SearchInputDocument expected = new BaseSearchInputDocument(doc);
|
||||
|
||||
assertResultingSearchDocument(null, expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void nullUri() {
|
||||
SearchInputDocument expected = new BaseSearchInputDocument(doc);
|
||||
|
||||
assertResultingSearchDocument(new IndividualStub(null), expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void foundNoLabels() {
|
||||
SearchInputDocument expected = new BaseSearchInputDocument(doc);
|
||||
expected.addField(NAME_RAW, "");
|
||||
|
||||
assertResultingSearchDocument(new IndividualStub(INDIVIDUAL_URI),
|
||||
expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void foundOneLabel() {
|
||||
baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label1"));
|
||||
|
||||
SearchInputDocument expected = new BaseSearchInputDocument(doc);
|
||||
expected.addField(NAME_RAW, "label1 ");
|
||||
|
||||
assertResultingSearchDocument(new IndividualStub(INDIVIDUAL_URI),
|
||||
expected);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void foundTwoLabels() {
|
||||
baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label1"));
|
||||
baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label2"));
|
||||
|
||||
SearchInputDocument expected = new BaseSearchInputDocument(doc);
|
||||
expected.addField(NAME_RAW, "label2 label1 ");
|
||||
|
||||
assertResultingSearchDocument(new IndividualStub(INDIVIDUAL_URI),
|
||||
expected);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper methods
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private Statement stmt(String subjectUri, String propertyUri, String literal) {
|
||||
return baseModel.createStatement(baseModel.createResource(subjectUri),
|
||||
baseModel.createProperty(propertyUri),
|
||||
baseModel.createLiteral(literal));
|
||||
}
|
||||
|
||||
private void assertResultingSearchDocument(Individual ind,
|
||||
SearchInputDocument expected) {
|
||||
nameFields.modifyDocument(ind, doc);
|
||||
assertEquals(expected, doc);
|
||||
}
|
||||
}
|
|
@ -5,6 +5,7 @@ package edu.cornell.mannlib.vitro.webapp.searchindex.exclusions;
|
|||
import static org.junit.Assert.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.Test;
|
||||
|
@ -22,8 +23,9 @@ public class ExcludeBasedOnTypeTest {
|
|||
|
||||
IndividualImpl ind = new IndividualImpl();
|
||||
ind.setURI("http://example.com/n2343");
|
||||
|
||||
VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Person");
|
||||
ind.setVClass(personClass);
|
||||
ind.setVClasses(Collections.singletonList(personClass), false);
|
||||
|
||||
String excludeResult = ebot.checkForExclusion(ind);
|
||||
assertNotNull( excludeResult );
|
||||
|
@ -37,10 +39,9 @@ public class ExcludeBasedOnTypeTest {
|
|||
|
||||
IndividualImpl ind = new IndividualImpl();
|
||||
ind.setURI("http://example.com/n2343");
|
||||
VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Agent");
|
||||
ind.setVClass(personClass);
|
||||
|
||||
List<VClass> vClassList = new ArrayList<VClass>();
|
||||
vClassList.add( new VClass("http://xmlns.com/foaf/0.1/Agent"));
|
||||
vClassList.add( new VClass("http://example.com/Robot"));
|
||||
vClassList.add( new VClass("http://example.com/KillerRobot"));
|
||||
vClassList.add( new VClass("http://example.com/Droid"));
|
||||
|
@ -52,14 +53,13 @@ public class ExcludeBasedOnTypeTest {
|
|||
|
||||
@Test
|
||||
public void testCheckForNonExclusion() {
|
||||
|
||||
ExcludeBasedOnType ebot = new ExcludeBasedOnType();
|
||||
ebot.addTypeToExclude("http://xmlns.com/foaf/0.1/Person");
|
||||
|
||||
IndividualImpl ind = new IndividualImpl();
|
||||
ind.setURI("http://example.com/n2343");
|
||||
VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Robot");
|
||||
ind.setVClass(personClass);
|
||||
ind.setVClasses(Collections.singletonList(personClass), false);
|
||||
|
||||
String excludeResult = ebot.checkForExclusion(ind);
|
||||
assertNull( excludeResult );
|
||||
|
@ -72,10 +72,9 @@ public class ExcludeBasedOnTypeTest {
|
|||
|
||||
IndividualImpl ind = new IndividualImpl();
|
||||
ind.setURI("http://example.com/n2343");
|
||||
VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Agent");
|
||||
ind.setVClass(personClass);
|
||||
|
||||
List<VClass> vClassList = new ArrayList<VClass>();
|
||||
vClassList.add( new VClass("http://xmlns.com/foaf/0.1/Agent"));
|
||||
vClassList.add( new VClass("http://example.com/Robot"));
|
||||
vClassList.add( new VClass("http://example.com/KillerRobot"));
|
||||
vClassList.add( new VClass("http://example.com/Droid"));
|
||||
|
|
Loading…
Add table
Reference in a new issue