From 8c3e06fc78d3fa1c12223f7b8b4615f474cfcb11 Mon Sep 17 00:00:00 2001 From: Jim Blake Date: Fri, 16 Jan 2015 16:52:52 -0500 Subject: [PATCH] VIVO-870 Move the DocumentModifiers into the configuration. --- .../searchIndexerConfigurationVitro.n3 | 25 +++- .../documentBuilding/FieldBooster.java | 75 ++++++++++++ .../documentBuilding/NameBoost.java | 66 ----------- .../documentBuilding/NameFields.java | 79 ------------- .../exclusions/ExcludeBasedOnType.java | 1 - .../documentBuilding/NameFieldsTest.java | 109 ------------------ .../exclusions/ExcludeBasedOnTypeTest.java | 13 +-- 7 files changed, 100 insertions(+), 268 deletions(-) create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/FieldBooster.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameBoost.java delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameFields.java delete mode 100644 webapp/test/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameFieldsTest.java diff --git a/webapp/rdf/display/everytime/searchIndexerConfigurationVitro.n3 b/webapp/rdf/display/everytime/searchIndexerConfigurationVitro.n3 index d8e06b1c4..4990dc99f 100644 --- a/webapp/rdf/display/everytime/searchIndexerConfigurationVitro.n3 +++ b/webapp/rdf/display/everytime/searchIndexerConfigurationVitro.n3 @@ -60,13 +60,26 @@ # ------------------------------------ -:documentModifier_nameFields - a , - . - -:documentModifier_nameBoost - a , +:documentModifier_AllNames + a , ; + rdfs:label "All labels are added to name fields." ; + :hasTargetField "nameRaw" ; + :hasSparqlQuery """ + PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> + SELECT ?label + WHERE { + ?uri rdfs:label ?label . + } + """ . + +:documentModifier_NameFieldBooster + a , + ; + :hasTargetField "nameRaw" ; + :hasTargetField "nameLowercase" ; + :hasTargetField "nameUnstemmed" ; + :hasTargetField "nameStemmed" ; :hasBoost "1.2"^^xsd:float . 
:documentModifier_thumbnailImageUrl diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/FieldBooster.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/FieldBooster.java new file mode 100644 index 000000000..5a86e1f65 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/FieldBooster.java @@ -0,0 +1,75 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ + +package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; +import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property; +import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation; + +public class FieldBooster implements DocumentModifier { + private final List<String> fieldNames = new ArrayList<>(); + private volatile Float boost; + + @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasTargetField") + public void addTargetField(String fieldName) { + fieldNames.add(fieldName); + } + + @Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBoost") + public void setBoost(float boost) { + this.boost = boost; + } + + @Validation + public void validate() { + if (boost == null) { + throw new IllegalStateException( + "Configuration did not include a boost value."); + } + if (fieldNames.isEmpty()) { + throw new IllegalStateException( + "Configuration did not include a target field."); + } + + Set<String> uniqueFieldNames = new HashSet<>(fieldNames); + List<String> duplicateFieldNames = new ArrayList<>(fieldNames); + for (String fn : uniqueFieldNames) { + duplicateFieldNames.remove(fn); + } + if 
(!duplicateFieldNames.isEmpty()) { + throw new IllegalStateException( + "Configuration contains duplicate names for target fields: " + + duplicateFieldNames); + } + } + + @Override + public void modifyDocument(Individual individual, SearchInputDocument doc) { + + for (String fieldName : fieldNames) { + SearchInputField field = doc.getField(fieldName); + if (field != null) { + field.setBoost(field.getBoost() + boost); + } + } + } + + @Override + public void shutdown() { + // do nothing. + } + + @Override + public String toString() { + return "FieldBooster[fieldNames=" + fieldNames + ", boost=" + boost + + "]"; + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameBoost.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameBoost.java deleted file mode 100644 index 149b1da52..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameBoost.java +++ /dev/null @@ -1,66 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding; - -import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_LOWERCASE; -import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW; -import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_STEMMED; -import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_UNSTEMMED; - -import java.util.Arrays; - -import edu.cornell.mannlib.vitro.webapp.beans.Individual; -import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; -import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField; -import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property; -import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation; - -public class NameBoost implements DocumentModifier { - - /** - * These are the fields 
in the search Document that - * are related to the name. If you modify the schema, - * please consider if you need to change this list - * of name fields to boost. - */ - private final String[] fieldsToBoost = {NAME_RAW,NAME_LOWERCASE,NAME_UNSTEMMED,NAME_STEMMED}; - - private volatile Float boost; - - @Property(uri="http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBoost") - public void setBoost(float boost) { - this.boost = boost; - } - - @Validation - public void validate() { - if (boost == null) { - throw new IllegalStateException( - "Configuration did not include a boost value."); - } - - } - - @Override - public void modifyDocument(Individual individual, SearchInputDocument doc) { - - for( String fieldName : fieldsToBoost){ - SearchInputField field = doc.getField(fieldName); - if( field != null ){ - field.setBoost(field.getBoost() + boost); - } - } - } - - @Override - public void shutdown() { - // do nothing. - } - - @Override - public String toString() { - return "NameBoost[fieldsToBoost=" + Arrays.toString(fieldsToBoost) - + ", boost=" + boost + "]"; - } - -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameFields.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameFields.java deleted file mode 100644 index a2e7e835a..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameFields.java +++ /dev/null @@ -1,79 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding; - -import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import edu.cornell.mannlib.vitro.webapp.beans.Individual; -import 
edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess; -import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat; -import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; -import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser; - -/** - * Adds all labels to name fields, not just the one returned by Individual.getName(). - */ -public class NameFields implements DocumentModifier, ContextModelsUser { - public static final Log log = LogFactory.getLog(NameFields.class.getName()); - - private volatile RDFService rdfService; - - @Override - public void setContextModels(ContextModelAccess models) { - this.rdfService = models.getRDFService(); - } - - @Override - public void modifyDocument(Individual ind, SearchInputDocument doc) { - if( ind == null || ind.getURI() == null ){ - return; - } - - //also run SPARQL query to get rdfs:label values - String query = - "SELECT ?label WHERE { " + - "<" + ind.getURI() + "> " + - "<http://www.w3.org/2000/01/rdf-schema#label> ?label }"; - - try { - BufferedReader stream = - new BufferedReader(new InputStreamReader(rdfService.sparqlSelectQuery(query, ResultFormat.CSV))); - - StringBuffer buffer = new StringBuffer(); - String line; - - //throw out first line since it is just a header - stream.readLine(); - - while( (line = stream.readLine()) != null ){ - buffer.append(line).append(' '); - } - - log.debug("Adding labels for " + ind.getURI() + " \"" + buffer.toString() + "\""); - doc.addField(NAME_RAW, buffer.toString()); - - } catch (RDFServiceException e) { - log.error("could not get the rdfs:label for " + ind.getURI(), e); - } catch (IOException e) { - log.error("could not get the rdfs:label for " + ind.getURI(), e); - } - - } - - @Override - public String toString() { - return this.getClass().getSimpleName(); - } - - @Override - public void shutdown() { /*nothing to do 
*/ } -} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/ExcludeBasedOnType.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/ExcludeBasedOnType.java index d5abfdce1..49dd50e54 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/ExcludeBasedOnType.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/ExcludeBasedOnType.java @@ -33,7 +33,6 @@ public class ExcludeBasedOnType implements SearchIndexExcluder { } List<VClass> vclasses = new ArrayList<>(); - vclasses.add(ind.getVClass()); addToList(vclasses, ind.getVClasses()); for (VClass vclz : vclasses) { diff --git a/webapp/test/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameFieldsTest.java b/webapp/test/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameFieldsTest.java deleted file mode 100644 index 15922750a..000000000 --- a/webapp/test/edu/cornell/mannlib/vitro/webapp/searchindex/documentBuilding/NameFieldsTest.java +++ /dev/null @@ -1,109 +0,0 @@ -/* $This file is distributed under the terms of the license in /doc/license.txt$ */ - -package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding; - -import static edu.cornell.mannlib.vitro.webapp.modelaccess.ModelAccess.WhichService.CONTENT; -import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW; -import static org.junit.Assert.assertEquals; - -import org.junit.Before; -import org.junit.Test; - -import stubs.edu.cornell.mannlib.vitro.webapp.beans.IndividualStub; -import stubs.edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccessStub; - -import com.hp.hpl.jena.rdf.model.Model; -import com.hp.hpl.jena.rdf.model.ModelFactory; -import com.hp.hpl.jena.rdf.model.Statement; - -import edu.cornell.mannlib.vitro.webapp.beans.Individual; -import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument; -import 
edu.cornell.mannlib.vitro.webapp.rdfservice.impl.jena.model.RDFServiceModel; -import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument; - -/** - * TODO NameFields should add the values as separate objects. - */ -public class NameFieldsTest { - private static final String INDIVIDUAL_URI = "http://mydomain.edu/individual/n3012"; - private static final String LABEL_PROPERTY_URI = "http://www.w3.org/2000/01/rdf-schema#label"; - private Model baseModel; - private NameFields nameFields; - private BaseSearchInputDocument doc; - - @Before - public void setup() { - baseModel = ModelFactory.createDefaultModel(); - - doc = new BaseSearchInputDocument(); - - RDFServiceModel rdfService = new RDFServiceModel(baseModel); - ContextModelAccessStub models = new ContextModelAccessStub(); - models.setRDFService(CONTENT, rdfService); - - nameFields = new NameFields(); - nameFields.setContextModels(models); - } - - @Test - public void nullIndividual() { - SearchInputDocument expected = new BaseSearchInputDocument(doc); - - assertResultingSearchDocument(null, expected); - } - - @Test - public void nullUri() { - SearchInputDocument expected = new BaseSearchInputDocument(doc); - - assertResultingSearchDocument(new IndividualStub(null), expected); - } - - @Test - public void foundNoLabels() { - SearchInputDocument expected = new BaseSearchInputDocument(doc); - expected.addField(NAME_RAW, ""); - - assertResultingSearchDocument(new IndividualStub(INDIVIDUAL_URI), - expected); - } - - @Test - public void foundOneLabel() { - baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label1")); - - SearchInputDocument expected = new BaseSearchInputDocument(doc); - expected.addField(NAME_RAW, "label1 "); - - assertResultingSearchDocument(new IndividualStub(INDIVIDUAL_URI), - expected); - } - - @Test - public void foundTwoLabels() { - baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label1")); - baseModel.add(stmt(INDIVIDUAL_URI, LABEL_PROPERTY_URI, "label2")); - - 
SearchInputDocument expected = new BaseSearchInputDocument(doc); - expected.addField(NAME_RAW, "label2 label1 "); - - assertResultingSearchDocument(new IndividualStub(INDIVIDUAL_URI), - expected); - } - - // ---------------------------------------------------------------------- - // Helper methods - // ---------------------------------------------------------------------- - - private Statement stmt(String subjectUri, String propertyUri, String literal) { - return baseModel.createStatement(baseModel.createResource(subjectUri), - baseModel.createProperty(propertyUri), - baseModel.createLiteral(literal)); - } - - private void assertResultingSearchDocument(Individual ind, - SearchInputDocument expected) { - nameFields.modifyDocument(ind, doc); - assertEquals(expected, doc); - } -} diff --git a/webapp/test/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/ExcludeBasedOnTypeTest.java b/webapp/test/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/ExcludeBasedOnTypeTest.java index 9c031e735..2170e3ffd 100644 --- a/webapp/test/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/ExcludeBasedOnTypeTest.java +++ b/webapp/test/edu/cornell/mannlib/vitro/webapp/searchindex/exclusions/ExcludeBasedOnTypeTest.java @@ -5,6 +5,7 @@ package edu.cornell.mannlib.vitro.webapp.searchindex.exclusions; import static org.junit.Assert.*; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import org.junit.Test; @@ -22,8 +23,9 @@ public class ExcludeBasedOnTypeTest { IndividualImpl ind = new IndividualImpl(); ind.setURI("http://example.com/n2343"); + VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Person"); - ind.setVClass(personClass); + ind.setVClasses(Collections.singletonList(personClass), false); String excludeResult = ebot.checkForExclusion(ind); assertNotNull( excludeResult ); @@ -37,10 +39,9 @@ public class ExcludeBasedOnTypeTest { IndividualImpl ind = new IndividualImpl(); ind.setURI("http://example.com/n2343"); - VClass 
personClass = new VClass("http://xmlns.com/foaf/0.1/Agent"); - ind.setVClass(personClass); List vClassList = new ArrayList(); + vClassList.add( new VClass("http://xmlns.com/foaf/0.1/Agent")); vClassList.add( new VClass("http://example.com/Robot")); vClassList.add( new VClass("http://example.com/KillerRobot")); vClassList.add( new VClass("http://example.com/Droid")); @@ -52,14 +53,13 @@ public class ExcludeBasedOnTypeTest { @Test public void testCheckForNonExclusion() { - ExcludeBasedOnType ebot = new ExcludeBasedOnType(); ebot.addTypeToExclude("http://xmlns.com/foaf/0.1/Person"); IndividualImpl ind = new IndividualImpl(); ind.setURI("http://example.com/n2343"); VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Robot"); - ind.setVClass(personClass); + ind.setVClasses(Collections.singletonList(personClass), false); String excludeResult = ebot.checkForExclusion(ind); assertNull( excludeResult ); @@ -72,10 +72,9 @@ public class ExcludeBasedOnTypeTest { IndividualImpl ind = new IndividualImpl(); ind.setURI("http://example.com/n2343"); - VClass personClass = new VClass("http://xmlns.com/foaf/0.1/Agent"); - ind.setVClass(personClass); List vClassList = new ArrayList(); + vClassList.add( new VClass("http://xmlns.com/foaf/0.1/Agent")); vClassList.add( new VClass("http://example.com/Robot")); vClassList.add( new VClass("http://example.com/KillerRobot")); vClassList.add( new VClass("http://example.com/Droid"));