VIVO-870 Move the DocumentModifiers into the configuration.

This commit is contained in:
Jim Blake 2015-01-16 16:52:52 -05:00
parent 4fac4b7576
commit 8c3e06fc78
7 changed files with 100 additions and 268 deletions

View file

@ -0,0 +1,75 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
/**
 * A DocumentModifier that raises the boost of selected fields in a search
 * document.
 *
 * The target field names and the boost amount are supplied through the
 * annotated configuration methods; validate() rejects a configuration that
 * lacks a boost, lacks target fields, or names the same field more than once.
 */
public class FieldBooster implements DocumentModifier {
	/** Names of the document fields to boost, in configuration order. */
	private final List<String> fieldNames = new ArrayList<>();

	/** The amount added to each target field's boost; null until configured. */
	private volatile Float boost;

	/**
	 * Registers one more field to receive the boost.
	 *
	 * @param fieldName
	 *            the name of a field in the search document
	 */
	@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasTargetField")
	public void addTargetField(String fieldName) {
		fieldNames.add(fieldName);
	}

	/**
	 * Sets the amount that will be added to the boost of each target field.
	 *
	 * @param boost
	 *            the increment to apply
	 */
	@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBoost")
	public void setBoost(float boost) {
		this.boost = boost;
	}

	/**
	 * Checks that the configuration is complete and consistent.
	 *
	 * @throws IllegalStateException
	 *             if no boost was set, if no target field was added, or if a
	 *             target field name was added more than once
	 */
	@Validation
	public void validate() {
		if (boost == null) {
			throw new IllegalStateException(
					"Configuration did not include a boost value.");
		}
		if (fieldNames.isEmpty()) {
			throw new IllegalStateException(
					"Configuration did not include a target field.");
		}

		// Every occurrence of a name after its first one is a duplicate;
		// collect them in the order they were configured.
		Set<String> alreadySeen = new HashSet<>();
		List<String> repeatedNames = new ArrayList<>();
		for (String name : fieldNames) {
			boolean firstOccurrence = alreadySeen.add(name);
			if (!firstOccurrence) {
				repeatedNames.add(name);
			}
		}
		if (!repeatedNames.isEmpty()) {
			throw new IllegalStateException(
					"Configuration contains duplicate names for target fields: "
							+ repeatedNames);
		}
	}

	/**
	 * Adds the configured boost to each target field that is present in the
	 * document. Target fields that the document does not contain are skipped.
	 */
	@Override
	public void modifyDocument(Individual individual, SearchInputDocument doc) {
		for (String targetName : fieldNames) {
			SearchInputField target = doc.getField(targetName);
			if (target == null) {
				continue;
			}
			target.setBoost(target.getBoost() + boost);
		}
	}

	/** Holds no resources, so there is nothing to release. */
	@Override
	public void shutdown() {
		// do nothing.
	}

	@Override
	public String toString() {
		String description = "FieldBooster[fieldNames=" + fieldNames;
		description += ", boost=" + boost;
		return description + "]";
	}
}

View file

@ -1,66 +0,0 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_LOWERCASE;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_STEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_UNSTEMMED;
import java.util.Arrays;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
/**
 * A DocumentModifier that raises the boost of the name-related fields of a
 * search document by a configured amount.
 */
public class NameBoost implements DocumentModifier {
	/**
	 * The search document fields that hold the name. If the schema changes,
	 * check whether this list of boosted name fields must change with it.
	 */
	private final String[] fieldsToBoost = { NAME_RAW, NAME_LOWERCASE,
			NAME_UNSTEMMED, NAME_STEMMED };

	/** The amount added to each name field's boost; null until configured. */
	private volatile Float boost;

	/**
	 * Sets the amount that will be added to the boost of each name field.
	 *
	 * @param boost
	 *            the increment to apply
	 */
	@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBoost")
	public void setBoost(float boost) {
		this.boost = boost;
	}

	/**
	 * Checks that a boost value was configured.
	 *
	 * @throws IllegalStateException
	 *             if no boost was set
	 */
	@Validation
	public void validate() {
		boolean boostMissing = (boost == null);
		if (boostMissing) {
			throw new IllegalStateException(
					"Configuration did not include a boost value.");
		}
	}

	/**
	 * Adds the configured boost to each name field that is present in the
	 * document. Name fields that the document does not contain are skipped.
	 */
	@Override
	public void modifyDocument(Individual individual, SearchInputDocument doc) {
		for (int i = 0; i < fieldsToBoost.length; i++) {
			SearchInputField nameField = doc.getField(fieldsToBoost[i]);
			if (nameField == null) {
				continue;
			}
			nameField.setBoost(nameField.getBoost() + boost);
		}
	}

	/** Holds no resources, so there is nothing to release. */
	@Override
	public void shutdown() {
		// do nothing.
	}

	@Override
	public String toString() {
		String fieldList = Arrays.toString(fieldsToBoost);
		return "NameBoost[fieldsToBoost=" + fieldList + ", boost=" + boost
				+ "]";
	}
}

View file

@ -1,79 +0,0 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding;
import static edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames.NAME_RAW;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.modelaccess.ContextModelAccess;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.ContextModelsUser;
/**
 * Adds all labels to name fields, not just the one returned by
 * Individual.getName().
 *
 * The labels are obtained by running a SPARQL query for the rdfs:label values
 * of the individual against the RDFService supplied through
 * setContextModels().
 */
public class NameFields implements DocumentModifier, ContextModelsUser {
	public static final Log log = LogFactory.getLog(NameFields.class.getName());

	/** Source of the label data; assigned by setContextModels(). */
	private volatile RDFService rdfService;

	/**
	 * Stores the RDFService of the supplied models for use by later queries.
	 */
	@Override
	public void setContextModels(ContextModelAccess models) {
		this.rdfService = models.getRDFService();
	}

	/**
	 * Queries for every rdfs:label of the individual and adds them, joined by
	 * spaces, to the NAME_RAW field of the document.
	 *
	 * Does nothing if the individual or its URI is null. A failure of the
	 * query, or of reading its result, is logged and the document is left
	 * without the additional field.
	 *
	 * @param ind
	 *            the individual being indexed; may be null
	 * @param doc
	 *            the search document that receives the labels
	 */
	@Override
	public void modifyDocument(Individual ind, SearchInputDocument doc) {
		if (ind == null || ind.getURI() == null) {
			return;
		}

		// also run SPARQL query to get rdfs:label values
		String query = "SELECT ?label WHERE { " + "<" + ind.getURI() + "> "
				+ "<http://www.w3.org/2000/01/rdf-schema#label> ?label }";

		// try-with-resources closes the reader, and with it the result
		// stream, on both the normal and the failure path. The result is
		// decoded as UTF-8 (the encoding of SPARQL CSV results) instead of
		// the platform default charset.
		try (BufferedReader stream = new BufferedReader(new InputStreamReader(
				rdfService.sparqlSelectQuery(query, ResultFormat.CSV),
				java.nio.charset.StandardCharsets.UTF_8))) {
			// method-local buffer, so no synchronization is needed
			StringBuilder buffer = new StringBuilder();

			// throw out first line since it is just a header
			stream.readLine();

			String line;
			while ((line = stream.readLine()) != null) {
				buffer.append(line).append(' ');
			}

			String labels = buffer.toString();
			log.debug("Adding labels for " + ind.getURI() + " \"" + labels
					+ "\"");
			doc.addField(NAME_RAW, labels);
		} catch (RDFServiceException e) {
			log.error("could not get the rdfs:label for " + ind.getURI(), e);
		} catch (IOException e) {
			log.error("could not get the rdfs:label for " + ind.getURI(), e);
		}
	}

	@Override
	public String toString() {
		return this.getClass().getSimpleName();
	}

	/** Holds no resources between calls, so there is nothing to release. */
	@Override
	public void shutdown() { /* nothing to do */
	}
}

View file

@ -33,7 +33,6 @@ public class ExcludeBasedOnType implements SearchIndexExcluder {
}
List<VClass> vclasses = new ArrayList<>();
vclasses.add(ind.getVClass());
addToList(vclasses, ind.getVClasses());
for (VClass vclz : vclasses) {