Removing rdfs:label from search index document field ALLTEXT. Moving vivo specific DocumentModifiers out of vitro. NIHVIVO-2975

This commit is contained in:
briancaruso 2011-11-01 16:19:02 +00:00
parent 0c31c08fb0
commit 827ea6d7e9
6 changed files with 358 additions and 0 deletions

View file

@ -56,6 +56,9 @@ edu.cornell.mannlib.vivo.auth.policy.SelfEditorRelationshipPolicy$Setup
edu.cornell.mannlib.vitro.webapp.auth.policy.RestrictHomeMenuItemEditingPolicy$Setup
# Add VIVO-specific DocumentModifiers; this must come before SolrSetup
edu.cornell.mannlib.vitro.webapp.search.solr.VivoDocumentModifiers
# The Solr index uses a "public" filter, so the PropertyRestrictionPolicyHelper must already be set up.
edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup

View file

@ -0,0 +1,154 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.List;
import com.hp.hpl.jena.rdf.model.Model;
/**
 * Adds text taken from context nodes to the Solr documents of
 * foaf:Agent individuals.
 *
 * All SPARQL queries are assembled once, when the class is loaded, and the
 * resulting list is passed to ContextNodeFields by the constructor.
 */
public class VivoAgentContextNodeFields extends ContextNodeFields{

    /** SPARQL queries for foaf:Agent individuals; filled in by the static initializer. */
    static List<String> queriesForAgent = new ArrayList<String>();

    /** Namespace declarations that start every query. */
    protected static final String prefix =
          "prefix owl: <http://www.w3.org/2002/07/owl#> "
        + " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
        + " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
        + " prefix core: <http://vivoweb.org/ontology/core#> "
        + " prefix foaf: <http://xmlns.com/foaf/0.1/> "
        + " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        + " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
        + " prefix bibo: <http://purl.org/ontology/bibo/> ";

    /** SELECT head shared by every query that returns a single context node property. */
    private static final String SELECT_CONTEXT_NODE_PROPERTY =
        "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {";

    /** Pattern tying the agent (?uri) to a context node (?c) through any predicate. */
    private static final String AGENT_TO_NODE = "?uri rdf:type foaf:Agent ; ?b ?c . ";

    /** Restricts the context node to core:Position. */
    private static final String NODE_IS_POSITION = " ?c rdf:type core:Position . ";

    /** Restricts the context node to core:Relationship. */
    private static final String NODE_IS_RELATIONSHIP = " ?c rdf:type core:Relationship . ";

    /**
     * Builds a query that selects one context node property.
     *
     * @param nodeTypePattern pattern restricting the type of the context node ?c
     * @param remainder the rest of the WHERE clause, including the closing brace
     */
    private static String singlePropertyQuery(String nodeTypePattern, String remainder) {
        return prefix + SELECT_CONTEXT_NODE_PROPERTY + " " + AGENT_TO_NODE
                + nodeTypePattern + remainder;
    }

    // queries for foaf:Agent
    static {
        /* Position: one query per property */
        queriesForAgent.add(singlePropertyQuery(NODE_IS_POSITION,
                " ?c core:hrJobTitle ?ContextNodeProperty . }"));

        queriesForAgent.add(singlePropertyQuery(NODE_IS_POSITION,
                " ?c core:involvedOrganizationName ?ContextNodeProperty . }"));

        queriesForAgent.add(singlePropertyQuery(NODE_IS_POSITION,
                " ?c core:positionInOrganization ?i . ?i rdfs:label ?ContextNodeProperty . }"));

        queriesForAgent.add(singlePropertyQuery(NODE_IS_POSITION,
                " ?c core:titleOrRole ?ContextNodeProperty . }"));

        /* Position: the same four properties, each optional, in a single query */
        queriesForAgent.add(prefix
                + "SELECT "
                + "(str(?HRJobTitle) as ?hrJobTitle) "
                + "(str(?InvolvedOrganizationName) as ?involvedOrganizationName) "
                + "(str(?PositionInOrganization) as ?positionInOrganization) "
                + "(str(?TitleOrRole) as ?titleOrRole) WHERE {"
                + AGENT_TO_NODE
                + NODE_IS_POSITION
                + " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . "
                + " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ."
                + " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
                + " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
                + " }");

        /* Advisor */
        queriesForAgent.add(singlePropertyQuery(NODE_IS_RELATIONSHIP,
                " ?c core:advisee ?d . ?d rdfs:label ?ContextNodeProperty . }"));

        queriesForAgent.add(singlePropertyQuery(NODE_IS_RELATIONSHIP,
                " ?c core:degreeCandidacy ?e . ?e rdfs:label ?ContextNodeProperty . }"));

        /* Author: labels of the linked authors other than ?uri itself */
        queriesForAgent.add(singlePropertyQuery(NODE_IS_RELATIONSHIP,
                " ?c core:linkedAuthor ?f . "
                + " ?f rdfs:label ?ContextNodeProperty . "
                + " FILTER( ?f != ?uri ) "
                + "}"));

        queriesForAgent.add(singlePropertyQuery(NODE_IS_RELATIONSHIP,
                " ?c core:linkedInformationResource ?h . ?h rdfs:label ?ContextNodeProperty . }"));

        /* Award */
        queriesForAgent.add(prefix
                + "SELECT "
                + "(str(?AwardLabel) as ?awardLabel) "
                + "(str(?AwardConferredBy) as ?awardConferredBy) "
                + "(str(?Description) as ?description) WHERE {"
                + AGENT_TO_NODE
                + " ?c rdf:type core:AwardReceipt . "
                + " OPTIONAL { ?c rdfs:label ?AwardLabel . } . "
                + " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy . } . "
                + " OPTIONAL { ?c core:description ?Description . } . "
                + " }");

        /* Role In Organization */
        queriesForAgent.add(prefix
                + "SELECT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
                + AGENT_TO_NODE
                + " ?c rdf:type core:Role ; core:roleIn ?Organization ."
                + " ?Organization rdfs:label ?OrganizationLabel . "
                + " }");

        /* Academic Degree / Educational Training */
        queriesForAgent.add(prefix
                + "SELECT "
                + "(str(?AcademicDegreeLabel) as ?academicDegreeLabel) "
                + "(str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
                + "(str(?MajorField) as ?majorField) "
                + "(str(?DepartmentOrSchool) as ?departmentOrSchool) "
                + "(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {"
                + " " + AGENT_TO_NODE
                + " ?c rdf:type core:EducationalTraining . "
                + "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . "
                + "OPTIONAL { ?c core:majorField ?MajorField .} ."
                + " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }"
                + " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . "
                + "}");
    }

    /**
     * @param model the model handed to ContextNodeFields together with the agent queries
     */
    public VivoAgentContextNodeFields(Model model){
        super(model, queriesForAgent);
    }
}

View file

@ -0,0 +1,40 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.List;
import javax.servlet.ServletContext;
import javax.servlet.ServletContextEvent;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.DatasetFactory;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
/**
 * Servlet context listener that builds the VIVO-specific DocumentModifiers
 * and stores them in the servlet context under the attribute
 * "DocumentModifiers", for use later in startup by SolrSetup.
 */
public class VivoDocumentModifiers implements javax.servlet.ServletContextListener{

    /**
     * Creates the modifiers from the context's Jena ontology model and
     * publishes the list as a servlet context attribute.
     *
     * @param sce the event carrying the servlet context being initialized
     */
    @Override
    public void contextInitialized(ServletContextEvent sce) {
        ServletContext context = sce.getServletContext();

        // Look the model up once and share it between the Dataset and the modifiers.
        OntModel jenaOntModel = ModelContext.getJenaOntModel(context);
        Dataset dataset = DatasetFactory.create(jenaOntModel);

        /* put DocumentModifiers into servlet context for use later in startup by SolrSetup */
        List<DocumentModifier> modifiers = new ArrayList<DocumentModifier>();
        modifiers.add(new CalculateParameters(dataset));
        modifiers.add(new VivoAgentContextNodeFields(jenaOntModel));
        modifiers.add(new VivoInformationResourceContextNodeFields(jenaOntModel));

        context.setAttribute("DocumentModifiers", modifiers);
    }

    /** Nothing to release on shutdown. */
    @Override
    public void contextDestroyed(ServletContextEvent arg0) {
        // do nothing.
    }
}

View file

@ -0,0 +1,75 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.util.ArrayList;
import java.util.List;
import com.hp.hpl.jena.rdf.model.Model;
/**
 * Adds text taken from context nodes to the Solr documents of
 * core:InformationResource individuals.
 *
 * All SPARQL queries are assembled once, when the class is loaded, and the
 * resulting list is passed to ContextNodeFields by the constructor.
 *
 * @author bdc34
 */
public class VivoInformationResourceContextNodeFields extends ContextNodeFields{

    /** SPARQL queries for core:InformationResource individuals; filled in by the static initializer. */
    static List<String> queriesForInformationResource = new ArrayList<String>();

    /** Namespace declarations that start every query. */
    protected static final String prefix =
          "prefix owl: <http://www.w3.org/2002/07/owl#> "
        + " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
        + " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
        + " prefix core: <http://vivoweb.org/ontology/core#> "
        + " prefix foaf: <http://xmlns.com/foaf/0.1/> "
        + " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
        + " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
        + " prefix bibo: <http://purl.org/ontology/bibo/> ";

    /** SELECT head shared by all queries of this class. */
    private static final String SELECT_CONTEXT_NODE_PROPERTY =
        "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {";

    /** Restricts ?uri to core:InformationResource. */
    private static final String URI_IS_INFORMATION_RESOURCE =
        "?uri rdf:type core:InformationResource . ";

    /**
     * Builds a query selecting the rdfs:label of whatever ?uri points to
     * through the given property.
     *
     * @param property prefixed name of the property leading from ?uri to the labelled resource
     * @param var name (without '?') of the variable bound to the labelled resource
     */
    private static String linkedLabelQuery(String property, String var) {
        return prefix + SELECT_CONTEXT_NODE_PROPERTY
                + URI_IS_INFORMATION_RESOURCE
                + "?uri " + property + " ?" + var + " . ?" + var + " rdfs:label ?ContextNodeProperty ."
                + "}";
    }

    // queries for core:InformationResource
    static {
        /* linked author labels */
        queriesForInformationResource.add(prefix
                + SELECT_CONTEXT_NODE_PROPERTY
                + " " + URI_IS_INFORMATION_RESOURCE
                + "?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ."
                + "?b rdfs:label ?ContextNodeProperty .}");

        /* features */
        queriesForInformationResource.add(linkedLabelQuery("core:features", "i"));

        /* editor */
        queriesForInformationResource.add(linkedLabelQuery("bibo:editor", "e"));

        /* subject area */
        queriesForInformationResource.add(linkedLabelQuery("core:hasSubjectArea", "f"));
    }

    /**
     * @param model the model handed to ContextNodeFields together with the information resource queries
     */
    public VivoInformationResourceContextNodeFields(Model model){
        super(model, queriesForInformationResource);
    }
}

View file

@ -0,0 +1,67 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;
import org.apache.solr.common.SolrInputDocument;
import org.junit.Assert;
import org.junit.Test;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.ontology.OntModelSpec;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import edu.cornell.mannlib.vitro.testing.AbstractTestClass;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactoryJena;
/**
 * Checks that the VIVO context node document modifiers do not add the
 * individual's own rdfs:label to the ALLTEXT field of the Solr document.
 */
public class VivoInformationResourceContextNodeFieldsTest extends AbstractTestClass {

    // Name of the N3 fixture, resolved relative to this class.
    // ("LABLE" is a long-standing misspelling of "LABEL"; the field name is kept as is.)
    String TEST_NO_LABLE_N3_FILE = "VivoInformationResourceContextNodeFieldsTest.n3";

    // Label value that must not appear in ALLTEXT.
    String RDFS_LABEL_VALUE = "Test Document X";

    // URI of the individual looked up from the loaded model.
    String DOCUMENT_URI = "http://example.com/vivo/individual/n7474";

    /**
     * Loads the fixture, runs both VIVO context node modifiers against the
     * test individual and verifies after each one that the label text is
     * absent from ALLTEXT.
     *
     * @throws IOException if closing the fixture stream fails
     */
    @Test
    public void testNoLabel() throws IOException{
        // set up a model & wdf with the test RDF file
        InputStream stream = VivoInformationResourceContextNodeFieldsTest.class.getResourceAsStream(TEST_NO_LABLE_N3_FILE);
        // Fail with a readable message instead of a NullPointerException when the fixture is missing.
        Assert.assertNotNull("test resource not found: " + TEST_NO_LABLE_N3_FILE, stream);

        Model model = ModelFactory.createDefaultModel();
        try {
            model.read(stream, null, "N3");
        } finally {
            // Close the stream even when parsing fails.
            stream.close();
        }

        OntModel ontModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM,model);
        ontModel.prepare();
        Assert.assertTrue("ontModel had no statements" , ontModel.size() > 0 );

        WebappDaoFactory wadf = new WebappDaoFactoryJena(ontModel);
        Individual ind = wadf.getIndividualDao().getIndividualByURI(DOCUMENT_URI);
        Assert.assertNotNull(ind);

        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("ALLTEXT", "");

        // rdfs:label must NOT be added by VivoInformationResourceContextNodeFields
        VivoInformationResourceContextNodeFields vircnf = new VivoInformationResourceContextNodeFields(ontModel);
        vircnf.modifyDocument(ind, doc, new StringBuffer());
        assertLabelNotInAllText(doc);

        // ... nor by VivoAgentContextNodeFields
        VivoAgentContextNodeFields vacnf = new VivoAgentContextNodeFields(ontModel);
        vacnf.modifyDocument(ind, doc, new StringBuffer());
        assertLabelNotInAllText(doc);
    }

    /** Fails if any value of the document's ALLTEXT field contains the test label. */
    private void assertLabelNotInAllText(SolrInputDocument doc) {
        Collection<?> values = doc.getFieldValues("ALLTEXT");
        if (values == null) {
            // No ALLTEXT values at all, so the label cannot be present.
            return;
        }
        for (Object value : values) {
            Assert.assertFalse("rdfs:label erroneously added by document modifier:",
                    String.valueOf(value).contains(RDFS_LABEL_VALUE));
        }
    }
}

View file

@ -0,0 +1,19 @@
@prefix dc: <http://purl.org/dc/elements/1.1/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix vitro: <http://vitro.mannlib.cornell.edu/ns/vitro/0.7#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
<http://example.com/vivo/individual/n7474>
a <http://purl.org/ontology/bibo/Document> , owl:Thing , <http://purl.org/ontology/bibo/AudioDocument> , <http://vivoweb.org/ontology/core#InformationResource> ;
rdfs:label "Test Document X"@en-US ;
<http://purl.org/dc/terms/creator>
<http://caruso-laptop.mannlib.cornell.edu:8090/vivo/individual/n4782> ;
<http://purl.org/dc/terms/title>
"Test Document X"@en-US ;
vitro:modTime "2011-11-01T11:11:49"^^xsd:dateTime ;
vitro:mostSpecificType
<http://purl.org/ontology/bibo/AudioDocument> ;
<http://vivoweb.org/ontology/core#informationResourceInAuthorship>
<http://caruso-laptop.mannlib.cornell.edu:8090/vivo/individual/n7484> .