From 827ea6d7e9f9a06270e341c9953f6df9d995c3fb Mon Sep 17 00:00:00 2001 From: briancaruso Date: Tue, 1 Nov 2011 16:19:02 +0000 Subject: [PATCH] Removing rdfs:label from search index document field ALLTEXT. Moving vivo specific DocumentModifiers out of vitro. NIHVIVO-2975 --- .../WEB-INF/resources/startup_listeners.txt | 3 + .../solr/VivoAgentContextNodeFields.java | 154 ++++++++++++++++++ .../search/solr/VivoDocumentModifiers.java | 40 +++++ ...oInformationResourceContextNodeFields.java | 75 +++++++++ ...ormationResourceContextNodeFieldsTest.java | 67 ++++++++ ...nformationResourceContextNodeFieldsTest.n3 | 19 +++ 6 files changed, 358 insertions(+) create mode 100644 src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoAgentContextNodeFields.java create mode 100644 src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java create mode 100644 src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFields.java create mode 100644 test/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFieldsTest.java create mode 100644 test/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFieldsTest.n3 diff --git a/productMods/WEB-INF/resources/startup_listeners.txt b/productMods/WEB-INF/resources/startup_listeners.txt index e4f75158..1ee632fe 100644 --- a/productMods/WEB-INF/resources/startup_listeners.txt +++ b/productMods/WEB-INF/resources/startup_listeners.txt @@ -56,6 +56,9 @@ edu.cornell.mannlib.vivo.auth.policy.SelfEditorRelationshipPolicy$Setup edu.cornell.mannlib.vitro.webapp.auth.policy.RestrictHomeMenuItemEditingPolicy$Setup +# Add VIVO-specific DocumentModifiers; this must come before SolrSetup +edu.cornell.mannlib.vitro.webapp.search.solr.VivoDocumentModifiers + # The Solr index uses a "public" filter, so the PropertyRestrictionPolicyHelper must already be set up. 
edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup diff --git a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoAgentContextNodeFields.java b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoAgentContextNodeFields.java new file mode 100644 index 00000000..1bcfec86 --- /dev/null +++ b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoAgentContextNodeFields.java @@ -0,0 +1,154 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ +package edu.cornell.mannlib.vitro.webapp.search.solr; + +import java.util.ArrayList; +import java.util.List; + +import com.hp.hpl.jena.rdf.model.Model; + +/** + * Class that adds text from context nodes to Solr Documents for + * foaf:Agent individuals. + */ +public class VivoAgentContextNodeFields extends ContextNodeFields{ + + static List queriesForAgent = new ArrayList(); + + public VivoAgentContextNodeFields(Model model){ + super(model,queriesForAgent); + } + + protected static final String prefix = + "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + + //queries for foaf:Agent + static { + + /* Position */ + queriesForAgent.add(prefix + + "SELECT " + + "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Position . " + + " ?c core:hrJobTitle ?ContextNodeProperty . }"); + + queriesForAgent.add(prefix + "SELECT " + + "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Position . " + + " ?c core:involvedOrganizationName ?ContextNodeProperty . }"); + + queriesForAgent.add(prefix + "SELECT " + + "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Position . " + + " ?c core:positionInOrganization ?i . ?i rdfs:label ?ContextNodeProperty . 
}"); + + queriesForAgent.add(prefix + "SELECT " + + "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Position . " + + " ?c core:titleOrRole ?ContextNodeProperty . }"); + + /* HR Job Title */ + + queriesForAgent.add(prefix + + "SELECT " + + "(str(?HRJobTitle) as ?hrJobTitle) " + + "(str(?InvolvedOrganizationName) as ?involvedOrganizationName) " + + "(str(?PositionInOrganization) as ?positionInOrganization) " + + "(str(?TitleOrRole) as ?titleOrRole) WHERE {" + + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Position . " + + + " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . " + + " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ." + + " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . " + + " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . " + + " }"); + + /* Advisor */ + + queriesForAgent.add(prefix + "SELECT " + + "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Relationship . " + + " ?c core:advisee ?d . ?d rdfs:label ?ContextNodeProperty . }"); + + queriesForAgent.add(prefix + "SELECT " + + "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Relationship . " + + " ?c core:degreeCandidacy ?e . ?e rdfs:label ?ContextNodeProperty . }"); + + /* Author */ + + queriesForAgent.add(prefix + "SELECT " + + "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Relationship . " + + " ?c core:linkedAuthor ?f . " + + " ?f rdfs:label ?ContextNodeProperty . " + + " FILTER( ?f != ?uri ) " + + "}"); + + queriesForAgent.add(prefix + "SELECT " + + "(str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Relationship . 
" + + " ?c core:linkedInformationResource ?h . ?h rdfs:label ?ContextNodeProperty . }"); + + /* Award */ + + queriesForAgent.add(prefix + + "SELECT " + + "(str(?AwardLabel) as ?awardLabel) " + + "(str(?AwardConferredBy) as ?awardConferredBy) " + + "(str(?Description) as ?description) WHERE {" + + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:AwardReceipt . " + + + " OPTIONAL { ?c rdfs:label ?AwardLabel . } . " + + " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy . } . " + + " OPTIONAL { ?c core:description ?Description . } . " + + " }"); + + /* Role In Organization */ + + queriesForAgent.add(prefix + + "SELECT (str(?OrganizationLabel) as ?organizationLabel) WHERE {" + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Role ; core:roleIn ?Organization ." + + " ?Organization rdfs:label ?OrganizationLabel . " + + " }"); + + /* Academic Degree / Educational Training */ + + queriesForAgent.add(prefix + + "SELECT " + + "(str(?AcademicDegreeLabel) as ?academicDegreeLabel) " + + "(str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) " + + "(str(?MajorField) as ?majorField) " + + "(str(?DepartmentOrSchool) as ?departmentOrSchool) " + + "(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {" + + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:EducationalTraining . " + + + "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . " + + "OPTIONAL { ?c core:majorField ?MajorField .} ." + + " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }" + + " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . 
" + +"}"); + } +} diff --git a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java new file mode 100644 index 00000000..36c55ed5 --- /dev/null +++ b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoDocumentModifiers.java @@ -0,0 +1,40 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ +package edu.cornell.mannlib.vitro.webapp.search.solr; + +import java.util.ArrayList; +import java.util.List; + +import javax.servlet.ServletContext; +import javax.servlet.ServletContextEvent; + +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.query.Dataset; +import com.hp.hpl.jena.query.DatasetFactory; + +import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; + +public class VivoDocumentModifiers implements javax.servlet.ServletContextListener{ + + @Override + public void contextInitialized(ServletContextEvent sce) { + + ServletContext context = sce.getServletContext(); + + Dataset dataset = DatasetFactory.create(ModelContext.getJenaOntModel(context)); + OntModel jenaOntModel = ModelContext.getJenaOntModel(context); + + /* put DocumentModifiers into servlet context for use later in startup by SolrSetup */ + + List modifiers = new ArrayList(); + modifiers.add(new CalculateParameters(dataset)); // + modifiers.add(new VivoAgentContextNodeFields(jenaOntModel)); + modifiers.add(new VivoInformationResourceContextNodeFields(jenaOntModel)); + + context.setAttribute("DocumentModifiers", modifiers); + } + + @Override + public void contextDestroyed(ServletContextEvent arg0) { + // do nothing. 
+ } +} diff --git a/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFields.java b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFields.java new file mode 100644 index 00000000..cc0f5f3a --- /dev/null +++ b/src/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFields.java @@ -0,0 +1,75 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ +package edu.cornell.mannlib.vitro.webapp.search.solr; + +import java.util.ArrayList; +import java.util.List; + +import com.hp.hpl.jena.rdf.model.Model; + +/** + * Class that adds text from context nodes to Solr Documents for + * core:InformationResource individuals. + * + * @author bdc34 + * + */ +public class VivoInformationResourceContextNodeFields extends ContextNodeFields{ + + static List queriesForInformationResource = new ArrayList(); + + public VivoInformationResourceContextNodeFields(Model model){ + super(model,queriesForInformationResource); + } + + protected static final String prefix = + "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + + //queries for core:InformationResource + static { + + /* linked author labels */ + + queriesForInformationResource + .add(prefix + + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + " ?uri rdf:type core:InformationResource . " + + "?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ." + + "?b rdfs:label ?ContextNodeProperty .}"); + + /* features */ + + queriesForInformationResource + .add(prefix + + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + "?uri rdf:type core:InformationResource . " + + "?uri core:features ?i . ?i rdfs:label ?ContextNodeProperty ." 
+ + "}"); + + /* editor */ + + queriesForInformationResource + .add(prefix + + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + "?uri rdf:type core:InformationResource . " + + "?uri bibo:editor ?e . ?e rdfs:label ?ContextNodeProperty ." + + "}"); + + /* subject area */ + + queriesForInformationResource + .add(prefix + + "SELECT (str(?ContextNodeProperty) as ?contextNodeProperty) WHERE {" + + "?uri rdf:type core:InformationResource . " + + "?uri core:hasSubjectArea ?f . ?f rdfs:label ?ContextNodeProperty ." + + "}"); + } + +} diff --git a/test/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFieldsTest.java b/test/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFieldsTest.java new file mode 100644 index 00000000..d855a853 --- /dev/null +++ b/test/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFieldsTest.java @@ -0,0 +1,67 @@ +/* $This file is distributed under the terms of the license in /doc/license.txt$ */ +package edu.cornell.mannlib.vitro.webapp.search.solr; + + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collection; + +import org.apache.solr.common.SolrInputDocument; +import org.junit.Assert; +import org.junit.Test; + +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.ontology.OntModelSpec; +import com.hp.hpl.jena.rdf.model.Model; +import com.hp.hpl.jena.rdf.model.ModelFactory; + +import edu.cornell.mannlib.vitro.testing.AbstractTestClass; +import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory; +import edu.cornell.mannlib.vitro.webapp.dao.jena.WebappDaoFactoryJena; + +public class VivoInformationResourceContextNodeFieldsTest extends AbstractTestClass { + + String TEST_NO_LABLE_N3_FILE = "VivoInformationResourceContextNodeFieldsTest.n3"; + String RDFS_LABEL_VALUE = "Test Document X"; + String DOCUMENT_URI = 
"http://example.com/vivo/individual/n7474"; + + + @Test + public void testNoLabel() throws IOException{ + //Test that rdfs:label is NOT added by the VivoInformationResourceContextNodeFields + + //setup a model & wdf with test RDF file + InputStream stream = VivoInformationResourceContextNodeFieldsTest.class.getResourceAsStream(TEST_NO_LABLE_N3_FILE); + Model model = ModelFactory.createDefaultModel(); + model.read(stream, null, "N3"); + stream.close(); + + OntModel ontModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_DL_MEM,model); + ontModel.prepare(); + Assert.assertTrue("ontModel had no statements" , ontModel.size() > 0 ); + + WebappDaoFactory wadf = new WebappDaoFactoryJena(ontModel); + Individual ind = wadf.getIndividualDao().getIndividualByURI(DOCUMENT_URI); + Assert.assertNotNull(ind); + + SolrInputDocument doc = new SolrInputDocument(); + doc.addField("ALLTEXT", ""); + + VivoInformationResourceContextNodeFields vircnf = new VivoInformationResourceContextNodeFields(ontModel); + vircnf.modifyDocument(ind, doc, new StringBuffer()); + + Collection values = doc.getFieldValues("ALLTEXT"); + for( Object value : values){ + Assert.assertFalse("rdf:label erroneously added by document modifier:", value.toString().contains(RDFS_LABEL_VALUE)); + } + + VivoAgentContextNodeFields vacnf = new VivoAgentContextNodeFields(ontModel); + vacnf.modifyDocument(ind, doc, new StringBuffer()); + + + + + } + +} diff --git a/test/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFieldsTest.n3 b/test/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFieldsTest.n3 new file mode 100644 index 00000000..8567af77 --- /dev/null +++ b/test/edu/cornell/mannlib/vitro/webapp/search/solr/VivoInformationResourceContextNodeFieldsTest.n3 @@ -0,0 +1,19 @@ +@prefix dc: . +@prefix rdfs: . +@prefix vitro: . +@prefix owl: . +@prefix xsd: . +@prefix rdf: . 
+ + + a , owl:Thing , , ; + rdfs:label "Test Document X"@en-US ; + + ; + + "Test Document X"@en-US ; + vitro:modTime "2011-11-01T11:11:49"^^xsd:dateTime ; + vitro:mostSpecificType + ; + + . \ No newline at end of file