Committing the following changes to dev-search-relevance branch

1) ContextNodesInclusionFactory NIHVIVO-2204, NIHVIVO-2333(partly)
2) IndividualToSolrDoc to replace Entity2LuceneDoc while constructing SolrInputDocuments NIHVIVO-2653
3) LuceneDocument (test utility that prints contents from Lucene/Solr documents)
This commit is contained in:
deepakkoni 2011-05-18 20:35:42 +00:00
parent 4a209a12ce
commit a3471b7102
12 changed files with 3613 additions and 2246 deletions

View file

@ -24,6 +24,9 @@ public class DisplayVocabulary {
/* Individuals */
public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex";
//bk392 for extracting properties beyond context nodes.
public static final String CONTEXT_NODES_URI = NS + "QueryForContextNodes";
/* Page types */
public static final String PAGE_TYPE = NS + "Page";
public static final String HOME_PAGE_TYPE = NS + "HomePage";
@ -35,8 +38,10 @@ public class DisplayVocabulary {
/* Data Properties */
public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping");
public static final String TITLE = NS + "title";
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
//bk392 for extracting properties beyond context nodes.
public static final DatatypeProperty QUERY_FOR_EDUCATIONAL_TRAINING = m_model.createDatatypeProperty(NS + "queryForEducationalTraining");
/* URIs for storing menu.n3 */
public static final String MENU_TEXT_RES = NS + "MenuText";
public static final String HAS_TEXT_REPRESENTATION = NS + "hasMenuText";

View file

@ -0,0 +1,57 @@
package edu.cornell.mannlib.vitro.webapp.search;
/**
 * Names of the fields used in the search index documents.
 *
 * All members are constants: they are declared {@code final} so that no
 * caller can accidentally reassign a field name at runtime and silently
 * break indexing or querying for the whole application.
 */
public class VitroTermNames {
    /** Id of entity, vclass or tab */
    public static final String URI = "URI";
    /** lucene document id */
    public static final String DOCID = "DocId";
    /** java class of the object that the Doc represents. */
    public static final String JCLASS = "JCLASS";
    /** rdf:type */
    public static final String RDFTYPE = "type";
    /** URI of the class group of the individual's class */
    public static final String CLASSGROUP_URI = "classgroup";
    /** Modtime from db */
    public static final String MODTIME = "modTime";

    /** time of index in msec since epoch */
    public static final String INDEXEDTIME = "indexedTime";
    /** timekey of entity in yyyymmddhhmm */
    public static final String TIMEKEY = "TIMEKEY";
    /** time of sunset/end of entity in yyyymmddhhmm */
    public static final String SUNSET = "SUNSET";
    /** time of sunrise/start of entity in yyyymmddhhmm */
    public static final String SUNRISE = "SUNRISE";
    /** entity's moniker */
    public static final String MONIKER = "moniker";

    /** text for 'full text' search, this is stemmed */
    public static final String ALLTEXT = "ALLTEXT";
    /** text for 'full text' search, this is unstemmed for
     * use with wildcards and prefix queries */
    public static final String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
    /** field holding the text gathered from context nodes */
    public static final String CONTEXTNODE = "contextNode";
    /** keywords */
    public static final String KEYWORDS = "KEYWORDS";
    /** Does the individual have a thumbnail image? 1=yes 0=no */
    public static final String THUMBNAIL = "THUMBNAIL";
    /** Should individual be included in full text search results? 1=yes 0=no */
    public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";

    /** local name of a class of the individual, lowercased */
    public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
    /** local name of a class of the individual */
    public static final String CLASSLOCALNAME = "classLocalName";

    // Fields derived from rdfs:label
    /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
    public static final String NAME_RAW = "nameRaw"; // was NAMERAW
    /** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
    public static final String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE
    /** rdfs:label lowercased, tokenized, stop words, no stemming **/
    public static final String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED
    /** rdfs:label lowercased, tokenized, stop words, stemmed **/
    public static final String NAME_STEMMED = "nameStemmed"; // was NAME
}

View file

@ -0,0 +1,637 @@
package edu.cornell.mannlib.vitro.webapp.search.beans;
import javax.servlet.ServletContext;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.hp.hpl.jena.ontology.OntModel;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.QuerySolutionMap;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.Literal;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.rdf.model.ResourceFactory;
import com.hp.hpl.jena.shared.Lock;
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
/**
 * Gathers text from "context nodes" (intermediate individuals such as
 * positions, roles or educational trainings) that are linked to an
 * individual, so that the text can be added to that individual's search
 * document.
 *
 * Every {@code getPropertiesAssociatedWith...} method runs one fixed SPARQL
 * SELECT against the full model, with {@code ?uri} pre-bound to the given
 * individual, and returns the bound values concatenated into one string.
 */
public class ContextNodesInclusionFactory {

    private static final Log log = LogFactory.getLog(ContextNodesInclusionFactory.class);

    /** Looks up the query text stored in the display model for a search configuration. */
    private static final String queryForEducationalTraining = "SELECT ?query WHERE {" +
            "?searchConfig <"+ DisplayVocabulary.QUERY_FOR_EDUCATIONAL_TRAINING + "> ?query . }";

    /** Prefix declarations shared by every context-node query. */
    private static final String PREFIXES = "prefix owl: <http://www.w3.org/2002/07/owl#> "
            + " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
            + " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
            + " prefix core: <http://vivoweb.org/ontology/core#> "
            + " prefix foaf: <http://xmlns.com/foaf/0.1/> "
            + " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
            + " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
            + " prefix bibo: <http://purl.org/ontology/bibo/> ";

    private static final String POSITION_QUERY =
            "SELECT " +
            "(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " +
            " (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " +
            " (str(?TitleOrRole) as ?titleOrRole) (str(?PositionLabel) as ?positionLabel) WHERE {"
            + "?uri rdf:type foaf:Agent ; ?b ?c . "
            + " ?c rdf:type core:Position . "
            + " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . "
            + " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ."
            + " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . "
            + " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
            + " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
            + " OPTIONAL { ?c rdfs:label ?PositionLabel . } "
            + " } ORDER BY ?PositionLabel ";

    private static final String RELATIONSHIP_QUERY =
            "SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " +
            " (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {"
            + "?uri rdf:type foaf:Agent ; ?b ?c . "
            + " ?c rdf:type core:Relationship . "
            + " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . "
            + " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ."
            + " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . "
            + " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . "
            + " } ";

    private static final String AWARD_RECEIPT_QUERY =
            "SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " +
            " (str(?Description) as ?description) (str(?AwardReceiptLabel) as ?awardReceiptLabel) WHERE {"
            + "?uri rdf:type foaf:Agent ; ?b ?c . "
            + " ?c rdf:type core:AwardReceipt . "
            + " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . "
            + " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ."
            + " OPTIONAL { ?c core:description ?Description . } . "
            + " OPTIONAL { ?c rdfs:label ?AwardReceiptLabel . } . "
            + " } ORDER BY ?AwardReceiptLabel";

    private static final String ROLE_QUERY =
            "SELECT DISTINCT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
            + "?uri rdf:type foaf:Agent ; ?b ?c . "
            + " ?c rdf:type core:Role ; core:roleIn ?Organization ."
            + " ?Organization rdfs:label ?OrganizationLabel . "
            + " } ORDER BY ?OrganizationLabel ";

    private static final String EDUCATIONAL_TRAINING_QUERY =
            "SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
            + "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " +
            "(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {"
            + " ?uri rdf:type foaf:Agent ; ?b ?c . "
            + " ?c rdf:type core:EducationalTraining . "
            + "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . "
            + "OPTIONAL { ?c core:majorField ?MajorField .} ."
            + " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }"
            + " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . "
            +"}";

    private static final String INFORMATION_RESOURCE_QUERY =
            "SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) "
            + "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " +
            "(str(?Features) as ?features) WHERE {"
            + " ?uri rdf:type core:InformationResource . "
            + "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." +
            "?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource } . "
            + "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ."
            + " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea ; core:researchAreaOf ?h . ?h rdfs:label ?ResearchAreaOf . } "
            + " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . "
            +"}";

    /** Model that the context-node queries are executed against. */
    private final OntModel fullModel;
    private final String contextNodeURI;
    /** Query text read from the display model at construction; empty if none is configured. */
    private final String query;

    /**
     * @param contextNodeURI URI of the search configuration individual in the display model
     * @param displayOntModel display model holding the configured query text
     * @param context servlet context from which the full model is obtained
     */
    public ContextNodesInclusionFactory(String contextNodeURI,
            OntModel displayOntModel, ServletContext context) {
        this.fullModel = ModelContext.getJenaOntModel(context);
        this.contextNodeURI = contextNodeURI;
        this.query = getQueryFromModel(contextNodeURI, displayOntModel);
    }

    /**
     * Reads the query text attached to the given search configuration in the
     * display model.
     *
     * The lexical form of the literal is returned. The previous implementation
     * used {@code toString()} and chopped the last three characters (which only
     * works for a value carrying a two-letter language tag such as "@en") and
     * threw StringIndexOutOfBoundsException when nothing was configured.
     *
     * @param uri URI bound to ?searchConfig
     * @param displayOntModel model to query
     * @return the query text of the last matching solution, or "" if there is none
     */
    private String getQueryFromModel(String uri, OntModel displayOntModel) {
        String resultQuery = "";
        QuerySolutionMap initialBinding = new QuerySolutionMap();
        initialBinding.add("searchConfig", ResourceFactory.createResource(uri));
        Query configQuery = QueryFactory.create(queryForEducationalTraining);

        displayOntModel.enterCriticalSection(Lock.READ);
        try {
            QueryExecution qExec = QueryExecutionFactory.create(configQuery, displayOntModel, initialBinding);
            try {
                ResultSet results = qExec.execSelect();
                while (results.hasNext()) {
                    QuerySolution soln = results.nextSolution();
                    RDFNode node = soln.get("query");
                    if (node != null && node.isLiteral()) {
                        resultQuery = ((Literal) node).getLexicalForm();
                    } else {
                        log.warn("unexpected non-literal in the object position for context node queries " + node);
                    }
                }
            } catch (RuntimeException e) {
                log.error("could not read context node query for " + uri, e);
            } finally {
                qExec.close();
            }
        } finally {
            displayOntModel.leaveCriticalSection();
        }
        return resultQuery;
    }

    // Design note kept from the original author: a more general form would be
    //   getFieldValues(uri, modelToQuery, queries)
    // where each query is "SELECT ?field ?value WHERE ...", driven from another
    // object that obtains the queries and the model, iterates the individuals
    // and indexes the returned fields. Open question: how to handle multiple
    // values for a field.

    /**
     * Text from core:Position nodes linked to the foaf:Agent with the given URI.
     *
     * @param uri URI of the individual
     * @return bound values, each preceded by a space; "" if nothing was found
     */
    public String getPropertiesAssociatedWithPosition(String uri){
        return collectValues(uri, POSITION_QUERY,
                "hrJobTitle", "involvedOrganizationName", "positionForPerson",
                "positionInOrganization", "titleOrRole", "positionLabel");
    }

    /**
     * Text from core:Relationship nodes linked to the foaf:Agent with the given URI.
     *
     * @param uri URI of the individual
     * @return bound values, each preceded by a space; "" if nothing was found
     */
    public String getPropertiesAssociatedWithRelationship(String uri){
        return collectValues(uri, RELATIONSHIP_QUERY,
                "advisee", "degreeCandidacy", "linkedAuthor", "linkedInformationResource");
    }

    /**
     * Text from core:AwardReceipt nodes linked to the foaf:Agent with the given URI.
     *
     * @param uri URI of the individual
     * @return bound values, each preceded by a space; "" if nothing was found
     */
    public String getPropertiesAssociatedWithAwardReceipt(String uri){
        return collectValues(uri, AWARD_RECEIPT_QUERY,
                "awardConferredBy", "awardOrHonorFor", "description", "awardReceiptLabel");
    }

    /**
     * Labels of the organizations of core:Role nodes linked to the foaf:Agent
     * with the given URI.
     *
     * @param uri URI of the individual
     * @return bound values, each preceded by a space; "" if nothing was found
     */
    public String getPropertiesAssociatedWithRole(String uri){
        return collectValues(uri, ROLE_QUERY, "organizationLabel");
    }

    /**
     * Text from core:EducationalTraining nodes linked to the foaf:Agent with
     * the given URI.
     *
     * @param uri URI of the individual
     * @return bound values, each preceded by a space; "" if nothing was found
     */
    public String getPropertiesAssociatedWithEducationalTraining(String uri){
        return collectValues(uri, EDUCATIONAL_TRAINING_QUERY,
                "academicDegreeLabel", "academicDegreeAbbreviation", "majorField",
                "departmentOrSchool", "trainingAtOrganizationLabel");
    }

    /**
     * Author, editor, subject-area and feature labels for the
     * core:InformationResource with the given URI.
     *
     * @param uri URI of the individual
     * @return bound values, each preceded by a space; "" if nothing was found
     */
    public String getPropertiesAssociatedWithInformationResource(String uri){
        return collectValues(uri, INFORMATION_RESOURCE_QUERY,
                "linkedAuthor", "linkedInformationResource", "editor",
                "subjectArea", "researchAreaOf", "features");
    }

    /**
     * Runs one SELECT query against the full model with ?uri bound to the
     * given individual and concatenates the requested variables of every
     * solution, in the order given, each preceded by a single space.
     *
     * Unbound variables are expected (most of them come from OPTIONAL
     * patterns), so they are only reported at debug level. A failure while
     * executing the query is logged and whatever was collected so far is
     * returned, so that one bad individual does not abort indexing.
     *
     * @param uri URI bound to ?uri
     * @param selectQuery query text without prefix declarations
     * @param variableNames result variables to collect, in output order
     * @return the concatenated values, "" if there were none
     */
    private String collectValues(String uri, String selectQuery, String... variableNames) {
        StringBuilder propertyValues = new StringBuilder();

        QuerySolutionMap initialBinding = new QuerySolutionMap();
        initialBinding.add("uri", ResourceFactory.createResource(uri));

        Query sparqlQuery = QueryFactory.create(PREFIXES + selectQuery, Syntax.syntaxARQ);

        fullModel.enterCriticalSection(Lock.READ);
        try {
            QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
            try {
                ResultSet results = qExec.execSelect();
                while (results.hasNext()) {
                    QuerySolution soln = results.nextSolution();
                    for (String variableName : variableNames) {
                        RDFNode value = soln.get(variableName);
                        if (value != null) {
                            propertyValues.append(' ').append(value.toString());
                        } else if (log.isDebugEnabled()) {
                            log.debug(variableName + " is null ");
                        }
                    }
                }
            } catch (RuntimeException e) {
                log.error("context node query failed for " + uri, e);
            } finally {
                qExec.close();
            }
        } finally {
            fullModel.leaveCriticalSection();
        }
        return propertyValues.toString();
    }
}

View file

@ -2,6 +2,8 @@
package edu.cornell.mannlib.vitro.webapp.search.lucene;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@ -21,9 +23,11 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.lucene.test.LuceneDocument;
/**
* This class expects that Entities passed to it will have
@ -31,6 +35,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
* be as full as possible.
*/
public class Entity2LuceneDoc implements Obj2DocIface{
/** These are the terms for the lucene index */
public static class VitroLuceneTermNames{
/** Id of entity, vclass or tab */
@ -61,6 +66,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
/** text for 'full text' search, this is unstemmed for
* use with wildcards and prefix queries */
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
/** class name for storing context nodes **/
public static final String CONTEXTNODE = "contextNode";
/** keywords */
public static final String KEYWORDS = "KEYWORDS";
/** Does the individual have a thumbnail image? 1=yes 0=no */
@ -103,12 +110,19 @@ public class Entity2LuceneDoc implements Obj2DocIface{
private ProhibitedFromSearch classesProhibitedFromSearch;
private IndividualProhibitedFromSearch individualProhibited;
private ContextNodesInclusionFactory contextNodesInclusionFactory;
private static HashMap<String, String> IndividualURIToObjectProperties = new HashMap<String, String>();
private static HashSet<String> objectProperties = new HashSet<String>();
public Entity2LuceneDoc(
ProhibitedFromSearch classesProhibitedFromSearch,
IndividualProhibitedFromSearch individualProhibited){
IndividualProhibitedFromSearch individualProhibited, ContextNodesInclusionFactory contextNodesInclusionFactory){
this.classesProhibitedFromSearch = classesProhibitedFromSearch;
this.individualProhibited = individualProhibited;
this.contextNodesInclusionFactory = contextNodesInclusionFactory;
}
public boolean canTranslate(Object obj) {
@ -123,7 +137,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
String value;
Document doc = new Document();
String classPublicNames = "";
LuceneDocument document = new LuceneDocument();
//DocId
String id = ent.getURI();
@ -166,12 +180,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{
Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
doc.add( typeField);
document.setRDFTYPE(clz.getURI());
if(clz.getLocalName() != null){
Field classLocalName = new Field(term.CLASSLOCALNAME, clz.getLocalName(), Field.Store.YES, Field.Index.ANALYZED);
Field classLocalNameLowerCase = new Field(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase(), Field.Store.YES, Field.Index.ANALYZED);
doc.add(classLocalName);
doc.add(classLocalNameLowerCase);
document.setCLASSLOCALNAME(clz.getLocalName());
}
if( clz.getName() != null )
@ -183,22 +199,29 @@ public class Entity2LuceneDoc implements Obj2DocIface{
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
// classGroupField.setBoost(FIELD_BOOST);
doc.add(classGroupField);
document.setCLASSGROUP_URI(clz.getGroupURI());
}
}
}
doc.add( new Field(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0",
Field.Store.NO,Field.Index.NOT_ANALYZED_NO_NORMS) );
document.setPROHIBITED_FROM_TEXT_RESULTS(prohibited?"1":"0");
/* lucene DOCID */
doc.add( new Field(term.DOCID, entClassName + id,
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
document.setDOCID(entClassName + id);
//vitro Id
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
document.setURI(id);
//java class
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
document.setJCLASS(entClassName);
// Individual label
if( ent.getRdfsLabel() != null )
value=ent.getRdfsLabel();
@ -208,10 +231,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
value = ent.getLocalName();
}
Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
nameRaw.setBoost(NAME_BOOST);
doc.add(nameRaw);
document.setNAME(value);
// RY Not sure if we need to store this. For Solr, see schema.xml field definition.
Field nameLowerCase = new Field(term.NAME_LOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED);
@ -224,14 +247,30 @@ public class Entity2LuceneDoc implements Obj2DocIface{
Field nameStemmed = new Field(term.AC_NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
nameStemmed.setBoost(NAME_BOOST);
doc.add(nameStemmed);
doc.add(nameStemmed);
String contextNodePropertyValues;
// if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
contextNodePropertyValues = contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI());
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI());
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI());
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI());
// }
Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED );
doc.add(contextNodeInformation);
document.setCONTEXTNODE(contextNodePropertyValues);
//Moniker
if(ent.getMoniker() != null){
Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
doc.add(moniker);
document.setMONIKER(ent.getMoniker());
}
//boost for entity
@ -274,31 +313,45 @@ public class Entity2LuceneDoc implements Obj2DocIface{
Object anon[] = { new Long((new DateTime() ).getMillis()) };
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
document.setINDEXEDTIME(String.format("%019d", anon));
if( ! prohibited ){
//ALLTEXT, all of the 'full text'
StringBuffer alltext = new StringBuffer();
String t=null;
value ="";
value+= " "+( ((t=ent.getName()) == null)?"":t );
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
value+= " "+ getKeyterms(ent);
value ="";
value+= " "+( ((t=ent.getName()) == null)?"":t );
alltext.append("\t NAME: " + ( ((t=ent.getName()) == null)?"":t ));
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
alltext.append("\t ANCHOR: " + ( ((t=ent.getAnchor()) == null)?"":t));
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
alltext.append("\t MONIKER: " + ( ((t=ent.getMoniker()) == null)?"":t ));
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
alltext.append("\t DESCRIPTION: " + ( ((t=ent.getDescription()) == null)?"":t ));
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
alltext.append("\t BLURB: " + ( ((t=ent.getBlurb()) == null)?"":t ));
value+= " "+ getKeyterms(ent);
alltext.append("\t KEYTERMS: " + getKeyterms(ent));
value+= " " + classPublicNames;
value+= " " + classPublicNames;
alltext.append(" CLASSPUBLICNAMES: " + classPublicNames);
List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
if (dataPropertyStatements != null) {
alltext.append("\n DATA_PROPERTY_STATEMENTS \n -------------------------------- \n");
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
while (dataPropertyStmtIter.hasNext()) {
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t );
alltext.append("\n " + ( ((t=dataPropertyStmt.getData()) == null)?"":t ));
}
}
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
if (objectPropertyStatements != null) {
alltext.append("\n OBJECT_PROPERTY_STATEMENTS \n -------------------------------- \n");
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
while (objectPropertyStmtIter.hasNext()) {
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
@ -306,17 +359,31 @@ public class Entity2LuceneDoc implements Obj2DocIface{
continue;
try {
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
alltext.append("\n " +( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t )
+ " : " + ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ));
if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
//IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) );
objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ));
}
} catch (Exception e) {
log.debug("could not index name of related object: " + e.getMessage());
}
}
}
//stemmed terms
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
//unstemmed terms
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
document.setALLTEXT(alltext.toString());
}
document.writeToLog();
// log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n");
log.info(" \n Object Properties " + objectProperties.toString() + "\n\n");
return doc;
}

View file

@ -10,6 +10,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_UNSTEMMED;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CONTEXTNODE;
import java.io.File;
import java.io.IOException;
@ -41,6 +42,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
@ -111,10 +113,16 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
getAnalyzer());
context.setAttribute(ANALYZER, getAnalyzer());
//bk392 adding another argument to Entity2LuceneDoc
// that takes care of sparql queries for context nodes.
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
Entity2LuceneDoc translator = new Entity2LuceneDoc(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context) );
new IndividualProhibitedFromSearch(context),
new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)
);
indexer.addObj2Doc(translator);
context.setAttribute(LuceneIndexer.class.getName(), indexer);
@ -250,9 +258,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
analyzer.addAnalyzer(AC_NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29));
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
return analyzer;
}

View file

@ -1,7 +1,7 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.lucene;
package edu.cornell.mannlib.vitro.webapp.search.lucene;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
@ -27,91 +27,93 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
/**
* Setup objects for lucene searching and indexing.
*
* The indexing and search objects, IndexBuilder and Searcher are found by the
* controllers IndexController and SearchController through the servletContext.
* This object will have the method contextInitialized() called when the tomcat
* server starts this webapp.
*
* The contextInitialized() will try to find the lucene index directory,
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
*
* To execute this at context creation put this in web.xml:
<listener>
<listener-class>
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
</listener-class>
</listener>
* @author bdc34
*
*/
public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
/**
* Setup objects for lucene searching and indexing.
*
* The indexing and search objects, IndexBuilder and Searcher are found by the
* controllers IndexController and SearchController through the servletContext.
* This object will have the method contextInitialized() called when the tomcat
* server starts this webapp.
*
* The contextInitialized() will try to find the lucene index directory,
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
*
* To execute this at context creation put this in web.xml:
<listener>
<listener-class>
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
</listener-class>
</listener>
* @author bdc34
*
*/
public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
private static String indexDir = null;
private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName());
private static final String PROPERTY_VITRO_HOME = "vitro.home.directory";
private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName());
private static final String PROPERTY_VITRO_HOME = "vitro.home.directory";
private static final String LUCENE_SUBDIRECTORY_NAME = "luceneIndex";
/**
* Gets run to set up DataSource when the webapp servlet context gets created.
*/
/**
* Gets run to set up DataSource when the webapp servlet context gets created.
*/
@Override
@SuppressWarnings("unchecked")
public void contextInitialized(ServletContextEvent sce) {
ServletContext context = sce.getServletContext();
log.info("**** Running "+this.getClass().getName()+".contextInitialized()");
try{
indexDir = getIndexDirName(sce);
log.info("Lucene indexDir: " + indexDir);
setBoolMax();
HashSet dataPropertyBlacklist = new HashSet<String>();
context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
HashSet objectPropertyBlacklist = new HashSet<String>();
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
@SuppressWarnings("unchecked")
public void contextInitialized(ServletContextEvent sce) {
ServletContext context = sce.getServletContext();
log.info("**** Running "+this.getClass().getName()+".contextInitialized()");
try{
indexDir = getIndexDirName(sce);
log.info("Lucene indexDir: " + indexDir);
setBoolMax();
HashSet dataPropertyBlacklist = new HashSet<String>();
context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
HashSet objectPropertyBlacklist = new HashSet<String>();
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
//This is where to get a LucenIndex from. The indexer will
//need to reference this to notify it of updates to the index
LuceneIndexFactory lif = LuceneIndexFactory.setup(context, indexDir);
String liveIndexDir = lif.getLiveIndexDir(context);
//here we want to put the LuceneIndex object into the application scope
LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer());
//here we want to put the LuceneIndex object into the application scope
LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer());
context.setAttribute(LuceneSetup.ANALYZER, getAnalyzer());
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
Entity2LuceneDoc translator = new Entity2LuceneDoc(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context) );
new IndividualProhibitedFromSearch(context),
new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context));
indexer.addObj2Doc(translator);
indexer.setLuceneIndexFactory(lif);
//This is where the builder gets the list of places to try to
//get objects to index. It is filtered so that non-public text
//does not get into the search index.
WebappDaoFactory wadf =
(WebappDaoFactory) context.getAttribute("webappDaoFactory");
//This is where the builder gets the list of places to try to
//get objects to index. It is filtered so that non-public text
//does not get into the search index.
WebappDaoFactory wadf =
(WebappDaoFactory) context.getAttribute("webappDaoFactory");
VitroFilters vf = VitroFilterUtils.getPublicFilter(context);
wadf = new WebappDaoFactoryFiltering(wadf,vf);
List sources = new ArrayList();
sources.add(wadf.getIndividualDao());
IndexBuilder builder = new IndexBuilder(context,indexer,sources);
// here we add the IndexBuilder with the LuceneIndexer
// to the servlet context so we can access it later in the webapp.
wadf = new WebappDaoFactoryFiltering(wadf,vf);
List sources = new ArrayList();
sources.add(wadf.getIndividualDao());
IndexBuilder builder = new IndexBuilder(context,indexer,sources);
// here we add the IndexBuilder with the LuceneIndexer
// to the servlet context so we can access it later in the webapp.
context.setAttribute(IndexBuilder.class.getName(),builder);
//set up listeners so search index builder is notified of changes to model
@ -119,36 +121,36 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
OntModel jenaOntModel = (OntModel)sce.getServletContext().getAttribute("jenaOntModel");
SearchReindexingListener srl = new SearchReindexingListener( builder );
ModelContext.registerListenerForChanges(sce.getServletContext(), srl);
}catch(Exception ex){
log.error("Could not setup lucene full text search." , ex);
}
log.debug("**** End of "+this.getClass().getName()+".contextInitialized()");
}
/**
* Gets run when the webApp Context gets destroyed.
*/
}catch(Exception ex){
log.error("Could not setup lucene full text search." , ex);
}
log.debug("**** End of "+this.getClass().getName()+".contextInitialized()");
}
/**
* Gets run when the webApp Context gets destroyed.
*/
@Override
public void contextDestroyed(ServletContextEvent sce) {
    log.info("**** Running "+this.getClass().getName()+".contextDestroyed()");
    // contextInitialized() logs and swallows any setup failure, so the
    // IndexBuilder attribute may never have been registered. Guard against
    // that instead of failing with a NullPointerException during shutdown.
    Object registered = sce.getServletContext().getAttribute(IndexBuilder.class.getName());
    if (registered instanceof IndexBuilder) {
        ((IndexBuilder) registered).stopIndexingThread();
    } else {
        log.warn("No IndexBuilder found in the servlet context; there is no indexing thread to stop.");
    }
}
/**
 * Raises the static clause limit on Lucene's BooleanQuery.
 *
 * Wild card searches are first broken into many boolean searches that are
 * OR'ed together, so a query that would match a lot of records needs a
 * high maximum clause count.
 */
public static void setBoolMax() {
    final int maxBooleanClauses = 16384;
    BooleanQuery.setMaxClauseCount(maxBooleanClauses);
}
builder.stopIndexingThread();
}
/**
 * Raises the static clause limit on Lucene's BooleanQuery.
 *
 * Wild card searches are first broken into many boolean searches that are
 * OR'ed together, so a query that would match a lot of records needs a
 * high maximum clause count.
 */
public static void setBoolMax() {
    final int maxBooleanClauses = 16384;
    BooleanQuery.setMaxClauseCount(maxBooleanClauses);
}
/**
* Gets the name of the directory to store the lucene index in. The
* {@link ConfigurationProperties} should have a property named
@ -190,14 +192,14 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
return luceneDir.getPath();
}
/**
 * Creates the analyzer that is used both when building the index and
 * when analyzing the incoming search terms.
 *
 * @return a newly constructed CJKAnalyzer
 */
private Analyzer getAnalyzer() {
    Analyzer cjkAnalyzer = new CJKAnalyzer();
    return cjkAnalyzer;
}
}
/**
 * Creates the analyzer that is used both when building the index and
 * when analyzing the incoming search terms.
 *
 * @return a newly constructed CJKAnalyzer
 */
private Analyzer getAnalyzer() {
    Analyzer cjkAnalyzer = new CJKAnalyzer();
    return cjkAnalyzer;
}
}

View file

@ -0,0 +1,209 @@
package edu.cornell.mannlib.vitro.webapp.search.lucene.test;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Test utility that collects the values written to the fields of a
 * Lucene/Solr document so they can be printed to the log in one piece.
 *
 * Each field is a plain String with a getter and a setter; nothing is
 * validated. A field that was never set is rendered as "null" by
 * {@link #toString()}.
 */
public class LuceneDocument {

    private static final Log log = LogFactory.getLog(LuceneDocument.class.getName());

    String URI;
    String DOCID;
    String JCLASS;
    String RDFTYPE;
    String CLASSGROUP_URI;
    String MODTIME;
    String NAME;
    String PORTAL;
    String INDEXEDTIME;
    String TIMEKEY;
    String SUNSET;
    String MONIKER;
    String ALLTEXT;
    String KEYWORDS;
    String THUMBNAIL;
    String PROHIBITED_FROM_TEXT_RESULTS;
    String CLASSLOCALNAME;
    String CONTEXTNODE;

    // Not referenced anywhere in this class.
    // NOTE(review): "~" is expanded by shells, not by the Java file APIs, so
    // this path would need resolving against user.home before it is opened.
    static final String FILE = "~/Desktop/LuceneIndividuals.txt";

    public String getURI() {
        return URI;
    }

    public void setURI(String uri) {
        this.URI = uri;
    }

    public String getDOCID() {
        return DOCID;
    }

    public void setDOCID(String docId) {
        this.DOCID = docId;
    }

    public String getJCLASS() {
        return JCLASS;
    }

    public void setJCLASS(String jClass) {
        this.JCLASS = jClass;
    }

    public String getRDFTYPE() {
        return RDFTYPE;
    }

    public void setRDFTYPE(String rdfType) {
        this.RDFTYPE = rdfType;
    }

    public String getCLASSGROUP_URI() {
        return CLASSGROUP_URI;
    }

    public void setCLASSGROUP_URI(String classGroupUri) {
        this.CLASSGROUP_URI = classGroupUri;
    }

    public String getMODTIME() {
        return MODTIME;
    }

    public void setMODTIME(String modTime) {
        this.MODTIME = modTime;
    }

    public String getNAME() {
        return NAME;
    }

    public void setNAME(String name) {
        this.NAME = name;
    }

    public String getPORTAL() {
        return PORTAL;
    }

    public void setPORTAL(String portal) {
        this.PORTAL = portal;
    }

    public String getINDEXEDTIME() {
        return INDEXEDTIME;
    }

    public void setINDEXEDTIME(String indexedTime) {
        this.INDEXEDTIME = indexedTime;
    }

    public String getTIMEKEY() {
        return TIMEKEY;
    }

    public void setTIMEKEY(String timeKey) {
        this.TIMEKEY = timeKey;
    }

    public String getSUNSET() {
        return SUNSET;
    }

    public void setSUNSET(String sunset) {
        this.SUNSET = sunset;
    }

    public String getMONIKER() {
        return MONIKER;
    }

    public void setMONIKER(String moniker) {
        this.MONIKER = moniker;
    }

    public String getALLTEXT() {
        return ALLTEXT;
    }

    public void setALLTEXT(String allText) {
        this.ALLTEXT = allText;
    }

    public String getKEYWORDS() {
        return KEYWORDS;
    }

    public void setKEYWORDS(String keywords) {
        this.KEYWORDS = keywords;
    }

    public String getTHUMBNAIL() {
        return THUMBNAIL;
    }

    public void setTHUMBNAIL(String thumbnail) {
        this.THUMBNAIL = thumbnail;
    }

    public String getPROHIBITED_FROM_TEXT_RESULTS() {
        return PROHIBITED_FROM_TEXT_RESULTS;
    }

    public void setPROHIBITED_FROM_TEXT_RESULTS(String prohibitedFromTextResults) {
        this.PROHIBITED_FROM_TEXT_RESULTS = prohibitedFromTextResults;
    }

    public String getCLASSLOCALNAME() {
        return CLASSLOCALNAME;
    }

    public void setCLASSLOCALNAME(String classLocalName) {
        this.CLASSLOCALNAME = classLocalName;
    }

    /** Getter added so CONTEXTNODE is readable like every other field. */
    public String getCONTEXTNODE() {
        return CONTEXTNODE;
    }

    public void setCONTEXTNODE(String contextNodePropertyValues) {
        this.CONTEXTNODE = contextNodePropertyValues;
    }

    /**
     * Renders every field as a "LABEL : value" line below a separator rule.
     *
     * @return the multi-line dump; unset fields appear as "null"
     */
    @Override
    public String toString(){
        // Local, single-threaded buffer: StringBuilder avoids the needless
        // synchronization of StringBuffer.
        StringBuilder result = new StringBuilder();
        result.append("\n==================================\n");
        result.append("URI : ").append(URI);
        result.append("\nDOCID : ").append(DOCID);
        result.append("\nJCLASS : ").append(JCLASS);
        result.append("\nRDFTYPE : ").append(RDFTYPE);
        result.append("\nCLASSGROUP_URI : ").append(CLASSGROUP_URI);
        result.append("\nMODTIME : ").append(MODTIME);
        result.append("\nNAME : ").append(NAME);
        result.append("\nPORTAL : ").append(PORTAL);
        result.append("\nINDEXEDTIME : ").append(INDEXEDTIME);
        result.append("\nCONTEXTNODE : ").append(CONTEXTNODE);
        result.append("\nTIMEKEY : ").append(TIMEKEY);
        result.append("\nSUNSET : ").append(SUNSET);
        result.append("\nMONIKER : ").append(MONIKER);
        result.append("\nALLTEXT : ").append(ALLTEXT);
        result.append("\nKEYWORDS : ").append(KEYWORDS);
        result.append("\nTHUMBNAIL : ").append(THUMBNAIL);
        result.append("\nPROHIBITED_FROM_TEXT_RESULTS : ").append(PROHIBITED_FROM_TEXT_RESULTS);
        result.append("\nCLASSLOCALNAME : ").append(CLASSLOCALNAME);
        return result.toString();
    }

    /** Writes the {@link #toString()} dump to the log at info level. */
    public void writeToLog(){
        log.info(this.toString());
    }
}

View file

@ -2,23 +2,261 @@
package edu.cornell.mannlib.vitro.webapp.search.solr;
import org.apache.solr.common.SolrDocument;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.document.Document;
import org.apache.solr.client.solrj.beans.Field;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.joda.time.DateTime;
import com.hp.hpl.jena.vocabulary.OWL;
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
public class IndividualToSolrDocument implements Obj2DocIface {
protected LuceneDocToSolrDoc luceneToSolr;
protected Entity2LuceneDoc entityToLucene;
public IndividualToSolrDocument(Entity2LuceneDoc e2d){
entityToLucene = e2d;
luceneToSolr = new LuceneDocToSolrDoc();
public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName());
public static VitroTermNames term = new VitroTermNames();
private static String entClassName = Individual.class.getName();
private ProhibitedFromSearch classesProhibitedFromSearch;
private IndividualProhibitedFromSearch individualProhibitedFromSearch;
private ContextNodesInclusionFactory contextNodesInclusionFactory;
private static HashSet<String> objectProperties = new HashSet<String>();
/**
 * Creates a translator that turns Individuals into SolrInputDocuments.
 *
 * @param classesProhibitedFromSearch consulted by translate() to flag individuals whose classes are prohibited from text results
 * @param individualProhibitedFromSearch consulted by translate() to skip prohibited individuals entirely
 * @param contextNodesInclusionFactory supplies the context node property values that translate() adds to the document
 */
public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch,
        IndividualProhibitedFromSearch individualProhibitedFromSearch,
        ContextNodesInclusionFactory contextNodesInclusionFactory){
    // Plain collaborator wiring; nothing is validated or copied here.
    this.contextNodesInclusionFactory = contextNodesInclusionFactory;
    this.individualProhibitedFromSearch = individualProhibitedFromSearch;
    this.classesProhibitedFromSearch = classesProhibitedFromSearch;
}
/**
 * Builds the SolrInputDocument for an Individual.
 *
 * Returns null, meaning "do not index", when the object is not an
 * Individual, has no URI, lives in one of the reserved vitro/OWL
 * namespaces, is individually prohibited from search, or has a type in
 * the OWL namespace other than owl:Thing.
 *
 * @param obj the object to translate; only Individuals are handled
 * @return the populated SolrInputDocument, or null if nothing should be indexed
 * @throws IndexingException declared by Obj2DocIface
 */
@Override
public Object translate(Object obj) throws IndexingException{
    long tProhibited = System.currentTimeMillis();

    if(!(obj instanceof Individual))
        return null;

    Individual ent = (Individual)obj;
    String value;
    StringBuilder classPublicNames = new StringBuilder();
    SolrInputDocument doc = new SolrInputDocument();

    //DocId
    String id = ent.getURI();
    log.debug("translating " + id);

    if(id == null){
        log.debug("cannot add individuals without URIs to lucene Index");
        return null;
    }else if( id.startsWith(VitroVocabulary.vitroURI) ||
            id.startsWith(VitroVocabulary.VITRO_PUBLIC) ||
            id.startsWith(VitroVocabulary.PSEUDO_BNODE_NS) ||
            id.startsWith(OWL.NS)){
        log.debug("not indexing because of namespace:" + id);
        return null;
    }

    //filter out class groups, owl:ObjectProperties etc..
    if(individualProhibitedFromSearch.isIndividualProhibited(id)){
        return null;
    }

    log.debug("time to check if individual is prohibited:" + Long.toString(System.currentTimeMillis() - tProhibited));

    // Types and classgroups
    boolean prohibited = false;
    List<VClass> vclasses = ent.getVClasses(false);

    long tClassgroup = System.currentTimeMillis();
    for(VClass clz : vclasses){
        if(clz.getURI() == null){
            continue;
        }else if(OWL.Thing.getURI().equals(clz.getURI())){
            //index individuals of type owl:Thing, just don't add owl:Thing as the type field in the index
            continue;
        } else if(clz.getURI().startsWith(OWL.NS)){
            log.debug("not indexing " + id + " because of type " + clz.getURI());
            return null;
        } else {
            if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI()))
                prohibited = true;

            // class boosts accumulate on top of the document's current boost
            if( clz.getSearchBoost() != null)
                doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost());

            doc.addField(term.RDFTYPE, clz.getURI());

            if(clz.getLocalName() != null){
                doc.addField(term.CLASSLOCALNAME, clz.getLocalName());
                doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase());
            }

            if(clz.getName() != null){
                // Separate the names; without the space adjacent class
                // names fuse into a single unsearchable token.
                if(classPublicNames.length() > 0)
                    classPublicNames.append(' ');
                classPublicNames.append(clz.getName());
            }

            //Classgroup URI
            if(clz.getGroupURI() != null){
                doc.addField(term.CLASSGROUP_URI,clz.getGroupURI());
            }
        }
    }

    log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup));

    doc.addField(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0");

    //lucene DocID
    doc.addField(term.DOCID, entClassName + id);

    //vitro id
    doc.addField(term.URI, id);

    //java class
    doc.addField(term.JCLASS, entClassName);

    //Individual Label
    if(ent.getRdfsLabel() != null)
        value = ent.getRdfsLabel();
    else{
        // this branch runs when the label is missing, so say "without"
        log.debug("Using local name for individual without rdfs:label " + ent.getURI());
        value = ent.getLocalName();
    }

    doc.addField(term.NAME_RAW, value, NAME_BOOST);
    doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST);
    doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
    doc.addField(term.NAME_STEMMED, value, NAME_BOOST);

    long tContextNodes = System.currentTimeMillis();

    String contextNodePropertyValues = "";
    contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
    contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI());
    contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI());
    contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI());
    contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
    contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI());

    doc.addField(term.CONTEXTNODE, contextNodePropertyValues);

    log.debug("time to fire contextnode queries and include them in the index: " + Long.toString(System.currentTimeMillis() - tContextNodes));

    long tMoniker = System.currentTimeMillis();

    //Moniker
    if(ent.getMoniker() != null){
        doc.addField(term.MONIKER, ent.getMoniker());
    }

    //boost for entity
    // NOTE(review): this replaces, rather than adds to, the boost
    // accumulated from the classes above - confirm that is intended.
    if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0)
        doc.setDocumentBoost(ent.getSearchBoost());

    //thumbnail
    try{
        if(ent.hasThumb())
            doc.addField(term.THUMBNAIL, "1");
        else
            doc.addField(term.THUMBNAIL, "0");
    }catch(Exception ex){
        log.debug("could not index thumbnail: " + ex);
    }

    //time of index in millis past epoch, zero padded to 19 digits
    doc.addField(term.INDEXEDTIME, String.format("%019d", new DateTime().getMillis()));

    log.debug("time to include moniker , thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker));

    long tPropertyStatements = System.currentTimeMillis();
    if(!prohibited){
        //ALLTEXT, all of the 'full text'
        StringBuilder allText = new StringBuilder();
        appendText(allText, ent.getName());
        appendText(allText, ent.getAnchor());
        appendText(allText, ent.getMoniker());
        appendText(allText, ent.getDescription());
        appendText(allText, ent.getBlurb());
        appendText(allText, classPublicNames.toString());

        List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
        if (dataPropertyStatements != null) {
            for (DataPropertyStatement dataPropertyStmt : dataPropertyStatements) {
                appendText(allText, dataPropertyStmt.getData());
            }
        }

        List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
        if (objectPropertyStatements != null) {
            for (ObjectPropertyStatement objectPropertyStmt : objectPropertyStatements) {
                if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
                    continue;
                try {
                    // the argument is evaluated before anything is appended,
                    // so a failing lookup leaves allText untouched
                    appendText(allText, objectPropertyStmt.getObject().getName());

                    if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
                        // NOTE(review): this static set is only ever added
                        // to in the visible code - confirm it is still needed.
                        String propertyUri = objectPropertyStmt.getProperty().getURI();
                        objectProperties.add(propertyUri == null ? "" : propertyUri);
                    }
                } catch (Exception e) {
                    log.debug("could not index name of related object: " + e.getMessage());
                }
            }
        }

        log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements));

        String fullText = allText.toString();
        doc.addField(term.ALLTEXT, fullText);
        doc.addField(term.ALLTEXTUNSTEMMED, fullText);
    }

    return doc;
}

/** Appends a single space followed by text; a null text contributes only the space. */
private static void appendText(StringBuilder target, String text) {
    target.append(' ');
    if (text != null) {
        target.append(text);
    }
}
// public IndividualToSolrDocument(Entity2LuceneDoc e2d){
//// entityToLucene = e2d;
// luceneToSolr = new LuceneDocToSolrDoc();
// }
@Override
public boolean canTranslate(Object obj) {
return obj != null && obj instanceof Individual;
@ -34,14 +272,23 @@ public class IndividualToSolrDocument implements Obj2DocIface {
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");
}
@Override
public Object translate(Object obj) throws IndexingException {
return luceneToSolr.translate( entityToLucene.translate( obj ) );
}
// @Override
// public Object translate(Object obj) throws IndexingException {
// return luceneToSolr.translate( entityToLucene.translate( obj ) );
// }
/**
 * Turns a search hit back into a minimal Individual.
 *
 * The former delegation to luceneToSolr is dropped: it sat in front of
 * this implementation and made it unreachable.
 *
 * @param result a search hit; only Lucene Document instances are understood
 * @return an IndividualImpl carrying just the hit's URI, or null for null
 *         or any other kind of object
 */
@Override
public Object unTranslate(Object result) {
    // instanceof is already false for null, so no separate null check is needed
    if (!(result instanceof Document)) {
        // NOTE(review): hits returned by SolrJ are SolrDocument objects, not
        // Lucene Documents - confirm what callers actually pass in here.
        return null;
    }
    Document hit = (Document) result;
    Individual ent = new IndividualImpl();
    ent.setURI(hit.get(term.URI));
    return ent;
}
public static float NAME_BOOST = 3.0F;
}

View file

@ -23,6 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
@ -67,10 +68,15 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
/* setup the individual to solr doc translation */
//first we need a ent2luceneDoc translator
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context) );
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
// Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
// new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
// new IndividualProhibitedFromSearch(context),
// new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context));
// IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument(
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
new IndividualProhibitedFromSearch(context),
new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context));
List<Obj2DocIface> o2d = new ArrayList<Obj2DocIface>();
o2d.add(indToSolrDoc);