Committing the following changes to dev-search-relevance branch
1) ContextNodesInclusionFactory NIHVIVO-2204, NIHVIVO-2333(partly) 2) IndividualToSolrDoc to replace Entity2LuceneDoc while constructing SolrInputDocuments NIHVIVO-2653 3) LuceneDocument (test utility that prints contents from Lucene/Solr documents)
This commit is contained in:
parent
4a209a12ce
commit
a3471b7102
12 changed files with 3613 additions and 2246 deletions
|
@ -24,6 +24,9 @@ public class DisplayVocabulary {
|
|||
/* Individuals */
|
||||
public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex";
|
||||
|
||||
//bk392 for extracting properties beyond context nodes.
|
||||
public static final String CONTEXT_NODES_URI = NS + "QueryForContextNodes";
|
||||
|
||||
/* Page types */
|
||||
public static final String PAGE_TYPE = NS + "Page";
|
||||
public static final String HOME_PAGE_TYPE = NS + "HomePage";
|
||||
|
@ -35,8 +38,10 @@ public class DisplayVocabulary {
|
|||
/* Data Properties */
|
||||
public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping");
|
||||
public static final String TITLE = NS + "title";
|
||||
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
|
||||
|
||||
public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate");
|
||||
//bk392 for extracting properties beyond context nodes.
|
||||
public static final DatatypeProperty QUERY_FOR_EDUCATIONAL_TRAINING = m_model.createDatatypeProperty(NS + "queryForEducationalTraining");
|
||||
|
||||
/* URIs for storing menu.n3 */
|
||||
public static final String MENU_TEXT_RES = NS + "MenuText";
|
||||
public static final String HAS_TEXT_REPRESENTATION = NS + "hasMenuText";
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search;
|
||||
|
||||
/**
 * Names of the fields (terms) used when building search index documents.
 *
 * All names are constants: every field is declared {@code final} so that a
 * field name cannot be reassigned at runtime by accident.
 */
public class VitroTermNames {
    /** Id of entity, vclass or tab */
    public static final String URI = "URI";
    /** lucene document id */
    public static final String DOCID = "DocId";
    /** java class of the object that the Doc represents. */
    public static final String JCLASS = "JCLASS";
    /** rdf:type */
    public static final String RDFTYPE = "type";
    /** URI of the class group of the individual's class */
    public static final String CLASSGROUP_URI = "classgroup";
    /** Modtime from db */
    public static final String MODTIME = "modTime";

    /** time of index in msec since epoch */
    public static final String INDEXEDTIME = "indexedTime";
    /** timekey of entity in yyyymmddhhmm */
    public static final String TIMEKEY = "TIMEKEY";
    /** time of sunset/end of entity in yyyymmddhhmm */
    public static final String SUNSET = "SUNSET";
    /** time of sunrise/start of entity in yyyymmddhhmm */
    public static final String SUNRISE = "SUNRISE";
    /** entity's moniker */
    public static final String MONIKER = "moniker";
    /** text for 'full text' search, this is stemmed */
    public static final String ALLTEXT = "ALLTEXT";
    /** text for 'full text' search, this is unstemmed for
     * use with wildcards and prefix queries */
    public static final String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
    /** class name for storing context nodes **/
    public static final String CONTEXTNODE = "contextNode";
    /** keywords */
    public static final String KEYWORDS = "KEYWORDS";
    /** Does the individual have a thumbnail image? 1=yes 0=no */
    public static final String THUMBNAIL = "THUMBNAIL";
    /** Should individual be included in full text search results? 1=yes 0=no */
    public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS";
    /** local names of the individual's classes, lowercased */
    public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase";
    /** local names of the individual's classes */
    public static final String CLASSLOCALNAME = "classLocalName";

    // Fields derived from rdfs:label
    /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/
    public static final String NAME_RAW = "nameRaw"; // was NAMERAW

    /** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/
    public static final String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE

    /** rdfs:label lowercased, tokenized, stop words, no stemming **/
    public static final String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED

    /** rdfs:label lowercased, tokenized, stop words, stemmed **/
    public static final String NAME_STEMMED = "nameStemmed"; // was NAME
}
|
|
@ -0,0 +1,637 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search.beans;
|
||||
|
||||
import javax.servlet.ServletContext;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.hp.hpl.jena.ontology.OntModel;
|
||||
import com.hp.hpl.jena.query.Query;
|
||||
import com.hp.hpl.jena.query.QueryExecution;
|
||||
import com.hp.hpl.jena.query.QueryExecutionFactory;
|
||||
import com.hp.hpl.jena.query.QueryFactory;
|
||||
import com.hp.hpl.jena.query.QuerySolution;
|
||||
import com.hp.hpl.jena.query.QuerySolutionMap;
|
||||
import com.hp.hpl.jena.query.ResultSet;
|
||||
import com.hp.hpl.jena.query.Syntax;
|
||||
import com.hp.hpl.jena.rdf.model.Literal;
|
||||
import com.hp.hpl.jena.rdf.model.RDFNode;
|
||||
import com.hp.hpl.jena.rdf.model.Resource;
|
||||
import com.hp.hpl.jena.rdf.model.ResourceFactory;
|
||||
import com.hp.hpl.jena.shared.Lock;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
|
||||
public class ContextNodesInclusionFactory {
|
||||
|
||||
private OntModel fullModel;
|
||||
private String contextNodeURI;
|
||||
private String query = "";
|
||||
|
||||
private static final String queryForEducationalTraining = "SELECT ?query WHERE {" +
|
||||
"?searchConfig <"+ DisplayVocabulary.QUERY_FOR_EDUCATIONAL_TRAINING + "> ?query . }";
|
||||
|
||||
private static Log log = LogFactory.getLog(ContextNodesInclusionFactory.class);
|
||||
|
||||
/**
 * Creates a factory bound to the full Jena model of the given servlet
 * context and loads the stored query text for the given resource.
 *
 * @param contextNodeURI  URI of the resource whose stored query is looked
 *                        up in the display model
 * @param displayOntModel display model that is searched for the query text
 * @param context         servlet context from which the full model is taken
 */
public ContextNodesInclusionFactory(String contextNodeURI,
        OntModel displayOntModel, ServletContext context) {
    OntModel jenaModel = ModelContext.getJenaOntModel(context);
    this.fullModel = jenaModel;
    this.contextNodeURI = contextNodeURI;
    this.query = getQueryFromModel(contextNodeURI, displayOntModel);
}
|
||||
|
||||
private String getQueryFromModel(String uri, OntModel displayOntModel) {
|
||||
|
||||
String resultQuery = "";
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource searchConfig = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("searchConfig", searchConfig);
|
||||
|
||||
Query query = QueryFactory.create(queryForEducationalTraining);
|
||||
displayOntModel.enterCriticalSection(Lock.READ);
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(query, displayOntModel, initialBinding);
|
||||
try{
|
||||
ResultSet results = qExec.execSelect();
|
||||
while(results.hasNext()){
|
||||
QuerySolution soln = results.nextSolution();
|
||||
Literal node = soln.getLiteral("query");
|
||||
if(node.isLiteral()){
|
||||
resultQuery = node.toString();
|
||||
}else{
|
||||
log.warn("unexpected literal in the object position for context node queries " + node.toString());
|
||||
}
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
displayOntModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return resultQuery.substring(0, resultQuery.length() - 3);
|
||||
}
|
||||
|
||||
|
||||
// public List<Field> getFieldValues(String uri, Model modelToQuery, List<String> queries){
|
||||
|
||||
//what do the queries need to be like?
|
||||
// SELECT ?field ?value WHERE ....
|
||||
|
||||
// what to do with multiple values for a field?
|
||||
|
||||
// }
|
||||
|
||||
|
||||
|
||||
//in different object:
|
||||
/*
|
||||
* get queries from somewhere
|
||||
* get model to run queries on
|
||||
* get list of individuals
|
||||
* for each individual:
|
||||
* fields = getFieldValues(uri, model, queries)
|
||||
* index(fields)?
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
public String getPropertiesAssociatedWithPosition(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT " +
|
||||
"(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " +
|
||||
" (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " +
|
||||
" (str(?TitleOrRole) as ?titleOrRole) (str(?PositionLabel) as ?positionLabel) WHERE {"
|
||||
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Position . "
|
||||
|
||||
+ " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . "
|
||||
+ " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ."
|
||||
+ " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . "
|
||||
+ " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . "
|
||||
+ " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . "
|
||||
+ " OPTIONAL { ?c rdfs:label ?PositionLabel . } "
|
||||
|
||||
+ " } ORDER BY ?PositionLabel ";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode hrJobTitle = soln.get("hrJobTitle");
|
||||
if(hrJobTitle != null){
|
||||
propertyValues.append(" " + hrJobTitle.toString());
|
||||
}else{
|
||||
log.warn("hrJobTitle is null ");
|
||||
}
|
||||
|
||||
RDFNode involvedOrganizationName = soln.get("involvedOrganizationName");
|
||||
if(involvedOrganizationName != null){
|
||||
propertyValues.append(" " + involvedOrganizationName.toString());
|
||||
}else{
|
||||
log.warn("involvedOrganizationName is null ");
|
||||
}
|
||||
|
||||
RDFNode positionForPerson = soln.get("positionForPerson");
|
||||
if(positionForPerson != null){
|
||||
propertyValues.append(" " + positionForPerson.toString());
|
||||
}else{
|
||||
log.warn("positionForPerson is null ");
|
||||
}
|
||||
|
||||
RDFNode positionInOrganization = soln.get("positionInOrganization");
|
||||
if(positionInOrganization != null){
|
||||
propertyValues.append(" " + positionInOrganization.toString());
|
||||
}else{
|
||||
log.warn("positionInOrganization is null ");
|
||||
}
|
||||
|
||||
RDFNode titleOrRole = soln.get("titleOrRole");
|
||||
if(titleOrRole != null){
|
||||
propertyValues.append(" " + titleOrRole.toString());
|
||||
}else{
|
||||
log.warn("titleOrRole is null ");
|
||||
}
|
||||
|
||||
RDFNode positionLabel = soln.get("positionLabel");
|
||||
if(positionLabel != null){
|
||||
propertyValues.append(" " + positionLabel.toString());
|
||||
}else{
|
||||
log.warn("positionLabel is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return propertyValues.toString();
|
||||
}
|
||||
|
||||
public String getPropertiesAssociatedWithRelationship(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " +
|
||||
" (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {"
|
||||
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Relationship . "
|
||||
|
||||
+ " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . "
|
||||
+ " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ."
|
||||
+ " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . "
|
||||
+ " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . "
|
||||
|
||||
+ " } ";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode advisee = soln.get("advisee");
|
||||
if(advisee != null){
|
||||
propertyValues.append(" " + advisee.toString());
|
||||
}else{
|
||||
log.warn("advisee is null ");
|
||||
}
|
||||
|
||||
RDFNode degreeCandidacy = soln.get("degreeCandidacy");
|
||||
if(degreeCandidacy != null){
|
||||
propertyValues.append(" " + degreeCandidacy.toString());
|
||||
}else{
|
||||
log.warn("degreeCandidacy is null ");
|
||||
}
|
||||
|
||||
RDFNode linkedAuthor = soln.get("linkedAuthor");
|
||||
if(linkedAuthor != null){
|
||||
propertyValues.append(" " + linkedAuthor.toString());
|
||||
}else{
|
||||
log.warn("linkedAuthor is null ");
|
||||
}
|
||||
|
||||
RDFNode linkedInformationResource = soln.get("linkedInformationResource");
|
||||
if(linkedInformationResource != null){
|
||||
propertyValues.append(" " + linkedInformationResource.toString());
|
||||
}else{
|
||||
log.warn("linkedInformationResource is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return propertyValues.toString();
|
||||
}
|
||||
|
||||
|
||||
public String getPropertiesAssociatedWithAwardReceipt(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " +
|
||||
" (str(?Description) as ?description) (str(?AwardReceiptLabel) as ?awardReceiptLabel) WHERE {"
|
||||
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:AwardReceipt . "
|
||||
|
||||
+ " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . "
|
||||
+ " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ."
|
||||
+ " OPTIONAL { ?c core:description ?Description . } . "
|
||||
+ " OPTIONAL { ?c rdfs:label ?AwardReceiptLabel . } . "
|
||||
|
||||
+ " } ORDER BY ?AwardReceiptLabel";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode awardConferredBy = soln.get("awardConferredBy");
|
||||
if(awardConferredBy != null){
|
||||
propertyValues.append(" " + awardConferredBy.toString());
|
||||
}else{
|
||||
log.warn("awardConferredBy is null ");
|
||||
}
|
||||
|
||||
RDFNode awardOrHonorFor = soln.get("awardOrHonorFor");
|
||||
if(awardOrHonorFor != null){
|
||||
propertyValues.append(" " + awardOrHonorFor.toString());
|
||||
}else{
|
||||
log.warn("awardOrHonorFor is null ");
|
||||
}
|
||||
|
||||
RDFNode description = soln.get("description");
|
||||
if(description != null){
|
||||
propertyValues.append(" " + description.toString());
|
||||
}else{
|
||||
log.warn("description is null ");
|
||||
}
|
||||
|
||||
RDFNode awardReceiptLabel = soln.get("awardReceiptLabel");
|
||||
if(awardReceiptLabel != null){
|
||||
propertyValues.append(" " + awardReceiptLabel.toString());
|
||||
}else{
|
||||
log.warn("awardReceiptLabel is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return propertyValues.toString();
|
||||
}
|
||||
|
||||
public String getPropertiesAssociatedWithRole(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT DISTINCT (str(?OrganizationLabel) as ?organizationLabel) WHERE {"
|
||||
+ "?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:Role ; core:roleIn ?Organization ."
|
||||
+ " ?Organization rdfs:label ?OrganizationLabel . "
|
||||
+ " } ORDER BY ?OrganizationLabel ";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode organizationLabel = soln.get("organizationLabel");
|
||||
if(organizationLabel != null){
|
||||
propertyValues.append(" " + organizationLabel.toString());
|
||||
}else{
|
||||
log.warn("organizationLabel is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
|
||||
return propertyValues.toString();
|
||||
}
|
||||
|
||||
|
||||
|
||||
public String getPropertiesAssociatedWithEducationalTraining(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) "
|
||||
+ "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " +
|
||||
"(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {"
|
||||
|
||||
+ " ?uri rdf:type foaf:Agent ; ?b ?c . "
|
||||
+ " ?c rdf:type core:EducationalTraining . "
|
||||
|
||||
+ "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . "
|
||||
+ "OPTIONAL { ?c core:majorField ?MajorField .} ."
|
||||
+ " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }"
|
||||
+ " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . "
|
||||
|
||||
+"}";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode academicDegreeLabel = soln.get("academicDegreeLabel");
|
||||
if(academicDegreeLabel != null){
|
||||
propertyValues.append(" " + academicDegreeLabel.toString());
|
||||
}else{
|
||||
log.warn("academicDegreeLabel is null ");
|
||||
}
|
||||
|
||||
RDFNode academicDegreeAbbreviation = soln.get("academicDegreeAbbreviation");
|
||||
if(academicDegreeAbbreviation != null){
|
||||
propertyValues.append(" " + academicDegreeAbbreviation.toString());
|
||||
}else{
|
||||
log.warn("academicDegreeAbbreviation is null ");
|
||||
}
|
||||
|
||||
RDFNode majorField = soln.get("majorField");
|
||||
if(majorField != null){
|
||||
propertyValues.append(" " + majorField.toString());
|
||||
}else{
|
||||
log.warn("majorField is null ");
|
||||
}
|
||||
|
||||
RDFNode trainingAtDepartmentOrSchool = soln.get("departmentOrSchool");
|
||||
if(trainingAtDepartmentOrSchool != null){
|
||||
propertyValues.append(" " + trainingAtDepartmentOrSchool.toString());
|
||||
}else{
|
||||
log.warn("trainingAtDepartmentOrSchool is null ");
|
||||
}
|
||||
|
||||
RDFNode trainingAtOrganizationLabel = soln.get("trainingAtOrganizationLabel");
|
||||
if(trainingAtOrganizationLabel != null){
|
||||
propertyValues.append(" " + trainingAtOrganizationLabel.toString());
|
||||
}else{
|
||||
log.warn("trainingAtOrganizationLabel is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
return propertyValues.toString();
|
||||
|
||||
}
|
||||
|
||||
public String getPropertiesAssociatedWithInformationResource(String uri){
|
||||
|
||||
StringBuffer propertyValues = new StringBuffer();
|
||||
|
||||
QuerySolutionMap initialBinding = new QuerySolutionMap();
|
||||
Resource uriResource = ResourceFactory.createResource(uri);
|
||||
|
||||
initialBinding.add("uri", uriResource);
|
||||
|
||||
String prefix = "prefix owl: <http://www.w3.org/2002/07/owl#> "
|
||||
+ " prefix vitroDisplay: <http://vitro.mannlib.cornell.edu/ontologies/display/1.1#> "
|
||||
+ " prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> "
|
||||
+ " prefix foaf: <http://xmlns.com/foaf/0.1/> "
|
||||
+ " prefix core: <http://vivoweb.org/ontology/core#> "
|
||||
+ " prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> "
|
||||
+ " prefix localNav: <http://vitro.mannlib.cornell.edu/ns/localnav#> "
|
||||
+ " prefix bibo: <http://purl.org/ontology/bibo/> ";
|
||||
|
||||
String thisQuery = prefix +
|
||||
"SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) "
|
||||
+ "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " +
|
||||
"(str(?Features) as ?features) WHERE {"
|
||||
|
||||
+ " ?uri rdf:type core:InformationResource . "
|
||||
|
||||
+ "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." +
|
||||
"?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource } . "
|
||||
+ "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ."
|
||||
+ " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea ; core:researchAreaOf ?h . ?h rdfs:label ?ResearchAreaOf . } "
|
||||
+ " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . "
|
||||
|
||||
+"}";
|
||||
|
||||
Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ);
|
||||
fullModel.enterCriticalSection(Lock.READ);
|
||||
|
||||
try{
|
||||
QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding);
|
||||
try{
|
||||
|
||||
ResultSet results = qExec.execSelect();
|
||||
|
||||
while(results.hasNext()){
|
||||
|
||||
QuerySolution soln = results.nextSolution();
|
||||
|
||||
RDFNode linkedAuthor = soln.get("linkedAuthor");
|
||||
if(linkedAuthor != null){
|
||||
propertyValues.append(" " + linkedAuthor.toString());
|
||||
}else{
|
||||
log.warn("linkedAuthor is null ");
|
||||
}
|
||||
|
||||
RDFNode linkedInformationResource = soln.get("linkedInformationResource");
|
||||
if(linkedInformationResource != null){
|
||||
propertyValues.append(" " + linkedInformationResource.toString());
|
||||
}else{
|
||||
log.warn("linkedInformationResource is null ");
|
||||
}
|
||||
|
||||
RDFNode editor = soln.get("editor");
|
||||
if(editor != null){
|
||||
propertyValues.append(" " + editor.toString());
|
||||
}else{
|
||||
log.warn("editor is null ");
|
||||
}
|
||||
|
||||
RDFNode subjectArea = soln.get("subjectArea");
|
||||
if(subjectArea != null){
|
||||
propertyValues.append(" " + subjectArea.toString());
|
||||
}else{
|
||||
log.warn("subjectArea is null ");
|
||||
}
|
||||
|
||||
RDFNode researchAreaOf = soln.get("researchAreaOf");
|
||||
if(researchAreaOf != null){
|
||||
propertyValues.append(" " + researchAreaOf.toString());
|
||||
}else{
|
||||
log.warn("researchAreaOf is null ");
|
||||
}
|
||||
|
||||
RDFNode features = soln.get("features");
|
||||
if(features != null){
|
||||
propertyValues.append(" " + features.toString());
|
||||
}else{
|
||||
log.warn("features is null ");
|
||||
}
|
||||
|
||||
}
|
||||
}catch(Throwable t){
|
||||
log.error(t,t);
|
||||
} finally{
|
||||
qExec.close();
|
||||
}
|
||||
}finally{
|
||||
fullModel.leaveCriticalSection();
|
||||
}
|
||||
return propertyValues.toString();
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
}
|
|
@ -2,6 +2,8 @@
|
|||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -21,9 +23,11 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
|||
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.test.LuceneDocument;
|
||||
|
||||
/**
|
||||
* This class expect that Entities passed to it will have
|
||||
|
@ -31,6 +35,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
|||
* be as full as possible.
|
||||
*/
|
||||
public class Entity2LuceneDoc implements Obj2DocIface{
|
||||
|
||||
/** These are the terms for the lucene index */
|
||||
public static class VitroLuceneTermNames{
|
||||
/** Id of entity, vclass or tab */
|
||||
|
@ -61,6 +66,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
/** text for 'full text' search, this is unstemmed for
|
||||
* use with wildcards and prefix queries */
|
||||
public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED";
|
||||
/** class name for storing context nodes **/
|
||||
public static final String CONTEXTNODE = "contextNode";
|
||||
/** keywords */
|
||||
public static final String KEYWORDS = "KEYWORDS";
|
||||
/** Does the individual have a thumbnail image? 1=yes 0=no */
|
||||
|
@ -103,12 +110,19 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
private ProhibitedFromSearch classesProhibitedFromSearch;
|
||||
|
||||
private IndividualProhibitedFromSearch individualProhibited;
|
||||
|
||||
private ContextNodesInclusionFactory contextNodesInclusionFactory;
|
||||
|
||||
private static HashMap<String, String> IndividualURIToObjectProperties = new HashMap<String, String>();
|
||||
|
||||
private static HashSet<String> objectProperties = new HashSet<String>();
|
||||
|
||||
public Entity2LuceneDoc(
|
||||
ProhibitedFromSearch classesProhibitedFromSearch,
|
||||
IndividualProhibitedFromSearch individualProhibited){
|
||||
IndividualProhibitedFromSearch individualProhibited, ContextNodesInclusionFactory contextNodesInclusionFactory){
|
||||
this.classesProhibitedFromSearch = classesProhibitedFromSearch;
|
||||
this.individualProhibited = individualProhibited;
|
||||
this.contextNodesInclusionFactory = contextNodesInclusionFactory;
|
||||
}
|
||||
|
||||
public boolean canTranslate(Object obj) {
|
||||
|
@ -123,7 +137,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
String value;
|
||||
Document doc = new Document();
|
||||
String classPublicNames = "";
|
||||
|
||||
LuceneDocument document = new LuceneDocument();
|
||||
|
||||
//DocId
|
||||
String id = ent.getURI();
|
||||
|
@ -166,12 +180,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
|
||||
Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
doc.add( typeField);
|
||||
document.setRDFTYPE(clz.getURI());
|
||||
|
||||
if(clz.getLocalName() != null){
|
||||
Field classLocalName = new Field(term.CLASSLOCALNAME, clz.getLocalName(), Field.Store.YES, Field.Index.ANALYZED);
|
||||
Field classLocalNameLowerCase = new Field(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase(), Field.Store.YES, Field.Index.ANALYZED);
|
||||
doc.add(classLocalName);
|
||||
doc.add(classLocalNameLowerCase);
|
||||
document.setCLASSLOCALNAME(clz.getLocalName());
|
||||
}
|
||||
|
||||
if( clz.getName() != null )
|
||||
|
@ -183,22 +199,29 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
// classGroupField.setBoost(FIELD_BOOST);
|
||||
doc.add(classGroupField);
|
||||
document.setCLASSGROUP_URI(clz.getGroupURI());
|
||||
}
|
||||
}
|
||||
}
|
||||
doc.add( new Field(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0",
|
||||
Field.Store.NO,Field.Index.NOT_ANALYZED_NO_NORMS) );
|
||||
document.setPROHIBITED_FROM_TEXT_RESULTS(prohibited?"1":"0");
|
||||
|
||||
/* lucene DOCID */
|
||||
doc.add( new Field(term.DOCID, entClassName + id,
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
|
||||
document.setDOCID(entClassName + id);
|
||||
|
||||
|
||||
//vitro Id
|
||||
doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
document.setURI(id);
|
||||
|
||||
|
||||
//java class
|
||||
doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
|
||||
document.setJCLASS(entClassName);
|
||||
|
||||
// Individual label
|
||||
if( ent.getRdfsLabel() != null )
|
||||
value=ent.getRdfsLabel();
|
||||
|
@ -208,10 +231,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
|
||||
value = ent.getLocalName();
|
||||
}
|
||||
|
||||
Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
nameRaw.setBoost(NAME_BOOST);
|
||||
doc.add(nameRaw);
|
||||
document.setNAME(value);
|
||||
|
||||
// RY Not sure if we need to store this. For Solr, see schema.xml field definition.
|
||||
Field nameLowerCase = new Field(term.NAME_LOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED);
|
||||
|
@ -224,14 +247,30 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
|
||||
Field nameStemmed = new Field(term.AC_NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED);
|
||||
nameStemmed.setBoost(NAME_BOOST);
|
||||
doc.add(nameStemmed);
|
||||
doc.add(nameStemmed);
|
||||
|
||||
String contextNodePropertyValues;
|
||||
|
||||
// if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||
contextNodePropertyValues = contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI());
|
||||
|
||||
// }
|
||||
|
||||
Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED );
|
||||
doc.add(contextNodeInformation);
|
||||
document.setCONTEXTNODE(contextNodePropertyValues);
|
||||
|
||||
//Moniker
|
||||
|
||||
if(ent.getMoniker() != null){
|
||||
Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
|
||||
doc.add(moniker);
|
||||
document.setMONIKER(ent.getMoniker());
|
||||
}
|
||||
|
||||
//boost for entity
|
||||
|
@ -274,31 +313,45 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
||||
doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ),
|
||||
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
|
||||
document.setINDEXEDTIME(String.format("%019d", anon));
|
||||
|
||||
|
||||
if( ! prohibited ){
|
||||
//ALLTEXT, all of the 'full text'
|
||||
StringBuffer alltext = new StringBuffer();
|
||||
|
||||
String t=null;
|
||||
value ="";
|
||||
value+= " "+( ((t=ent.getName()) == null)?"":t );
|
||||
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
|
||||
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
|
||||
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
|
||||
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
|
||||
value+= " "+ getKeyterms(ent);
|
||||
value ="";
|
||||
value+= " "+( ((t=ent.getName()) == null)?"":t );
|
||||
alltext.append("\t NAME: " + ( ((t=ent.getName()) == null)?"":t ));
|
||||
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
|
||||
alltext.append("\t ANCHOR: " + ( ((t=ent.getAnchor()) == null)?"":t));
|
||||
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
|
||||
alltext.append("\t MONIKER: " + ( ((t=ent.getMoniker()) == null)?"":t ));
|
||||
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
|
||||
alltext.append("\t DESCRIPTION: " + ( ((t=ent.getDescription()) == null)?"":t ));
|
||||
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
|
||||
alltext.append("\t BLURB: " + ( ((t=ent.getBlurb()) == null)?"":t ));
|
||||
value+= " "+ getKeyterms(ent);
|
||||
alltext.append("\t KEYTERMS: " + getKeyterms(ent));
|
||||
|
||||
value+= " " + classPublicNames;
|
||||
value+= " " + classPublicNames;
|
||||
alltext.append(" CLASSPUBLICNAMES: " + classPublicNames);
|
||||
|
||||
List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
|
||||
if (dataPropertyStatements != null) {
|
||||
alltext.append("\n DATA_PROPERTY_STATEMENTS \n -------------------------------- \n");
|
||||
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
|
||||
while (dataPropertyStmtIter.hasNext()) {
|
||||
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
|
||||
value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t );
|
||||
alltext.append("\n " + ( ((t=dataPropertyStmt.getData()) == null)?"":t ));
|
||||
}
|
||||
}
|
||||
|
||||
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
|
||||
if (objectPropertyStatements != null) {
|
||||
alltext.append("\n OBJECT_PROPERTY_STATEMENTS \n -------------------------------- \n");
|
||||
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
||||
while (objectPropertyStmtIter.hasNext()) {
|
||||
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
||||
|
@ -306,17 +359,31 @@ public class Entity2LuceneDoc implements Obj2DocIface{
|
|||
continue;
|
||||
try {
|
||||
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
|
||||
alltext.append("\n " +( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t )
|
||||
+ " : " + ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ));
|
||||
|
||||
if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||
//IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) );
|
||||
objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ));
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("could not index name of related object: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
//stemmed terms
|
||||
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
|
||||
doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED));
|
||||
//unstemmed terms
|
||||
doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED));
|
||||
document.setALLTEXT(alltext.toString());
|
||||
}
|
||||
|
||||
document.writeToLog();
|
||||
|
||||
// log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n");
|
||||
log.info(" \n Object Properties " + objectProperties.toString() + "\n\n");
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
|
|
@ -10,6 +10,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi
|
|||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_UNSTEMMED;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE;
|
||||
import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CONTEXTNODE;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
@ -41,6 +42,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
|||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||
import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup;
|
||||
|
||||
|
@ -111,10 +113,16 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
getAnalyzer());
|
||||
context.setAttribute(ANALYZER, getAnalyzer());
|
||||
|
||||
//bk392 adding another argument to Entity2LuceneDoc
|
||||
// that takes care of sparql queries for context nodes.
|
||||
|
||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context) );
|
||||
new IndividualProhibitedFromSearch(context),
|
||||
new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)
|
||||
|
||||
);
|
||||
indexer.addObj2Doc(translator);
|
||||
|
||||
context.setAttribute(LuceneIndexer.class.getName(), indexer);
|
||||
|
@ -250,9 +258,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
|
|||
analyzer.addAnalyzer(AC_NAME_STEMMED, new HtmlLowerStopStemAnalyzer());
|
||||
analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29));
|
||||
analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer());
|
||||
analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer());
|
||||
|
||||
|
||||
return analyzer;
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.lucene;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
@ -27,91 +27,93 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
|||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
|
||||
|
||||
/**
|
||||
* Setup objects for lucene searching and indexing.
|
||||
*
|
||||
* The indexing and search objects, IndexBuilder and Searcher are found by the
|
||||
* controllers IndexController and SearchController through the servletContext.
|
||||
* This object will have the method contextInitialized() called when the tomcat
|
||||
* server starts this webapp.
|
||||
*
|
||||
* The contextInitialized() will try to find the lucene index directory,
|
||||
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
|
||||
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
|
||||
*
|
||||
* To execute this at context creation put this in web.xml:
|
||||
<listener>
|
||||
<listener-class>
|
||||
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
|
||||
</listener-class>
|
||||
</listener>
|
||||
|
||||
* @author bdc34
|
||||
*
|
||||
*/
|
||||
public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
||||
|
||||
/**
|
||||
* Setup objects for lucene searching and indexing.
|
||||
*
|
||||
* The indexing and search objects, IndexBuilder and Searcher are found by the
|
||||
* controllers IndexController and SearchController through the servletContext.
|
||||
* This object will have the method contextInitialized() called when the tomcat
|
||||
* server starts this webapp.
|
||||
*
|
||||
* The contextInitialized() will try to find the lucene index directory,
|
||||
* make a LuceneIndexer and a LuceneSearcher. The LuceneIndexer will
|
||||
* also get a list of Obj2Doc objects so it can translate object to lucene docs.
|
||||
*
|
||||
* To execute this at context creation put this in web.xml:
|
||||
<listener>
|
||||
<listener-class>
|
||||
edu.cornell.mannlib.vitro.search.setup.LuceneSetup
|
||||
</listener-class>
|
||||
</listener>
|
||||
|
||||
* @author bdc34
|
||||
*
|
||||
*/
|
||||
public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
||||
private static String indexDir = null;
|
||||
private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName());
|
||||
private static final String PROPERTY_VITRO_HOME = "vitro.home.directory";
|
||||
private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName());
|
||||
private static final String PROPERTY_VITRO_HOME = "vitro.home.directory";
|
||||
private static final String LUCENE_SUBDIRECTORY_NAME = "luceneIndex";
|
||||
|
||||
/**
|
||||
* Gets run to set up DataSource when the webapp servlet context gets created.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Gets run to set up DataSource when the webapp servlet context gets created.
|
||||
*/
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public void contextInitialized(ServletContextEvent sce) {
|
||||
ServletContext context = sce.getServletContext();
|
||||
log.info("**** Running "+this.getClass().getName()+".contextInitialized()");
|
||||
try{
|
||||
indexDir = getIndexDirName(sce);
|
||||
log.info("Lucene indexDir: " + indexDir);
|
||||
|
||||
setBoolMax();
|
||||
|
||||
HashSet dataPropertyBlacklist = new HashSet<String>();
|
||||
context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
|
||||
|
||||
HashSet objectPropertyBlacklist = new HashSet<String>();
|
||||
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
|
||||
context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
|
||||
@SuppressWarnings("unchecked")
|
||||
public void contextInitialized(ServletContextEvent sce) {
|
||||
ServletContext context = sce.getServletContext();
|
||||
log.info("**** Running "+this.getClass().getName()+".contextInitialized()");
|
||||
try{
|
||||
indexDir = getIndexDirName(sce);
|
||||
log.info("Lucene indexDir: " + indexDir);
|
||||
|
||||
setBoolMax();
|
||||
|
||||
HashSet dataPropertyBlacklist = new HashSet<String>();
|
||||
context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist);
|
||||
|
||||
HashSet objectPropertyBlacklist = new HashSet<String>();
|
||||
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
|
||||
context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
|
||||
|
||||
//This is where to get a LuceneIndex from. The indexer will
|
||||
//need to reference this to notify it of updates to the index
|
||||
LuceneIndexFactory lif = LuceneIndexFactory.setup(context, indexDir);
|
||||
String liveIndexDir = lif.getLiveIndexDir(context);
|
||||
|
||||
//here we want to put the LuceneIndex object into the application scope
|
||||
LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer());
|
||||
|
||||
//here we want to put the LuceneIndex object into the application scope
|
||||
LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer());
|
||||
context.setAttribute(LuceneSetup.ANALYZER, getAnalyzer());
|
||||
|
||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||
Entity2LuceneDoc translator = new Entity2LuceneDoc(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context) );
|
||||
new IndividualProhibitedFromSearch(context),
|
||||
new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context));
|
||||
indexer.addObj2Doc(translator);
|
||||
|
||||
indexer.setLuceneIndexFactory(lif);
|
||||
|
||||
//This is where the builder gets the list of places to try to
|
||||
//get objects to index. It is filtered so that non-public text
|
||||
//does not get into the search index.
|
||||
WebappDaoFactory wadf =
|
||||
(WebappDaoFactory) context.getAttribute("webappDaoFactory");
|
||||
|
||||
//This is where the builder gets the list of places to try to
|
||||
//get objects to index. It is filtered so that non-public text
|
||||
//does not get into the search index.
|
||||
WebappDaoFactory wadf =
|
||||
(WebappDaoFactory) context.getAttribute("webappDaoFactory");
|
||||
VitroFilters vf = VitroFilterUtils.getPublicFilter(context);
|
||||
wadf = new WebappDaoFactoryFiltering(wadf,vf);
|
||||
|
||||
List sources = new ArrayList();
|
||||
sources.add(wadf.getIndividualDao());
|
||||
|
||||
IndexBuilder builder = new IndexBuilder(context,indexer,sources);
|
||||
|
||||
// here we add the IndexBuilder with the LuceneIndexer
|
||||
// to the servlet context so we can access it later in the webapp.
|
||||
wadf = new WebappDaoFactoryFiltering(wadf,vf);
|
||||
|
||||
List sources = new ArrayList();
|
||||
sources.add(wadf.getIndividualDao());
|
||||
|
||||
IndexBuilder builder = new IndexBuilder(context,indexer,sources);
|
||||
|
||||
// here we add the IndexBuilder with the LuceneIndexer
|
||||
// to the servlet context so we can access it later in the webapp.
|
||||
context.setAttribute(IndexBuilder.class.getName(),builder);
|
||||
|
||||
//set up listeners so search index builder is notified of changes to model
|
||||
|
@ -119,36 +121,36 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
|||
OntModel jenaOntModel = (OntModel)sce.getServletContext().getAttribute("jenaOntModel");
|
||||
SearchReindexingListener srl = new SearchReindexingListener( builder );
|
||||
ModelContext.registerListenerForChanges(sce.getServletContext(), srl);
|
||||
|
||||
}catch(Exception ex){
|
||||
log.error("Could not setup lucene full text search." , ex);
|
||||
}
|
||||
|
||||
log.debug("**** End of "+this.getClass().getName()+".contextInitialized()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets run when the webApp Context gets destroyed.
|
||||
*/
|
||||
|
||||
}catch(Exception ex){
|
||||
log.error("Could not setup lucene full text search." , ex);
|
||||
}
|
||||
|
||||
log.debug("**** End of "+this.getClass().getName()+".contextInitialized()");
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets run when the webApp Context gets destroyed.
|
||||
*/
|
||||
@Override
|
||||
public void contextDestroyed(ServletContextEvent sce) {
|
||||
|
||||
|
||||
log.info("**** Running "+this.getClass().getName()+".contextDestroyed()");
|
||||
IndexBuilder builder = (IndexBuilder)sce.getServletContext().getAttribute(IndexBuilder.class.getName());
|
||||
builder.stopIndexingThread();
|
||||
}
|
||||
|
||||
/**
|
||||
* In wild card searches the query is first broken into many boolean searches
|
||||
* OR'ed together. So if there is a query that would match a lot of records
|
||||
* we need a high max boolean limit for the lucene search.
|
||||
*
|
||||
* This sets some static method in the lucene library to achieve this.
|
||||
*/
|
||||
public static void setBoolMax() {
|
||||
BooleanQuery.setMaxClauseCount(16384);
|
||||
}
|
||||
|
||||
builder.stopIndexingThread();
|
||||
}
|
||||
|
||||
/**
|
||||
* In wild card searches the query is first broken into many boolean searches
|
||||
* OR'ed together. So if there is a query that would match a lot of records
|
||||
* we need a high max boolean limit for the lucene search.
|
||||
*
|
||||
* This sets some static method in the lucene library to achieve this.
|
||||
*/
|
||||
public static void setBoolMax() {
|
||||
BooleanQuery.setMaxClauseCount(16384);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the name of the directory to store the lucene index in. The
|
||||
* {@link ConfigurationProperties} should have a property named
|
||||
|
@ -190,14 +192,14 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
|
|||
return luceneDir.getPath();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the analyzer that will be used when building the indexing
|
||||
* and when analyzing the incoming search terms.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private Analyzer getAnalyzer() {
|
||||
return new CJKAnalyzer();
|
||||
}
|
||||
|
||||
}
|
||||
/**
|
||||
* Gets the analyzer that will be used when building the indexing
|
||||
* and when analyzing the incoming search terms.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private Analyzer getAnalyzer() {
|
||||
return new CJKAnalyzer();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -0,0 +1,209 @@
|
|||
package edu.cornell.mannlib.vitro.webapp.search.lucene.test;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
public class LuceneDocument {
|
||||
|
||||
private static final Log log = LogFactory.getLog(LuceneDocument.class.getName());
|
||||
|
||||
String URI;
|
||||
String DOCID;
|
||||
String JCLASS;
|
||||
String RDFTYPE;
|
||||
String CLASSGROUP_URI;
|
||||
String MODTIME;
|
||||
String NAME;
|
||||
String PORTAL;
|
||||
String INDEXEDTIME;
|
||||
String TIMEKEY;
|
||||
String SUNSET;
|
||||
String MONIKER;
|
||||
String ALLTEXT;
|
||||
String KEYWORDS;
|
||||
String THUMBNAIL;
|
||||
String PROHIBITED_FROM_TEXT_RESULTS;
|
||||
String CLASSLOCALNAME;
|
||||
String CONTEXTNODE;
|
||||
|
||||
static final String FILE = "~/Desktop/LuceneIndividuals.txt";
|
||||
|
||||
|
||||
public String getURI() {
|
||||
return URI;
|
||||
}
|
||||
|
||||
public void setURI(String uRI) {
|
||||
URI = uRI;
|
||||
}
|
||||
|
||||
public String getDOCID() {
|
||||
return DOCID;
|
||||
}
|
||||
|
||||
public void setDOCID(String dOCID) {
|
||||
DOCID = dOCID;
|
||||
}
|
||||
|
||||
public String getJCLASS() {
|
||||
return JCLASS;
|
||||
}
|
||||
|
||||
public void setJCLASS(String jCLASS) {
|
||||
JCLASS = jCLASS;
|
||||
}
|
||||
|
||||
public String getRDFTYPE() {
|
||||
return RDFTYPE;
|
||||
}
|
||||
|
||||
public void setRDFTYPE(String rDFTYPE) {
|
||||
RDFTYPE = rDFTYPE;
|
||||
}
|
||||
|
||||
public String getCLASSGROUP_URI() {
|
||||
return CLASSGROUP_URI;
|
||||
}
|
||||
|
||||
public void setCLASSGROUP_URI(String cLASSGROUP_URI) {
|
||||
CLASSGROUP_URI = cLASSGROUP_URI;
|
||||
}
|
||||
|
||||
public String getMODTIME() {
|
||||
return MODTIME;
|
||||
}
|
||||
|
||||
public void setMODTIME(String mODTIME) {
|
||||
MODTIME = mODTIME;
|
||||
}
|
||||
|
||||
public String getNAME() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
public void setNAME(String nAME) {
|
||||
NAME = nAME;
|
||||
}
|
||||
|
||||
public String getPORTAL() {
|
||||
return PORTAL;
|
||||
}
|
||||
|
||||
public void setPORTAL(String pORTAL) {
|
||||
PORTAL = pORTAL;
|
||||
}
|
||||
|
||||
public String getINDEXEDTIME() {
|
||||
return INDEXEDTIME;
|
||||
}
|
||||
|
||||
public void setINDEXEDTIME(String iNDEXEDTIME) {
|
||||
INDEXEDTIME = iNDEXEDTIME;
|
||||
}
|
||||
|
||||
public String getTIMEKEY() {
|
||||
return TIMEKEY;
|
||||
}
|
||||
|
||||
public void setTIMEKEY(String tIMEKEY) {
|
||||
TIMEKEY = tIMEKEY;
|
||||
}
|
||||
|
||||
public String getSUNSET() {
|
||||
return SUNSET;
|
||||
}
|
||||
|
||||
public void setSUNSET(String sUNSET) {
|
||||
SUNSET = sUNSET;
|
||||
}
|
||||
|
||||
public String getMONIKER() {
|
||||
return MONIKER;
|
||||
}
|
||||
|
||||
public void setMONIKER(String mONIKER) {
|
||||
MONIKER = mONIKER;
|
||||
}
|
||||
|
||||
public String getALLTEXT() {
|
||||
return ALLTEXT;
|
||||
}
|
||||
|
||||
public void setALLTEXT(String aLLTEXT) {
|
||||
ALLTEXT = aLLTEXT;
|
||||
}
|
||||
|
||||
public String getKEYWORDS() {
|
||||
return KEYWORDS;
|
||||
}
|
||||
|
||||
public void setKEYWORDS(String kEYWORDS) {
|
||||
KEYWORDS = kEYWORDS;
|
||||
}
|
||||
|
||||
public String getTHUMBNAIL() {
|
||||
return THUMBNAIL;
|
||||
}
|
||||
|
||||
public void setTHUMBNAIL(String tHUMBNAIL) {
|
||||
THUMBNAIL = tHUMBNAIL;
|
||||
}
|
||||
|
||||
public String getPROHIBITED_FROM_TEXT_RESULTS() {
|
||||
return PROHIBITED_FROM_TEXT_RESULTS;
|
||||
}
|
||||
|
||||
public void setPROHIBITED_FROM_TEXT_RESULTS(String pROHIBITED_FROM_TEXT_RESULTS) {
|
||||
PROHIBITED_FROM_TEXT_RESULTS = pROHIBITED_FROM_TEXT_RESULTS;
|
||||
}
|
||||
|
||||
public String getCLASSLOCALNAME() {
|
||||
return CLASSLOCALNAME;
|
||||
}
|
||||
|
||||
public void setCLASSLOCALNAME(String cLASSLOCALNAME) {
|
||||
CLASSLOCALNAME = cLASSLOCALNAME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(){
|
||||
|
||||
StringBuffer result = new StringBuffer();
|
||||
|
||||
result.append("\n==================================\n");
|
||||
|
||||
result.append("URI : " + URI);
|
||||
result.append("\nDOCID : " + DOCID);
|
||||
result.append("\nJCLASS : " + JCLASS);
|
||||
result.append("\nRDFTYPE : " + RDFTYPE);
|
||||
result.append("\nCLASSGROUP_URI : " + CLASSGROUP_URI);
|
||||
result.append("\nMODTIME : " + MODTIME);
|
||||
result.append("\nNAME : " + NAME);
|
||||
result.append("\nPORTAL : " + PORTAL);
|
||||
result.append("\nINDEXEDTIME : " + INDEXEDTIME);
|
||||
result.append("\nCONTEXTNODE : " + CONTEXTNODE);
|
||||
result.append("\nTIMEKEY : " + TIMEKEY);
|
||||
result.append("\nSUNSET : " + SUNSET);
|
||||
result.append("\nMONIKER : " + MONIKER);
|
||||
result.append("\nALLTEXT : " + ALLTEXT);
|
||||
result.append("\nKEYWORDS : " + KEYWORDS);
|
||||
result.append("\nTHUMBNAIL : " + THUMBNAIL);
|
||||
result.append("\nPROHIBITED_FROM_TEXT_RESULTS : " + PROHIBITED_FROM_TEXT_RESULTS);
|
||||
result.append("\nCLASSLOCALNAME : " + CLASSLOCALNAME);
|
||||
|
||||
return result.toString();
|
||||
|
||||
}
|
||||
|
||||
public void writeToLog(){
|
||||
log.info(this.toString());
|
||||
}
|
||||
|
||||
public void setCONTEXTNODE(String contextNodePropertyValues) {
|
||||
this.CONTEXTNODE = contextNodePropertyValues;
|
||||
}
|
||||
}
|
|
@ -2,23 +2,261 @@
|
|||
|
||||
package edu.cornell.mannlib.vitro.webapp.search.solr;
|
||||
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.solr.client.solrj.beans.Field;
|
||||
import org.apache.solr.common.SolrDocument;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.joda.time.DateTime;
|
||||
|
||||
import com.hp.hpl.jena.vocabulary.OWL;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement;
|
||||
import edu.cornell.mannlib.vitro.webapp.beans.VClass;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
|
||||
|
||||
public class IndividualToSolrDocument implements Obj2DocIface {
|
||||
|
||||
protected LuceneDocToSolrDoc luceneToSolr;
|
||||
protected Entity2LuceneDoc entityToLucene;
|
||||
|
||||
public IndividualToSolrDocument(Entity2LuceneDoc e2d){
|
||||
entityToLucene = e2d;
|
||||
luceneToSolr = new LuceneDocToSolrDoc();
|
||||
public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName());
|
||||
|
||||
public static VitroTermNames term = new VitroTermNames();
|
||||
|
||||
private static String entClassName = Individual.class.getName();
|
||||
|
||||
private ProhibitedFromSearch classesProhibitedFromSearch;
|
||||
|
||||
private IndividualProhibitedFromSearch individualProhibitedFromSearch;
|
||||
|
||||
private ContextNodesInclusionFactory contextNodesInclusionFactory;
|
||||
|
||||
private static HashSet<String> objectProperties = new HashSet<String>();
|
||||
|
||||
|
||||
public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch,
|
||||
IndividualProhibitedFromSearch individualProhibitedFromSearch,
|
||||
ContextNodesInclusionFactory contextNodesInclusionFactory){
|
||||
this.classesProhibitedFromSearch = classesProhibitedFromSearch;
|
||||
this.individualProhibitedFromSearch = individualProhibitedFromSearch;
|
||||
this.contextNodesInclusionFactory = contextNodesInclusionFactory;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object translate(Object obj) throws IndexingException{
|
||||
long tProhibited = System.currentTimeMillis();
|
||||
|
||||
if(!(obj instanceof Individual))
|
||||
return null;
|
||||
|
||||
Individual ent = (Individual)obj;
|
||||
String value;
|
||||
String classPublicNames = "";
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
|
||||
//DocId
|
||||
String id = ent.getURI();
|
||||
log.debug("translating " + id);
|
||||
|
||||
if(id == null){
|
||||
log.debug("cannot add individuals without URIs to lucene Index");
|
||||
return null;
|
||||
}else if( id.startsWith(VitroVocabulary.vitroURI) ||
|
||||
id.startsWith(VitroVocabulary.VITRO_PUBLIC) ||
|
||||
id.startsWith(VitroVocabulary.PSEUDO_BNODE_NS) ||
|
||||
id.startsWith(OWL.NS)){
|
||||
log.debug("not indexing because of namespace:" + id);
|
||||
return null;
|
||||
}
|
||||
|
||||
//filter out class groups, owl:ObjectProperties etc..
|
||||
if(individualProhibitedFromSearch.isIndividualProhibited(id)){
|
||||
return null;
|
||||
}
|
||||
|
||||
log.debug("time to check if individual is prohibited:" + Long.toString(System.currentTimeMillis() - tProhibited));
|
||||
|
||||
// Types and classgroups
|
||||
boolean prohibited = false;
|
||||
List<VClass> vclasses = ent.getVClasses(false);
|
||||
long tClassgroup = System.currentTimeMillis();
|
||||
for(VClass clz : vclasses){
|
||||
if(clz.getURI() == null){
|
||||
continue;
|
||||
}else if(OWL.Thing.getURI().equals(clz.getURI())){
|
||||
//index individuals of type owl:Thing, just don't add owl:Thing as the type field in the index
|
||||
continue;
|
||||
} else if(clz.getURI().startsWith(OWL.NS)){
|
||||
log.debug("not indexing " + id + " because of type " + clz.getURI());
|
||||
return null;
|
||||
} else {
|
||||
if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI()))
|
||||
prohibited = true;
|
||||
if( clz.getSearchBoost() != null)
|
||||
doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost());
|
||||
|
||||
doc.addField(term.RDFTYPE, clz.getURI());
|
||||
|
||||
if(clz.getLocalName() != null){
|
||||
doc.addField(term.CLASSLOCALNAME, clz.getLocalName());
|
||||
doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase());
|
||||
}
|
||||
|
||||
if(clz.getName() != null)
|
||||
classPublicNames += clz.getName();
|
||||
|
||||
//Classgroup URI
|
||||
if(clz.getGroupURI() != null){
|
||||
doc.addField(term.CLASSGROUP_URI,clz.getGroupURI());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup));
|
||||
|
||||
|
||||
doc.addField(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0");
|
||||
|
||||
//lucene DocID
|
||||
doc.addField(term.DOCID, entClassName + id);
|
||||
|
||||
//vitro id
|
||||
doc.addField(term.URI, id);
|
||||
|
||||
//java class
|
||||
doc.addField(term.JCLASS, entClassName);
|
||||
|
||||
//Individual Label
|
||||
if(ent.getRdfsLabel() != null)
|
||||
value = ent.getRdfsLabel();
|
||||
else{
|
||||
log.debug("Using local name for individual with rdfs:label " + ent.getURI());
|
||||
value = ent.getLocalName();
|
||||
}
|
||||
|
||||
doc.addField(term.NAME_RAW, value, NAME_BOOST);
|
||||
doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST);
|
||||
doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST);
|
||||
doc.addField(term.NAME_STEMMED, value, NAME_BOOST);
|
||||
|
||||
long tContextNodes = System.currentTimeMillis();
|
||||
|
||||
String contextNodePropertyValues = "";
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI());
|
||||
contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI());
|
||||
|
||||
|
||||
doc.addField(term.CONTEXTNODE, contextNodePropertyValues);
|
||||
|
||||
log.debug("time to fire contextnode queries and include them in the index: " + Long.toString(System.currentTimeMillis() - tContextNodes));
|
||||
|
||||
|
||||
long tMoniker = System.currentTimeMillis();
|
||||
|
||||
//Moniker
|
||||
if(ent.getMoniker() != null){
|
||||
doc.addField(term.MONIKER, ent.getMoniker());
|
||||
}
|
||||
|
||||
//boost for entity
|
||||
if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0)
|
||||
doc.setDocumentBoost(ent.getSearchBoost());
|
||||
|
||||
//thumbnail
|
||||
try{
|
||||
value = null;
|
||||
if(ent.hasThumb())
|
||||
doc.addField(term.THUMBNAIL, "1");
|
||||
else
|
||||
doc.addField(term.THUMBNAIL, "0");
|
||||
}catch(Exception ex){
|
||||
log.debug("could not index thumbnail: " + ex);
|
||||
}
|
||||
|
||||
|
||||
//time of index in millis past epoc
|
||||
Object anon[] = { new Long((new DateTime() ).getMillis()) };
|
||||
doc.addField(term.INDEXEDTIME, String.format("%019d", anon));
|
||||
|
||||
log.debug("time to include moniker , thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker));
|
||||
|
||||
long tPropertyStatements = System.currentTimeMillis();
|
||||
if(!prohibited){
|
||||
//ALLTEXT, all of the 'full text'
|
||||
String t=null;
|
||||
value ="";
|
||||
value+= " "+( ((t=ent.getName()) == null)?"":t );
|
||||
value+= " "+( ((t=ent.getAnchor()) == null)?"":t);
|
||||
value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t );
|
||||
value+= " "+ ( ((t=ent.getDescription()) == null)?"":t );
|
||||
value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t );
|
||||
|
||||
value+= " " + classPublicNames;
|
||||
|
||||
List<DataPropertyStatement> dataPropertyStatements = ent.getDataPropertyStatements();
|
||||
if (dataPropertyStatements != null) {
|
||||
Iterator<DataPropertyStatement> dataPropertyStmtIter = dataPropertyStatements.iterator();
|
||||
while (dataPropertyStmtIter.hasNext()) {
|
||||
DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next();
|
||||
value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t );
|
||||
}
|
||||
}
|
||||
|
||||
List<ObjectPropertyStatement> objectPropertyStatements = ent.getObjectPropertyStatements();
|
||||
if (objectPropertyStatements != null) {
|
||||
Iterator<ObjectPropertyStatement> objectPropertyStmtIter = objectPropertyStatements.iterator();
|
||||
while (objectPropertyStmtIter.hasNext()) {
|
||||
ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next();
|
||||
if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) )
|
||||
continue;
|
||||
try {
|
||||
value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t );
|
||||
if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){
|
||||
//IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) );
|
||||
objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ));
|
||||
}
|
||||
|
||||
} catch (Exception e) {
|
||||
log.debug("could not index name of related object: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements));
|
||||
|
||||
doc.addField(term.ALLTEXT, value);
|
||||
doc.addField(term.ALLTEXTUNSTEMMED, value);
|
||||
}
|
||||
|
||||
return doc;
|
||||
}
|
||||
|
||||
// public IndividualToSolrDocument(Entity2LuceneDoc e2d){
|
||||
//// entityToLucene = e2d;
|
||||
// luceneToSolr = new LuceneDocToSolrDoc();
|
||||
// }
|
||||
|
||||
@Override
|
||||
public boolean canTranslate(Object obj) {
|
||||
return obj != null && obj instanceof Individual;
|
||||
|
@ -34,14 +272,23 @@ public class IndividualToSolrDocument implements Obj2DocIface {
|
|||
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object translate(Object obj) throws IndexingException {
|
||||
return luceneToSolr.translate( entityToLucene.translate( obj ) );
|
||||
}
|
||||
// @Override
|
||||
// public Object translate(Object obj) throws IndexingException {
|
||||
// return luceneToSolr.translate( entityToLucene.translate( obj ) );
|
||||
// }
|
||||
|
||||
@Override
|
||||
public Object unTranslate(Object result) {
|
||||
return luceneToSolr.unTranslate( result );
|
||||
Individual ent = null;
|
||||
if( result != null && result instanceof Document){
|
||||
Document hit = (Document) result;
|
||||
String id = hit.get(term.URI);
|
||||
ent = new IndividualImpl();
|
||||
ent.setURI(id);
|
||||
}
|
||||
return ent;
|
||||
}
|
||||
|
||||
/** Boost passed along with each name field (raw, lowercase, unstemmed, stemmed) that translate() adds. */
public static float NAME_BOOST = 3.0F;
|
||||
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
|
|||
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
|
||||
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
|
||||
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
|
||||
|
@ -67,10 +68,15 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
|
|||
/* setup the individual to solr doc translation */
|
||||
//first we need a ent2luceneDoc translator
|
||||
OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel");
|
||||
Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context) );
|
||||
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
|
||||
// Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc(
|
||||
// new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
// new IndividualProhibitedFromSearch(context),
|
||||
// new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context));
|
||||
// IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc );
|
||||
IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument(
|
||||
new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel),
|
||||
new IndividualProhibitedFromSearch(context),
|
||||
new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context));
|
||||
List<Obj2DocIface> o2d = new ArrayList<Obj2DocIface>();
|
||||
o2d.add(indToSolrDoc);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue