From a3471b7102f62955c0543e970654f001f162f3d8 Mon Sep 17 00:00:00 2001 From: deepakkoni Date: Wed, 18 May 2011 20:35:42 +0000 Subject: [PATCH] Committing the following changes to dev-search-relevance branch 1) ContextNodesInclusionFactory NIHVIVO-2204, NIHVIVO-2333(partly) 2) IndividualToSolrDoc to replace Entity2LuceneDoc while constructing SolrInputDocuments NIHVIVO-2653 3) LuceneDocument (test utility that prints contents from Lucene/Solr documents) --- solr/exampleSolr/conf/schema.xml | 1212 +++---- solr/exampleSolr/conf/solrconfig.xml | 3016 ++++++++--------- solr/exampleSolr/conf/stopwords.txt | 119 + .../vitro/webapp/dao/DisplayVocabulary.java | 9 +- .../vitro/webapp/search/VitroTermNames.java | 57 + .../beans/ContextNodesInclusionFactory.java | 637 ++++ .../search/lucene/Entity2LuceneDoc.java | 97 +- .../webapp/search/lucene/LuceneSetup.java | 12 +- .../webapp/search/lucene/LuceneSetupCJK.java | 210 +- .../search/lucene/test/LuceneDocument.java | 209 ++ .../search/solr/IndividualToSolrDocument.java | 267 +- .../vitro/webapp/search/solr/SolrSetup.java | 14 +- 12 files changed, 3613 insertions(+), 2246 deletions(-) create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java diff --git a/solr/exampleSolr/conf/schema.xml b/solr/exampleSolr/conf/schema.xml index 7a220a86b..0a4cc7445 100644 --- a/solr/exampleSolr/conf/schema.xml +++ b/solr/exampleSolr/conf/schema.xml @@ -1,602 +1,610 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - DocId - - - ALLTEXT - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DocId + + + ALLTEXT + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/exampleSolr/conf/solrconfig.xml b/solr/exampleSolr/conf/solrconfig.xml index a3bf11307..300d04530 100644 --- a/solr/exampleSolr/conf/solrconfig.xml +++ b/solr/exampleSolr/conf/solrconfig.xml @@ -1,1508 +1,1508 @@ - - - - - - - - - ${solr.abortOnConfigurationError:true} - - - LUCENE_31 - - - - - - - - - - - - - - - - - - - - - - - - - - - false - - 10 - - 32 - - - - 10000 - 1000 - 10000 - - - - - - - - - native - - - - - - - - - false - 32 - 10 - - - false - - - true - - - - - 1 - - 0 - - - - - - false - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1024 - - - - - - - - - - - - - - - - - - - - - - true - - - - - - 20 - - - 200 - - - - - - - - - - - - static firstSearcher warming in solrconfig.xml - - - - - - false - - - 2 - - - - - - - - - - - - - - - - - - - - - - - explicit - 10 - - - - - - - - - - - - - - explicit - - - velocity - - browse - layout - Solritas - - edismax - *:* - 10 - *,score - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - text,features,name,sku,id,manu,cat - 3 - - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - - on - cat - manu_exact - ipod - GB - 1 - cat,inStock - price - 0 - 600 - 50 - after - manufacturedate_dt - NOW/YEAR-10YEARS - NOW - +1YEAR - before - after - - - - on - text features name - 0 - name - - - spellcheck - - - - - - - - - - - - - - - - - - - - - - - text - true - ignored_ - - - true - links - ignored_ - - - - - - - - - - - - - - - - - - - - - search - solrpingquery - all - - - - - - - explicit - true - - - - - - - - - - - - textSpell - - - - - - default - name - spellchecker - - - - - - - - - - - - - - - - false - false - 1 - - - spellcheck - - - - - - - - - - true - - - tvComponent - - - - - - - - - default - - org.carrot2.clustering.lingo.LingoClusteringAlgorithm - - 20 - - - ENGLISH - - - stc - org.carrot2.clustering.stc.STCClusteringAlgorithm - - - - - - - true - default - true - - name - id - - features - - true - - - - false - - edismax - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - *:* - 10 - *,score - - - clustering - - - - - - - - - - true - - - terms - - - - - - - - string - elevate.xml - - - - - - explicit - - - elevator - - - - - - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - - - - - - - - - - - - - - - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - - - - - - - - - - - - - 5 - - - - - - - - - - - - - *:* - - - - - - + + + + + + + + + ${solr.abortOnConfigurationError:true} + + + LUCENE_31 + + + + + + + + + + + + + + + + + + + + + + + + + + + false + + 10 + + 32 + + + + 10000 + 1000 + 10000 + + + + + + + + + native + + + + + + + + + false + 32 + 10 + + + false + + + true + + + + + 1 + + 0 + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 2 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + + + + + + + + + + + + + + explicit + + + velocity + + browse + layout + Solritas + + edismax + *:* + 10 + *,score + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + text,features,name,sku,id,manu,cat + 3 + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + + on + cat + manu_exact + ipod + GB + 1 + cat,inStock + price + 0 + 600 + 50 + after + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + + on + text features name + 0 + name + + + spellcheck + + + + + + + + + + + + + + + + + + + + + + + text + true + ignored_ + + + true + links + ignored_ + + + + + + + + + + + + + + + + + + + + + search + solrpingquery + all + + + + + + + explicit + true + + + + + + + + + + + + textSpell + + + + + + default + name + spellchecker + + + + + + + + + + + + + + + + false + false + 1 + + + spellcheck + + + + + + + + + + true + + + tvComponent + + + + + + + + + default + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + 20 + + + ENGLISH + + + stc + org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + + + + + + + + + + + + + 5 + + + + + + + + + + + + + *:* + + + + + + diff --git a/solr/exampleSolr/conf/stopwords.txt b/solr/exampleSolr/conf/stopwords.txt index 22f277fe0..5f383a458 100644 --- a/solr/exampleSolr/conf/stopwords.txt +++ b/solr/exampleSolr/conf/stopwords.txt @@ -56,3 +56,122 @@ was will with +# these stopwords are taken +# from http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html?page=2 + +about +after +all +also +an +and +another +any +are +as +at +be +because +been +before +being +between +both +but +by +came +can +come +could +did +do +does +each +else +for +from +get +got +has +had +he +have +her +here +him +himself +his +how +if +in +into +is +it +its +just +like +make +many +me +might +more +most +much +must +my +never +now +of +on +only +or +other +our +out +over +re +said +same +see +should +since +so +some +still +such +take +than +that +the +their +them +then +there +these +they +this +those +through +to +too +under +up +use +very +want +was +way +we +well +were +what +when +where +which +while +who +will +with +would +you +your diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/DisplayVocabulary.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/DisplayVocabulary.java index 86fb805c1..10ad80244 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/DisplayVocabulary.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/DisplayVocabulary.java @@ -24,6 +24,9 @@ public class DisplayVocabulary { /* Individuals */ public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex"; + //bk392 for extracting properties beyond context nodes. + public static final String CONTEXT_NODES_URI = NS + "QueryForContextNodes"; + /* Page types */ public static final String PAGE_TYPE = NS + "Page"; public static final String HOME_PAGE_TYPE = NS + "HomePage"; @@ -35,8 +38,10 @@ public class DisplayVocabulary { /* Data Properties */ public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping"); public static final String TITLE = NS + "title"; - public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate"); - + public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate"); + //bk392 for extracting properties beyond context nodes. + public static final DatatypeProperty QUERY_FOR_EDUCATIONAL_TRAINING = m_model.createDatatypeProperty(NS + "queryForEducationalTraining"); + /* URIs for storing menu.n3 */ public static final String MENU_TEXT_RES = NS + "MenuText"; public static final String HAS_TEXT_REPRESENTATION = NS + "hasMenuText"; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java new file mode 100644 index 000000000..74d719823 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java @@ -0,0 +1,57 @@ +package edu.cornell.mannlib.vitro.webapp.search; + +public class VitroTermNames { + /** Id of entity, vclass or tab */ + public static String URI = "URI"; + /** lucene document id */ + public static String DOCID = "DocId"; + /** java class of the object that the Doc represents. */ + public static String JCLASS = "JCLASS"; + /** rdf:type */ + public static String RDFTYPE = "type"; + /** rdf:type */ + public static String CLASSGROUP_URI = "classgroup"; + /** Modtime from db */ + public static String MODTIME = "modTime"; + + /** time of index in msec since epoc */ + public static String INDEXEDTIME= "indexedTime"; + /** timekey of entity in yyyymmddhhmm */ + public static String TIMEKEY="TIMEKEY"; + /** time of sunset/end of entity in yyyymmddhhmm */ + public static String SUNSET="SUNSET"; + /** time of sunrise/start of entity in yyyymmddhhmm */ + public static String SUNRISE="SUNRISE"; + /** entity's moniker */ + public static String MONIKER="moniker"; + /** text for 'full text' search, this is stemmed */ + public static String ALLTEXT = "ALLTEXT"; + /** text for 'full text' search, this is unstemmed for + * use with wildcards and prefix queries */ + public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED"; + /** class name for storing context nodes **/ + public static final String CONTEXTNODE = "contextNode"; + /** keywords */ + public static final String KEYWORDS = "KEYWORDS"; + /** Does the individual have a thumbnail image? 1=yes 0=no */ + public static final String THUMBNAIL = "THUMBNAIL"; + /** Should individual be included in full text search results? 1=yes 0=no */ + public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS"; + /** class names in human readable form of an individual*/ + public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase"; + /** class names in human readable form of an individual*/ + public static final String CLASSLOCALNAME = "classLocalName"; + + // Fields derived from rdfs:label + /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/ + public static String NAME_RAW = "nameRaw"; // was NAMERAW + + /** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/ + public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE + + /** rdfs:label lowercased, tokenized, stop words, no stemming **/ + public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED + + /** rdfs:label lowercased, tokenized, stop words, stemmed **/ + public static String NAME_STEMMED = "nameStemmed"; // was NAME +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java new file mode 100644 index 000000000..80805c030 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java @@ -0,0 +1,637 @@ +package edu.cornell.mannlib.vitro.webapp.search.beans; + +import javax.servlet.ServletContext; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.QuerySolutionMap; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.shared.Lock; + +import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; +import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; + +public class ContextNodesInclusionFactory { + + private OntModel fullModel; + private String contextNodeURI; + private String query = ""; + + private static final String queryForEducationalTraining = "SELECT ?query WHERE {" + + "?searchConfig <"+ DisplayVocabulary.QUERY_FOR_EDUCATIONAL_TRAINING + "> ?query . }"; + + private static Log log = LogFactory.getLog(ContextNodesInclusionFactory.class); + + public ContextNodesInclusionFactory(String contextNodeURI, + OntModel displayOntModel, ServletContext context) { + this.fullModel = ModelContext.getJenaOntModel(context); + this.contextNodeURI = contextNodeURI; + query = getQueryFromModel(contextNodeURI, displayOntModel); + } + + private String getQueryFromModel(String uri, OntModel displayOntModel) { + + String resultQuery = ""; + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource searchConfig = ResourceFactory.createResource(uri); + + initialBinding.add("searchConfig", searchConfig); + + Query query = QueryFactory.create(queryForEducationalTraining); + displayOntModel.enterCriticalSection(Lock.READ); + try{ + QueryExecution qExec = QueryExecutionFactory.create(query, displayOntModel, initialBinding); + try{ + ResultSet results = qExec.execSelect(); + while(results.hasNext()){ + QuerySolution soln = results.nextSolution(); + Literal node = soln.getLiteral("query"); + if(node.isLiteral()){ + resultQuery = node.toString(); + }else{ + log.warn("unexpected literal in the object position for context node queries " + node.toString()); + } + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + displayOntModel.leaveCriticalSection(); + } + + return resultQuery.substring(0, resultQuery.length() - 3); + } + + +// public List getFieldValues(String uri, Model modelToQuery, List queries){ + + //what do the queries need to be like? + // SELECT ?field ?value WHERE .... + + // what to do with multiple values for a field? + +// } + + + + //in different object: + /* + * get queries from somewhere + * get model to run queries on + * get list of individuals + * for each individual: + * fields = getFieldValues(uri, model, queiries) + * index(fields)? + * + * + */ + + public String getPropertiesAssociatedWithPosition(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT " + + "(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " + + " (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " + + " (str(?TitleOrRole) as ?titleOrRole) (str(?PositionLabel) as ?positionLabel) WHERE {" + + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Position . " + + + " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . " + + " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ." + + " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . " + + " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . " + + " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . " + + " OPTIONAL { ?c rdfs:label ?PositionLabel . } " + + + " } ORDER BY ?PositionLabel "; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode hrJobTitle = soln.get("hrJobTitle"); + if(hrJobTitle != null){ + propertyValues.append(" " + hrJobTitle.toString()); + }else{ + log.warn("hrJobTitle is null "); + } + + RDFNode involvedOrganizationName = soln.get("involvedOrganizationName"); + if(involvedOrganizationName != null){ + propertyValues.append(" " + involvedOrganizationName.toString()); + }else{ + log.warn("involvedOrganizationName is null "); + } + + RDFNode positionForPerson = soln.get("positionForPerson"); + if(positionForPerson != null){ + propertyValues.append(" " + positionForPerson.toString()); + }else{ + log.warn("positionForPerson is null "); + } + + RDFNode positionInOrganization = soln.get("positionInOrganization"); + if(positionInOrganization != null){ + propertyValues.append(" " + positionInOrganization.toString()); + }else{ + log.warn("positionInOrganization is null "); + } + + RDFNode titleOrRole = soln.get("titleOrRole"); + if(titleOrRole != null){ + propertyValues.append(" " + titleOrRole.toString()); + }else{ + log.warn("titleOrRole is null "); + } + + RDFNode positionLabel = soln.get("positionLabel"); + if(positionLabel != null){ + propertyValues.append(" " + positionLabel.toString()); + }else{ + log.warn("positionLabel is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + + return propertyValues.toString(); + } + + public String getPropertiesAssociatedWithRelationship(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " + + " (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {" + + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Relationship . " + + + " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . " + + " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ." + + " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . " + + " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . " + + + " } "; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode advisee = soln.get("advisee"); + if(advisee != null){ + propertyValues.append(" " + advisee.toString()); + }else{ + log.warn("advisee is null "); + } + + RDFNode degreeCandidacy = soln.get("degreeCandidacy"); + if(degreeCandidacy != null){ + propertyValues.append(" " + degreeCandidacy.toString()); + }else{ + log.warn("degreeCandidacy is null "); + } + + RDFNode linkedAuthor = soln.get("linkedAuthor"); + if(linkedAuthor != null){ + propertyValues.append(" " + linkedAuthor.toString()); + }else{ + log.warn("linkedAuthor is null "); + } + + RDFNode linkedInformationResource = soln.get("linkedInformationResource"); + if(linkedInformationResource != null){ + propertyValues.append(" " + linkedInformationResource.toString()); + }else{ + log.warn("linkedInformationResource is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + + return propertyValues.toString(); + } + + + public String getPropertiesAssociatedWithAwardReceipt(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " + + " (str(?Description) as ?description) (str(?AwardReceiptLabel) as ?awardReceiptLabel) WHERE {" + + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:AwardReceipt . " + + + " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . " + + " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ." + + " OPTIONAL { ?c core:description ?Description . } . " + + " OPTIONAL { ?c rdfs:label ?AwardReceiptLabel . } . " + + + " } ORDER BY ?AwardReceiptLabel"; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode awardConferredBy = soln.get("awardConferredBy"); + if(awardConferredBy != null){ + propertyValues.append(" " + awardConferredBy.toString()); + }else{ + log.warn("awardConferredBy is null "); + } + + RDFNode awardOrHonorFor = soln.get("awardOrHonorFor"); + if(awardOrHonorFor != null){ + propertyValues.append(" " + awardOrHonorFor.toString()); + }else{ + log.warn("awardOrHonorFor is null "); + } + + RDFNode description = soln.get("description"); + if(description != null){ + propertyValues.append(" " + description.toString()); + }else{ + log.warn("description is null "); + } + + RDFNode awardReceiptLabel = soln.get("awardReceiptLabel"); + if(awardReceiptLabel != null){ + propertyValues.append(" " + awardReceiptLabel.toString()); + }else{ + log.warn("awardReceiptLabel is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + + return propertyValues.toString(); + } + + public String getPropertiesAssociatedWithRole(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT DISTINCT (str(?OrganizationLabel) as ?organizationLabel) WHERE {" + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Role ; core:roleIn ?Organization ." + + " ?Organization rdfs:label ?OrganizationLabel . " + + " } ORDER BY ?OrganizationLabel "; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode organizationLabel = soln.get("organizationLabel"); + if(organizationLabel != null){ + propertyValues.append(" " + organizationLabel.toString()); + }else{ + log.warn("organizationLabel is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + + return propertyValues.toString(); + } + + + + public String getPropertiesAssociatedWithEducationalTraining(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix foaf: " + + " prefix core: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) " + + "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " + + "(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {" + + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:EducationalTraining . " + + + "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . " + + "OPTIONAL { ?c core:majorField ?MajorField .} ." + + " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }" + + " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . " + + +"}"; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode academicDegreeLabel = soln.get("academicDegreeLabel"); + if(academicDegreeLabel != null){ + propertyValues.append(" " + academicDegreeLabel.toString()); + }else{ + log.warn("academicDegreeLabel is null "); + } + + RDFNode academicDegreeAbbreviation = soln.get("academicDegreeAbbreviation"); + if(academicDegreeAbbreviation != null){ + propertyValues.append(" " + academicDegreeAbbreviation.toString()); + }else{ + log.warn("academicDegreeAbbreviation is null "); + } + + RDFNode majorField = soln.get("majorField"); + if(majorField != null){ + propertyValues.append(" " + majorField.toString()); + }else{ + log.warn("majorField is null "); + } + + RDFNode trainingAtDepartmentOrSchool = soln.get("departmentOrSchool"); + if(trainingAtDepartmentOrSchool != null){ + propertyValues.append(" " + trainingAtDepartmentOrSchool.toString()); + }else{ + log.warn("trainingAtDepartmentOrSchool is null "); + } + + RDFNode trainingAtOrganizationLabel = soln.get("trainingAtOrganizationLabel"); + if(trainingAtOrganizationLabel != null){ + propertyValues.append(" " + trainingAtOrganizationLabel.toString()); + }else{ + log.warn("trainingAtOrganizationLabel is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + return propertyValues.toString(); + + } + + public String getPropertiesAssociatedWithInformationResource(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix foaf: " + + " prefix core: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) " + + "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " + + "(str(?Features) as ?features) WHERE {" + + + " ?uri rdf:type core:InformationResource . " + + + "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." + + "?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource } . " + + "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ." + + " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea ; core:researchAreaOf ?h . ?h rdfs:label ?ResearchAreaOf . } " + + " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . " + + +"}"; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode linkedAuthor = soln.get("linkedAuthor"); + if(linkedAuthor != null){ + propertyValues.append(" " + linkedAuthor.toString()); + }else{ + log.warn("linkedAuthor is null "); + } + + RDFNode linkedInformationResource = soln.get("linkedInformationResource"); + if(linkedInformationResource != null){ + propertyValues.append(" " + linkedInformationResource.toString()); + }else{ + log.warn("linkedInformationResource is null "); + } + + RDFNode editor = soln.get("editor"); + if(editor != null){ + propertyValues.append(" " + editor.toString()); + }else{ + log.warn("editor is null "); + } + + RDFNode subjectArea = soln.get("subjectArea"); + if(subjectArea != null){ + propertyValues.append(" " + subjectArea.toString()); + }else{ + log.warn("subjectArea is null "); + } + + RDFNode researchAreaOf = soln.get("researchAreaOf"); + if(researchAreaOf != null){ + propertyValues.append(" " + researchAreaOf.toString()); + }else{ + log.warn("researchAreaOf is null "); + } + + RDFNode features = soln.get("features"); + if(features != null){ + propertyValues.append(" " + features.toString()); + }else{ + log.warn("features is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + return propertyValues.toString(); + + } + + + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java index 30831f197..a0b48278f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java @@ -2,6 +2,8 @@ package edu.cornell.mannlib.vitro.webapp.search.lucene; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -21,9 +23,11 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement; import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.search.IndexingException; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; +import edu.cornell.mannlib.vitro.webapp.search.lucene.test.LuceneDocument; /** * This class expect that Entities passed to it will have @@ -31,6 +35,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; * be as full as possible. */ public class Entity2LuceneDoc implements Obj2DocIface{ + /** These are the terms for the lucene index */ public static class VitroLuceneTermNames{ /** Id of entity, vclass or tab */ @@ -61,6 +66,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{ /** text for 'full text' search, this is unstemmed for * use with wildcards and prefix queries */ public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED"; + /** class name for storing context nodes **/ + public static final String CONTEXTNODE = "contextNode"; /** keywords */ public static final String KEYWORDS = "KEYWORDS"; /** Does the individual have a thumbnail image? 1=yes 0=no */ @@ -103,12 +110,19 @@ public class Entity2LuceneDoc implements Obj2DocIface{ private ProhibitedFromSearch classesProhibitedFromSearch; private IndividualProhibitedFromSearch individualProhibited; + + private ContextNodesInclusionFactory contextNodesInclusionFactory; + + private static HashMap IndividualURIToObjectProperties = new HashMap(); + + private static HashSet objectProperties = new HashSet(); public Entity2LuceneDoc( ProhibitedFromSearch classesProhibitedFromSearch, - IndividualProhibitedFromSearch individualProhibited){ + IndividualProhibitedFromSearch individualProhibited, ContextNodesInclusionFactory contextNodesInclusionFactory){ this.classesProhibitedFromSearch = classesProhibitedFromSearch; this.individualProhibited = individualProhibited; + this.contextNodesInclusionFactory = contextNodesInclusionFactory; } public boolean canTranslate(Object obj) { @@ -123,7 +137,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{ String value; Document doc = new Document(); String classPublicNames = ""; - + LuceneDocument document = new LuceneDocument(); //DocId String id = ent.getURI(); @@ -166,12 +180,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add( typeField); + document.setRDFTYPE(clz.getURI()); if(clz.getLocalName() != null){ Field classLocalName = new Field(term.CLASSLOCALNAME, clz.getLocalName(), Field.Store.YES, Field.Index.ANALYZED); Field classLocalNameLowerCase = new Field(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase(), Field.Store.YES, Field.Index.ANALYZED); doc.add(classLocalName); doc.add(classLocalNameLowerCase); + document.setCLASSLOCALNAME(clz.getLocalName()); } if( clz.getName() != null ) @@ -183,22 +199,29 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); // classGroupField.setBoost(FIELD_BOOST); doc.add(classGroupField); + document.setCLASSGROUP_URI(clz.getGroupURI()); } } } doc.add( new Field(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0", Field.Store.NO,Field.Index.NOT_ANALYZED_NO_NORMS) ); + document.setPROHIBITED_FROM_TEXT_RESULTS(prohibited?"1":"0"); /* lucene DOCID */ doc.add( new Field(term.DOCID, entClassName + id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - + document.setDOCID(entClassName + id); + + //vitro Id doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + document.setURI(id); + //java class doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - + document.setJCLASS(entClassName); + // Individual label if( ent.getRdfsLabel() != null ) value=ent.getRdfsLabel(); @@ -208,10 +231,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{ log.debug("Using local name for individual with rdfs:label " + ent.getURI()); value = ent.getLocalName(); } - Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED); nameRaw.setBoost(NAME_BOOST); doc.add(nameRaw); + document.setNAME(value); // RY Not sure if we need to store this. For Solr, see schema.xml field definition. Field nameLowerCase = new Field(term.NAME_LOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED); @@ -224,14 +247,30 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field nameStemmed = new Field(term.AC_NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED); nameStemmed.setBoost(NAME_BOOST); - doc.add(nameStemmed); + doc.add(nameStemmed); + String contextNodePropertyValues; + +// if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ + contextNodePropertyValues = contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI()); + +// } + + Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED ); + doc.add(contextNodeInformation); + document.setCONTEXTNODE(contextNodePropertyValues); //Moniker if(ent.getMoniker() != null){ Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(moniker); + document.setMONIKER(ent.getMoniker()); } //boost for entity @@ -274,31 +313,45 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Object anon[] = { new Long((new DateTime() ).getMillis()) }; doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + document.setINDEXEDTIME(String.format("%019d", anon)); + if( ! prohibited ){ //ALLTEXT, all of the 'full text' + StringBuffer alltext = new StringBuffer(); + String t=null; - value =""; - value+= " "+( ((t=ent.getName()) == null)?"":t ); - value+= " "+( ((t=ent.getAnchor()) == null)?"":t); - value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t ); - value+= " "+ ( ((t=ent.getDescription()) == null)?"":t ); - value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t ); - value+= " "+ getKeyterms(ent); + value =""; + value+= " "+( ((t=ent.getName()) == null)?"":t ); + alltext.append("\t NAME: " + ( ((t=ent.getName()) == null)?"":t )); + value+= " "+( ((t=ent.getAnchor()) == null)?"":t); + alltext.append("\t ANCHOR: " + ( ((t=ent.getAnchor()) == null)?"":t)); + value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t ); + alltext.append("\t MONIKER: " + ( ((t=ent.getMoniker()) == null)?"":t )); + value+= " "+ ( ((t=ent.getDescription()) == null)?"":t ); + alltext.append("\t DESCRIPTION: " + ( ((t=ent.getDescription()) == null)?"":t )); + value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t ); + alltext.append("\t BLURB: " + ( ((t=ent.getBlurb()) == null)?"":t )); + value+= " "+ getKeyterms(ent); + alltext.append("\t KEYTERMS: " + getKeyterms(ent)); - value+= " " + classPublicNames; + value+= " " + classPublicNames; + alltext.append(" CLASSPUBLICNAMES: " + classPublicNames); List dataPropertyStatements = ent.getDataPropertyStatements(); if (dataPropertyStatements != null) { + alltext.append("\n DATA_PROPERTY_STATEMENTS \n -------------------------------- \n"); Iterator dataPropertyStmtIter = dataPropertyStatements.iterator(); while (dataPropertyStmtIter.hasNext()) { DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next(); value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t ); + alltext.append("\n " + ( ((t=dataPropertyStmt.getData()) == null)?"":t )); } } List objectPropertyStatements = ent.getObjectPropertyStatements(); if (objectPropertyStatements != null) { + alltext.append("\n OBJECT_PROPERTY_STATEMENTS \n -------------------------------- \n"); Iterator objectPropertyStmtIter = objectPropertyStatements.iterator(); while (objectPropertyStmtIter.hasNext()) { ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next(); @@ -306,17 +359,31 @@ public class Entity2LuceneDoc implements Obj2DocIface{ continue; try { value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ); + alltext.append("\n " +( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) + + " : " + ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t )); + + if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ + //IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) ); + objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t )); + } + } catch (Exception e) { log.debug("could not index name of related object: " + e.getMessage()); } } } //stemmed terms - doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED)); + doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED)); //unstemmed terms doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED)); + document.setALLTEXT(alltext.toString()); } + document.writeToLog(); + + // log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n"); + log.info(" \n Object Properties " + objectProperties.toString() + "\n\n"); + return doc; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java index 149fa4914..6872596fa 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java @@ -10,6 +10,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_UNSTEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE; +import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CONTEXTNODE; import java.io.File; import java.io.IOException; @@ -41,6 +42,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup; @@ -111,10 +113,16 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { getAnalyzer()); context.setAttribute(ANALYZER, getAnalyzer()); + //bk392 adding another argument to Entity2LuceneDoc + // that takes care of sparql queries for context nodes. + OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); Entity2LuceneDoc translator = new Entity2LuceneDoc( new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), - new IndividualProhibitedFromSearch(context) ); + new IndividualProhibitedFromSearch(context), + new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context) + + ); indexer.addObj2Doc(translator); context.setAttribute(LuceneIndexer.class.getName(), indexer); @@ -250,9 +258,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { analyzer.addAnalyzer(AC_NAME_STEMMED, new HtmlLowerStopStemAnalyzer()); analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29)); + analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer()); analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer()); + return analyzer; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java index 3e3949ed6..3323dabe5 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java @@ -1,7 +1,7 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.lucene; - +package edu.cornell.mannlib.vitro.webapp.search.lucene; + import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -27,91 +27,93 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; - -/** - * Setup objects for lucene searching and indexing. - * - * The indexing and search objects, IndexBuilder and Searcher are found by the - * controllers IndexController and SearchController through the servletContext. - * This object will have the method contextInitialized() called when the tomcat - * server starts this webapp. - * - * The contextInitialized() will try to find the lucene index directory, - * make a LueceneIndexer and a LuceneSearcher. The LuceneIndexer will - * also get a list of Obj2Doc objects so it can translate object to lucene docs. - * - * To execute this at context creation put this in web.xml: - - - edu.cornell.mannlib.vitro.search.setup.LuceneSetup - - - - * @author bdc34 - * - */ -public class LuceneSetupCJK implements javax.servlet.ServletContextListener { + +/** + * Setup objects for lucene searching and indexing. + * + * The indexing and search objects, IndexBuilder and Searcher are found by the + * controllers IndexController and SearchController through the servletContext. + * This object will have the method contextInitialized() called when the tomcat + * server starts this webapp. + * + * The contextInitialized() will try to find the lucene index directory, + * make a LueceneIndexer and a LuceneSearcher. The LuceneIndexer will + * also get a list of Obj2Doc objects so it can translate object to lucene docs. + * + * To execute this at context creation put this in web.xml: + + + edu.cornell.mannlib.vitro.search.setup.LuceneSetup + + + + * @author bdc34 + * + */ +public class LuceneSetupCJK implements javax.servlet.ServletContextListener { private static String indexDir = null; - private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName()); - private static final String PROPERTY_VITRO_HOME = "vitro.home.directory"; + private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName()); + private static final String PROPERTY_VITRO_HOME = "vitro.home.directory"; private static final String LUCENE_SUBDIRECTORY_NAME = "luceneIndex"; - - /** - * Gets run to set up DataSource when the webapp servlet context gets created. - */ + + /** + * Gets run to set up DataSource when the webapp servlet context gets created. + */ @Override - @SuppressWarnings("unchecked") - public void contextInitialized(ServletContextEvent sce) { - ServletContext context = sce.getServletContext(); - log.info("**** Running "+this.getClass().getName()+".contextInitialized()"); - try{ - indexDir = getIndexDirName(sce); - log.info("Lucene indexDir: " + indexDir); - - setBoolMax(); - - HashSet dataPropertyBlacklist = new HashSet(); - context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist); - - HashSet objectPropertyBlacklist = new HashSet(); - objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom"); - context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist); + @SuppressWarnings("unchecked") + public void contextInitialized(ServletContextEvent sce) { + ServletContext context = sce.getServletContext(); + log.info("**** Running "+this.getClass().getName()+".contextInitialized()"); + try{ + indexDir = getIndexDirName(sce); + log.info("Lucene indexDir: " + indexDir); + + setBoolMax(); + + HashSet dataPropertyBlacklist = new HashSet(); + context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist); + + HashSet objectPropertyBlacklist = new HashSet(); + objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom"); + context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist); //This is where to get a LucenIndex from. The indexer will //need to reference this to notify it of updates to the index LuceneIndexFactory lif = LuceneIndexFactory.setup(context, indexDir); String liveIndexDir = lif.getLiveIndexDir(context); - - //here we want to put the LuceneIndex object into the application scope - LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer()); + + //here we want to put the LuceneIndex object into the application scope + LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer()); context.setAttribute(LuceneSetup.ANALYZER, getAnalyzer()); OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); Entity2LuceneDoc translator = new Entity2LuceneDoc( new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), - new IndividualProhibitedFromSearch(context) ); + new IndividualProhibitedFromSearch(context), + new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); indexer.addObj2Doc(translator); indexer.setLuceneIndexFactory(lif); - - //This is where the builder gets the list of places to try to - //get objects to index. It is filtered so that non-public text - //does not get into the search index. - WebappDaoFactory wadf = - (WebappDaoFactory) context.getAttribute("webappDaoFactory"); + + //This is where the builder gets the list of places to try to + //get objects to index. It is filtered so that non-public text + //does not get into the search index. + WebappDaoFactory wadf = + (WebappDaoFactory) context.getAttribute("webappDaoFactory"); VitroFilters vf = VitroFilterUtils.getPublicFilter(context); - wadf = new WebappDaoFactoryFiltering(wadf,vf); - - List sources = new ArrayList(); - sources.add(wadf.getIndividualDao()); - - IndexBuilder builder = new IndexBuilder(context,indexer,sources); - - // here we add the IndexBuilder with the LuceneIndexer - // to the servlet context so we can access it later in the webapp. + wadf = new WebappDaoFactoryFiltering(wadf,vf); + + List sources = new ArrayList(); + sources.add(wadf.getIndividualDao()); + + IndexBuilder builder = new IndexBuilder(context,indexer,sources); + + // here we add the IndexBuilder with the LuceneIndexer + // to the servlet context so we can access it later in the webapp. context.setAttribute(IndexBuilder.class.getName(),builder); //set up listeners so search index builder is notified of changes to model @@ -119,36 +121,36 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener { OntModel jenaOntModel = (OntModel)sce.getServletContext().getAttribute("jenaOntModel"); SearchReindexingListener srl = new SearchReindexingListener( builder ); ModelContext.registerListenerForChanges(sce.getServletContext(), srl); - - }catch(Exception ex){ - log.error("Could not setup lucene full text search." , ex); - } - - log.debug("**** End of "+this.getClass().getName()+".contextInitialized()"); - } - - /** - * Gets run when the webApp Context gets destroyed. - */ + + }catch(Exception ex){ + log.error("Could not setup lucene full text search." , ex); + } + + log.debug("**** End of "+this.getClass().getName()+".contextInitialized()"); + } + + /** + * Gets run when the webApp Context gets destroyed. + */ @Override public void contextDestroyed(ServletContextEvent sce) { - + log.info("**** Running "+this.getClass().getName()+".contextDestroyed()"); IndexBuilder builder = (IndexBuilder)sce.getServletContext().getAttribute(IndexBuilder.class.getName()); - builder.stopIndexingThread(); - } - - /** - * In wild card searches the query is first broken into many boolean searches - * OR'ed together. So if there is a query that would match a lot of records - * we need a high max boolean limit for the lucene search. - * - * This sets some static method in the lucene library to achieve this. - */ - public static void setBoolMax() { - BooleanQuery.setMaxClauseCount(16384); - } - + builder.stopIndexingThread(); + } + + /** + * In wild card searches the query is first broken into many boolean searches + * OR'ed together. So if there is a query that would match a lot of records + * we need a high max boolean limit for the lucene search. + * + * This sets some static method in the lucene library to achieve this. + */ + public static void setBoolMax() { + BooleanQuery.setMaxClauseCount(16384); + } + /** * Gets the name of the directory to store the lucene index in. The * {@link ConfigurationProperties} should have a property named @@ -190,14 +192,14 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener { return luceneDir.getPath(); } - /** - * Gets the analyzer that will be used when building the indexing - * and when analyzing the incoming search terms. - * - * @return - */ - private Analyzer getAnalyzer() { - return new CJKAnalyzer(); - } - -} + /** + * Gets the analyzer that will be used when building the indexing + * and when analyzing the incoming search terms. + * + * @return + */ + private Analyzer getAnalyzer() { + return new CJKAnalyzer(); + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java new file mode 100644 index 000000000..ad2ec0114 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java @@ -0,0 +1,209 @@ +package edu.cornell.mannlib.vitro.webapp.search.lucene.test; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +public class LuceneDocument { + + private static final Log log = LogFactory.getLog(LuceneDocument.class.getName()); + + String URI; + String DOCID; + String JCLASS; + String RDFTYPE; + String CLASSGROUP_URI; + String MODTIME; + String NAME; + String PORTAL; + String INDEXEDTIME; + String TIMEKEY; + String SUNSET; + String MONIKER; + String ALLTEXT; + String KEYWORDS; + String THUMBNAIL; + String PROHIBITED_FROM_TEXT_RESULTS; + String CLASSLOCALNAME; + String CONTEXTNODE; + + static final String FILE = "~/Desktop/LuceneIndividuals.txt"; + + + public String getURI() { + return URI; + } + + public void setURI(String uRI) { + URI = uRI; + } + + public String getDOCID() { + return DOCID; + } + + public void setDOCID(String dOCID) { + DOCID = dOCID; + } + + public String getJCLASS() { + return JCLASS; + } + + public void setJCLASS(String jCLASS) { + JCLASS = jCLASS; + } + + public String getRDFTYPE() { + return RDFTYPE; + } + + public void setRDFTYPE(String rDFTYPE) { + RDFTYPE = rDFTYPE; + } + + public String getCLASSGROUP_URI() { + return CLASSGROUP_URI; + } + + public void setCLASSGROUP_URI(String cLASSGROUP_URI) { + CLASSGROUP_URI = cLASSGROUP_URI; + } + + public String getMODTIME() { + return MODTIME; + } + + public void setMODTIME(String mODTIME) { + MODTIME = mODTIME; + } + + public String getNAME() { + return NAME; + } + + public void setNAME(String nAME) { + NAME = nAME; + } + + public String getPORTAL() { + return PORTAL; + } + + public void setPORTAL(String pORTAL) { + PORTAL = pORTAL; + } + + public String getINDEXEDTIME() { + return INDEXEDTIME; + } + + public void setINDEXEDTIME(String iNDEXEDTIME) { + INDEXEDTIME = iNDEXEDTIME; + } + + public String getTIMEKEY() { + return TIMEKEY; + } + + public void setTIMEKEY(String tIMEKEY) { + TIMEKEY = tIMEKEY; + } + + public String getSUNSET() { + return SUNSET; + } + + public void setSUNSET(String sUNSET) { + SUNSET = sUNSET; + } + + public String getMONIKER() { + return MONIKER; + } + + public void setMONIKER(String mONIKER) { + MONIKER = mONIKER; + } + + public String getALLTEXT() { + return ALLTEXT; + } + + public void setALLTEXT(String aLLTEXT) { + ALLTEXT = aLLTEXT; + } + + public String getKEYWORDS() { + return KEYWORDS; + } + + public void setKEYWORDS(String kEYWORDS) { + KEYWORDS = kEYWORDS; + } + + public String getTHUMBNAIL() { + return THUMBNAIL; + } + + public void setTHUMBNAIL(String tHUMBNAIL) { + THUMBNAIL = tHUMBNAIL; + } + + public String getPROHIBITED_FROM_TEXT_RESULTS() { + return PROHIBITED_FROM_TEXT_RESULTS; + } + + public void setPROHIBITED_FROM_TEXT_RESULTS(String pROHIBITED_FROM_TEXT_RESULTS) { + PROHIBITED_FROM_TEXT_RESULTS = pROHIBITED_FROM_TEXT_RESULTS; + } + + public String getCLASSLOCALNAME() { + return CLASSLOCALNAME; + } + + public void setCLASSLOCALNAME(String cLASSLOCALNAME) { + CLASSLOCALNAME = cLASSLOCALNAME; + } + + @Override + public String toString(){ + + StringBuffer result = new StringBuffer(); + + result.append("\n==================================\n"); + + result.append("URI : " + URI); + result.append("\nDOCID : " + DOCID); + result.append("\nJCLASS : " + JCLASS); + result.append("\nRDFTYPE : " + RDFTYPE); + result.append("\nCLASSGROUP_URI : " + CLASSGROUP_URI); + result.append("\nMODTIME : " + MODTIME); + result.append("\nNAME : " + NAME); + result.append("\nPORTAL : " + PORTAL); + result.append("\nINDEXEDTIME : " + INDEXEDTIME); + result.append("\nCONTEXTNODE : " + CONTEXTNODE); + result.append("\nTIMEKEY : " + TIMEKEY); + result.append("\nSUNSET : " + SUNSET); + result.append("\nMONIKER : " + MONIKER); + result.append("\nALLTEXT : " + ALLTEXT); + result.append("\nKEYWORDS : " + KEYWORDS); + result.append("\nTHUMBNAIL : " + THUMBNAIL); + result.append("\nPROHIBITED_FROM_TEXT_RESULTS : " + PROHIBITED_FROM_TEXT_RESULTS); + result.append("\nCLASSLOCALNAME : " + CLASSLOCALNAME); + + return result.toString(); + + } + + public void writeToLog(){ + log.info(this.toString()); + } + + public void setCONTEXTNODE(String contextNodePropertyValues) { + this.CONTEXTNODE = contextNodePropertyValues; + } +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java index 1bd7b2df8..797f05d88 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java @@ -2,23 +2,261 @@ package edu.cornell.mannlib.vitro.webapp.search.solr; -import org.apache.solr.common.SolrDocument; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.lucene.document.Document; +import org.apache.solr.client.solrj.beans.Field; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.joda.time.DateTime; + +import com.hp.hpl.jena.vocabulary.OWL; + +import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement; import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl; +import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement; +import edu.cornell.mannlib.vitro.webapp.beans.VClass; +import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.search.IndexingException; +import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; +import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; +import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; public class IndividualToSolrDocument implements Obj2DocIface { protected LuceneDocToSolrDoc luceneToSolr; - protected Entity2LuceneDoc entityToLucene; - public IndividualToSolrDocument(Entity2LuceneDoc e2d){ - entityToLucene = e2d; - luceneToSolr = new LuceneDocToSolrDoc(); + public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName()); + + public static VitroTermNames term = new VitroTermNames(); + + private static String entClassName = Individual.class.getName(); + + private ProhibitedFromSearch classesProhibitedFromSearch; + + private IndividualProhibitedFromSearch individualProhibitedFromSearch; + + private ContextNodesInclusionFactory contextNodesInclusionFactory; + + private static HashSet objectProperties = new HashSet(); + + + public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch, + IndividualProhibitedFromSearch individualProhibitedFromSearch, + ContextNodesInclusionFactory contextNodesInclusionFactory){ + this.classesProhibitedFromSearch = classesProhibitedFromSearch; + this.individualProhibitedFromSearch = individualProhibitedFromSearch; + this.contextNodesInclusionFactory = contextNodesInclusionFactory; } + @Override + public Object translate(Object obj) throws IndexingException{ + long tProhibited = System.currentTimeMillis(); + + if(!(obj instanceof Individual)) + return null; + + Individual ent = (Individual)obj; + String value; + String classPublicNames = ""; + SolrInputDocument doc = new SolrInputDocument(); + + //DocId + String id = ent.getURI(); + log.debug("translating " + id); + + if(id == null){ + log.debug("cannot add individuals without URIs to lucene Index"); + return null; + }else if( id.startsWith(VitroVocabulary.vitroURI) || + id.startsWith(VitroVocabulary.VITRO_PUBLIC) || + id.startsWith(VitroVocabulary.PSEUDO_BNODE_NS) || + id.startsWith(OWL.NS)){ + log.debug("not indexing because of namespace:" + id); + return null; + } + + //filter out class groups, owl:ObjectProperties etc.. + if(individualProhibitedFromSearch.isIndividualProhibited(id)){ + return null; + } + + log.debug("time to check if individual is prohibited:" + Long.toString(System.currentTimeMillis() - tProhibited)); + + // Types and classgroups + boolean prohibited = false; + List vclasses = ent.getVClasses(false); + long tClassgroup = System.currentTimeMillis(); + for(VClass clz : vclasses){ + if(clz.getURI() == null){ + continue; + }else if(OWL.Thing.getURI().equals(clz.getURI())){ + //index individuals of type owl:Thing, just don't add owl:Thing as the type field in the index + continue; + } else if(clz.getURI().startsWith(OWL.NS)){ + log.debug("not indexing " + id + " because of type " + clz.getURI()); + return null; + } else { + if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI())) + prohibited = true; + if( clz.getSearchBoost() != null) + doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost()); + + doc.addField(term.RDFTYPE, clz.getURI()); + + if(clz.getLocalName() != null){ + doc.addField(term.CLASSLOCALNAME, clz.getLocalName()); + doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase()); + } + + if(clz.getName() != null) + classPublicNames += clz.getName(); + + //Classgroup URI + if(clz.getGroupURI() != null){ + doc.addField(term.CLASSGROUP_URI,clz.getGroupURI()); + } + + } + } + + log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup)); + + + doc.addField(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0"); + + //lucene DocID + doc.addField(term.DOCID, entClassName + id); + + //vitro id + doc.addField(term.URI, id); + + //java class + doc.addField(term.JCLASS, entClassName); + + //Individual Label + if(ent.getRdfsLabel() != null) + value = ent.getRdfsLabel(); + else{ + log.debug("Using local name for individual with rdfs:label " + ent.getURI()); + value = ent.getLocalName(); + } + + doc.addField(term.NAME_RAW, value, NAME_BOOST); + doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST); + doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST); + doc.addField(term.NAME_STEMMED, value, NAME_BOOST); + + long tContextNodes = System.currentTimeMillis(); + + String contextNodePropertyValues = ""; + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI()); + + + doc.addField(term.CONTEXTNODE, contextNodePropertyValues); + + log.debug("time to fire contextnode queries and include them in the index: " + Long.toString(System.currentTimeMillis() - tContextNodes)); + + + long tMoniker = System.currentTimeMillis(); + + //Moniker + if(ent.getMoniker() != null){ + doc.addField(term.MONIKER, ent.getMoniker()); + } + + //boost for entity + if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0) + doc.setDocumentBoost(ent.getSearchBoost()); + + //thumbnail + try{ + value = null; + if(ent.hasThumb()) + doc.addField(term.THUMBNAIL, "1"); + else + doc.addField(term.THUMBNAIL, "0"); + }catch(Exception ex){ + log.debug("could not index thumbnail: " + ex); + } + + + //time of index in millis past epoc + Object anon[] = { new Long((new DateTime() ).getMillis()) }; + doc.addField(term.INDEXEDTIME, String.format("%019d", anon)); + + log.debug("time to include moniker , thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker)); + + long tPropertyStatements = System.currentTimeMillis(); + if(!prohibited){ + //ALLTEXT, all of the 'full text' + String t=null; + value =""; + value+= " "+( ((t=ent.getName()) == null)?"":t ); + value+= " "+( ((t=ent.getAnchor()) == null)?"":t); + value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t ); + value+= " "+ ( ((t=ent.getDescription()) == null)?"":t ); + value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t ); + + value+= " " + classPublicNames; + + List dataPropertyStatements = ent.getDataPropertyStatements(); + if (dataPropertyStatements != null) { + Iterator dataPropertyStmtIter = dataPropertyStatements.iterator(); + while (dataPropertyStmtIter.hasNext()) { + DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next(); + value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t ); + } + } + + List objectPropertyStatements = ent.getObjectPropertyStatements(); + if (objectPropertyStatements != null) { + Iterator objectPropertyStmtIter = objectPropertyStatements.iterator(); + while (objectPropertyStmtIter.hasNext()) { + ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next(); + if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) ) + continue; + try { + value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ); + if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ + //IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) ); + objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t )); + } + + } catch (Exception e) { + log.debug("could not index name of related object: " + e.getMessage()); + } + } + } + + log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements)); + + doc.addField(term.ALLTEXT, value); + doc.addField(term.ALLTEXTUNSTEMMED, value); + } + + return doc; + } + +// public IndividualToSolrDocument(Entity2LuceneDoc e2d){ +//// entityToLucene = e2d; +// luceneToSolr = new LuceneDocToSolrDoc(); +// } + @Override public boolean canTranslate(Object obj) { return obj != null && obj instanceof Individual; @@ -34,14 +272,23 @@ public class IndividualToSolrDocument implements Obj2DocIface { throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented"); } - @Override - public Object translate(Object obj) throws IndexingException { - return luceneToSolr.translate( entityToLucene.translate( obj ) ); - } +// @Override +// public Object translate(Object obj) throws IndexingException { +// return luceneToSolr.translate( entityToLucene.translate( obj ) ); +// } @Override public Object unTranslate(Object result) { - return luceneToSolr.unTranslate( result ); + Individual ent = null; + if( result != null && result instanceof Document){ + Document hit = (Document) result; + String id = hit.get(term.URI); + ent = new IndividualImpl(); + ent.setURI(id); + } + return ent; } + public static float NAME_BOOST = 3.0F; + } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java index ae89cd381..0f142c6bb 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java @@ -23,6 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; @@ -67,10 +68,15 @@ public class SolrSetup implements javax.servlet.ServletContextListener{ /* setup the individual to solr doc translation */ //first we need a ent2luceneDoc translator OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); - Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc( - new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), - new IndividualProhibitedFromSearch(context) ); - IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc ); +// Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc( +// new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), +// new IndividualProhibitedFromSearch(context), +// new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); +// IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc ); + IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( + new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), + new IndividualProhibitedFromSearch(context), + new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); List o2d = new ArrayList(); o2d.add(indToSolrDoc);