From 625ae19ece9057298354841b32739161b72ffc94 Mon Sep 17 00:00:00 2001 From: j2blake Date: Wed, 11 May 2011 16:16:23 +0000 Subject: [PATCH 01/38] A branch for Deepak and Anup's work on search relevance. From a3471b7102f62955c0543e970654f001f162f3d8 Mon Sep 17 00:00:00 2001 From: deepakkoni Date: Wed, 18 May 2011 20:35:42 +0000 Subject: [PATCH 02/38] Committing the following changes to dev-search-relevance branch 1) ContextNodesInclusionFactory NIHVIVO-2204, NIHVIVO-2333(partly) 2) IndividualToSolrDoc to replace Entity2LuceneDoc while constructing SolrInputDocuments NIHVIVO-2653 3) LuceneDocument (test utility that prints contents from Lucene/Solr documents) --- solr/exampleSolr/conf/schema.xml | 1212 +++---- solr/exampleSolr/conf/solrconfig.xml | 3016 ++++++++--------- solr/exampleSolr/conf/stopwords.txt | 119 + .../vitro/webapp/dao/DisplayVocabulary.java | 9 +- .../vitro/webapp/search/VitroTermNames.java | 57 + .../beans/ContextNodesInclusionFactory.java | 637 ++++ .../search/lucene/Entity2LuceneDoc.java | 97 +- .../webapp/search/lucene/LuceneSetup.java | 12 +- .../webapp/search/lucene/LuceneSetupCJK.java | 210 +- .../search/lucene/test/LuceneDocument.java | 209 ++ .../search/solr/IndividualToSolrDocument.java | 267 +- .../vitro/webapp/search/solr/SolrSetup.java | 14 +- 12 files changed, 3613 insertions(+), 2246 deletions(-) create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java diff --git a/solr/exampleSolr/conf/schema.xml b/solr/exampleSolr/conf/schema.xml index 7a220a86b..0a4cc7445 100644 --- a/solr/exampleSolr/conf/schema.xml +++ b/solr/exampleSolr/conf/schema.xmlocIdocId + + + ALLTEXT + + + + + + + + + + + + + + + + + + + + + diff --git a/solr/exampleSolr/conf/solrconfig.xml b/solr/exampleSolr/conf/solrconfig.xml 
index a3bf11307..300d04530 100644 --- a/solr/exampleSolr/conf/solrconfig.xml +++ b/solr/exampleSolr/conf/solrconfig.xml @@ -1,1508 +1,1508 @@ - - - - - - - - - ${solr.abortOnConfigurationError:true} - - - LUCENE_31 - - - - - - - - - - - - - - - - - - - - - - - - - - - false - - 10 - - 32 - - - - 10000 - 1000 - 10000 - - - - - - - - - native - - - - - - - - - false - 32 - 10 - - - false - - - true - - - - - 1 - - 0 - - - - - - false - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 1024 - - - - - - - - - - - - - - - - - - - - - - true - - - - - - 20 - - - 200 - - - - - - - - - - - - static firstSearcher warming in solrconfig.xml - - - - - - false - - - 2 - - - - - - - - - - - - - - - - - - - - - - - explicit - 10 - - - - - - - - - - - - - - explicit - - - velocity - - browse - layout - Solritas - - edismax - *:* - 10 - *,score - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - text,features,name,sku,id,manu,cat - 3 - - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - - on - cat - manu_exact - ipod - GB - 1 - cat,inStock - price - 0 - 600 - 50 - after - manufacturedate_dt - NOW/YEAR-10YEARS - NOW - +1YEAR - before - after - - - - on - text features name - 0 - name - - - spellcheck - - - - - - - - - - - - - - - - - - - - - - - text - true - ignored_ - - - true - links - ignored_ - - - - - - - - - - - - - - - - - - - - - search - solrpingquery - all - - - - - - - explicit - true - - - - - - - - - - - - textSpell - - - - - - default - name - spellchecker - - - - - - - - - - - - - - - - false - false - 1 - - - spellcheck - - - - - - - - - - true - - - tvComponent - - - - - - - - - default - - org.carrot2.clustering.lingo.LingoClusteringAlgorithm - - 20 - - - ENGLISH - - - stc - org.carrot2.clustering.stc.STCClusteringAlgorithm - - - - - - - true - default - true - - name - id - - features - - true - - - - false - - edismax - - text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - - *:* - 10 - *,score - - - 
clustering - - - - - - - - - - true - - - terms - - - - - - - - string - elevate.xml - - - - - - explicit - - - elevator - - - - - - - - - - - 100 - - - - - - - - 70 - - 0.5 - - [-\w ,/\n\"']{20,200} - - - - - - - ]]> - ]]> - - - - - - - - - - - - - - - - - - - - - ,, - ,, - ,, - ,, - ,]]> - ]]> - - - - - - - - - - - - - - - - - - 5 - - - - - - - - - - - - - *:* - - - - - - + + + + + + + + + ${solr.abortOnConfigurationError:true} + + + LUCENE_31 + + + + + + + + + + + + + + + + + + + + + + + + + + + false + + 10 + + 32 + + + + 10000 + 1000 + 10000 + + + + + + + + + native + + + + + + + + + false + 32 + 10 + + + false + + + true + + + + + 1 + + 0 + + + + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 1024 + + + + + + + + + + + + + + + + + + + + + + true + + + + + + 20 + + + 200 + + + + + + + + + + + + static firstSearcher warming in solrconfig.xml + + + + + + false + + + 2 + + + + + + + + + + + + + + + + + + + + + + + explicit + 10 + + + + + + + + + + + + + + explicit + + + velocity + + browse + layout + Solritas + + edismax + *:* + 10 + *,score + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + text,features,name,sku,id,manu,cat + 3 + + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + + on + cat + manu_exact + ipod + GB + 1 + cat,inStock + price + 0 + 600 + 50 + after + manufacturedate_dt + NOW/YEAR-10YEARS + NOW + +1YEAR + before + after + + + + on + text features name + 0 + name + + + spellcheck + + + + + + + + + + + + + + + + + + + + + + + text + true + ignored_ + + + true + links + ignored_ + + + + + + + + + + + + + + + + + + + + + search + solrpingquery + all + + + + + + + explicit + true + + + + + + + + + + + + textSpell + + + + + + default + name + spellchecker + + + + + + + + + + + + + + + + false + false + 1 + + + spellcheck + + + + + + + + + + true + + + tvComponent + + + + + + + + + default + + org.carrot2.clustering.lingo.LingoClusteringAlgorithm + + 20 + + + ENGLISH + + + stc + 
org.carrot2.clustering.stc.STCClusteringAlgorithm + + + + + + + true + default + true + + name + id + + features + + true + + + + false + + edismax + + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + + *:* + 10 + *,score + + + clustering + + + + + + + + + + true + + + terms + + + + + + + + string + elevate.xml + + + + + + explicit + + + elevator + + + + + + + + + + + 100 + + + + + + + + 70 + + 0.5 + + [-\w ,/\n\"']{20,200} + + + + + + + ]]> + ]]> + + + + + + + + + + + + + + + + + + + + + ,, + ,, + ,, + ,, + ,]]> + ]]> + + + + + + + + + + + + + + + + + + 5 + + + + + + + + + + + + + *:* + + + + + + diff --git a/solr/exampleSolr/conf/stopwords.txt b/solr/exampleSolr/conf/stopwords.txt index 22f277fe0..5f383a458 100644 --- a/solr/exampleSolr/conf/stopwords.txt +++ b/solr/exampleSolr/conf/stopwords.txt @@ -56,3 +56,122 @@ was will with +# these stopwords are taken +# from http://www.onjava.com/pub/a/onjava/2003/01/15/lucene.html?page=2 + +about +after +all +also +an +and +another +any +are +as +at +be +because +been +before +being +between +both +but +by +came +can +come +could +did +do +does +each +else +for +from +get +got +has +had +he +have +her +here +him +himself +his +how +if +in +into +is +it +its +just +like +make +many +me +might +more +most +much +must +my +never +now +of +on +only +or +other +our +out +over +re +said +same +see +should +since +so +some +still +such +take +than +that +the +their +them +then +there +these +they +this +those +through +to +too +under +up +use +very +want +was +way +we +well +were +what +when +where +which +while +who +will +with +would +you +your diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/DisplayVocabulary.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/DisplayVocabulary.java index 86fb805c1..10ad80244 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/DisplayVocabulary.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/DisplayVocabulary.java @@ -24,6 +24,9 @@ public 
class DisplayVocabulary { /* Individuals */ public static final String PRIMARY_LUCENE_INDEX_URI = NS + "PrimaryLuceneIndex"; + //bk392 for extracting properties beyond context nodes. + public static final String CONTEXT_NODES_URI = NS + "QueryForContextNodes"; + /* Page types */ public static final String PAGE_TYPE = NS + "Page"; public static final String HOME_PAGE_TYPE = NS + "HomePage"; @@ -35,8 +38,10 @@ public class DisplayVocabulary { /* Data Properties */ public static final DatatypeProperty URL_MAPPING = m_model.createDatatypeProperty(NS + "urlMapping"); public static final String TITLE = NS + "title"; - public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate"); - + public static final DatatypeProperty REQUIRES_BODY_TEMPLATE = m_model.createDatatypeProperty(NS + "requiresBodyTemplate"); + //bk392 for extracting properties beyond context nodes. + public static final DatatypeProperty QUERY_FOR_EDUCATIONAL_TRAINING = m_model.createDatatypeProperty(NS + "queryForEducationalTraining"); + /* URIs for storing menu.n3 */ public static final String MENU_TEXT_RES = NS + "MenuText"; public static final String HAS_TEXT_REPRESENTATION = NS + "hasMenuText"; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java new file mode 100644 index 000000000..74d719823 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java @@ -0,0 +1,57 @@ +package edu.cornell.mannlib.vitro.webapp.search; + +public class VitroTermNames { + /** Id of entity, vclass or tab */ + public static String URI = "URI"; + /** lucene document id */ + public static String DOCID = "DocId"; + /** java class of the object that the Doc represents. 
*/ + public static String JCLASS = "JCLASS"; + /** rdf:type */ + public static String RDFTYPE = "type"; + /** class group URI, presumably (this comment was a copy of the rdf:type one above) - TODO confirm */ + public static String CLASSGROUP_URI = "classgroup"; + /** Modtime from db */ + public static String MODTIME = "modTime"; + + /** time of index in msec since epoch */ + public static String INDEXEDTIME= "indexedTime"; + /** timekey of entity in yyyymmddhhmm */ + public static String TIMEKEY="TIMEKEY"; + /** time of sunset/end of entity in yyyymmddhhmm */ + public static String SUNSET="SUNSET"; + /** time of sunrise/start of entity in yyyymmddhhmm */ + public static String SUNRISE="SUNRISE"; + /** entity's moniker */ + public static String MONIKER="moniker"; + /** text for 'full text' search, this is stemmed */ + public static String ALLTEXT = "ALLTEXT"; + /** text for 'full text' search, this is unstemmed for + * use with wildcards and prefix queries */ + public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED"; + /** class name for storing context nodes **/ + public static final String CONTEXTNODE = "contextNode"; + /** keywords */ + public static final String KEYWORDS = "KEYWORDS"; + /** Does the individual have a thumbnail image? 1=yes 0=no */ + public static final String THUMBNAIL = "THUMBNAIL"; + /** Should individual be included in full text search results? 
1=yes 0=no */ + public static final String PROHIBITED_FROM_TEXT_RESULTS = "PROHIBITED_FROM_TEXT_RESULTS"; + /** class names in human readable form of an individual*/ + public static final String CLASSLOCALNAMELOWERCASE = "classLocalNameLowerCase"; + /** class names in human readable form of an individual*/ + public static final String CLASSLOCALNAME = "classLocalName"; + + // Fields derived from rdfs:label + /** Raw rdfs:label: no lowercasing, no tokenizing, no stop words, no stemming **/ + public static String NAME_RAW = "nameRaw"; // was NAMERAW + + /** rdfs:label lowercased, no tokenizing, no stop words, no stemming **/ + public static String NAME_LOWERCASE = "nameLowercase"; // was NAMELOWERCASE + + /** rdfs:label lowercased, tokenized, stop words, no stemming **/ + public static String NAME_UNSTEMMED = "nameUnstemmed"; // was NAMEUNSTEMMED + + /** rdfs:label lowercased, tokenized, stop words, stemmed **/ + public static String NAME_STEMMED = "nameStemmed"; // was NAME +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java new file mode 100644 index 000000000..80805c030 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java @@ -0,0 +1,637 @@ +package edu.cornell.mannlib.vitro.webapp.search.beans; + +import javax.servlet.ServletContext; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import com.hp.hpl.jena.ontology.OntModel; +import com.hp.hpl.jena.query.Query; +import com.hp.hpl.jena.query.QueryExecution; +import com.hp.hpl.jena.query.QueryExecutionFactory; +import com.hp.hpl.jena.query.QueryFactory; +import com.hp.hpl.jena.query.QuerySolution; +import com.hp.hpl.jena.query.QuerySolutionMap; +import com.hp.hpl.jena.query.ResultSet; +import com.hp.hpl.jena.query.Syntax; +import com.hp.hpl.jena.rdf.model.Literal; +import 
com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.shared.Lock; + +import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; +import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; + +public class ContextNodesInclusionFactory { + + private OntModel fullModel; + private String contextNodeURI; + private String query = ""; + + private static final String queryForEducationalTraining = "SELECT ?query WHERE {" + + "?searchConfig <"+ DisplayVocabulary.QUERY_FOR_EDUCATIONAL_TRAINING + "> ?query . }"; + + private static Log log = LogFactory.getLog(ContextNodesInclusionFactory.class); + + public ContextNodesInclusionFactory(String contextNodeURI, + OntModel displayOntModel, ServletContext context) { + this.fullModel = ModelContext.getJenaOntModel(context); + this.contextNodeURI = contextNodeURI; + query = getQueryFromModel(contextNodeURI, displayOntModel); + } + + private String getQueryFromModel(String uri, OntModel displayOntModel) { + + String resultQuery = ""; + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource searchConfig = ResourceFactory.createResource(uri); + + initialBinding.add("searchConfig", searchConfig); + + Query query = QueryFactory.create(queryForEducationalTraining); + displayOntModel.enterCriticalSection(Lock.READ); + try{ + QueryExecution qExec = QueryExecutionFactory.create(query, displayOntModel, initialBinding); + try{ + ResultSet results = qExec.execSelect(); + while(results.hasNext()){ + QuerySolution soln = results.nextSolution(); + Literal node = soln.getLiteral("query"); + if(node.isLiteral()){ + resultQuery = node.toString(); + }else{ + log.warn("unexpected literal in the object position for context node queries " + node.toString()); + } + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + displayOntModel.leaveCriticalSection(); + } + + return resultQuery.substring(0, 
resultQuery.length() - 3); + } + + +// public List getFieldValues(String uri, Model modelToQuery, List queries){ + + //what do the queries need to be like? + // SELECT ?field ?value WHERE .... + + // what to do with multiple values for a field? + +// } + + + + //in different object: + /* + * get queries from somewhere + * get model to run queries on + * get list of individuals + * for each individual: + * fields = getFieldValues(uri, model, queiries) + * index(fields)? + * + * + */ + + public String getPropertiesAssociatedWithPosition(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT " + + "(str(?HRJobTitle) as ?hrJobTitle) (str(?InvolvedOrganizationName) as ?involvedOrganizationName) " + + " (str(?PositionForPerson) as ?positionForPerson) (str(?PositionInOrganization) as ?positionInOrganization) " + + " (str(?TitleOrRole) as ?titleOrRole) (str(?PositionLabel) as ?positionLabel) WHERE {" + + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Position . " + + + " OPTIONAL { ?c core:hrJobTitle ?HRJobTitle . } . " + + " OPTIONAL { ?c core:involvedOrganizationName ?InvolvedOrganizationName . } ." + + " OPTIONAL { ?c core:positionForPerson ?f . ?f rdfs:label ?PositionForPerson . } . " + + " OPTIONAL { ?c core:positionInOrganization ?i . ?i rdfs:label ?PositionInOrganization . } . " + + " OPTIONAL { ?c core:titleOrRole ?TitleOrRole . } . " + + " OPTIONAL { ?c rdfs:label ?PositionLabel . 
} " + + + " } ORDER BY ?PositionLabel "; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode hrJobTitle = soln.get("hrJobTitle"); + if(hrJobTitle != null){ + propertyValues.append(" " + hrJobTitle.toString()); + }else{ + log.warn("hrJobTitle is null "); + } + + RDFNode involvedOrganizationName = soln.get("involvedOrganizationName"); + if(involvedOrganizationName != null){ + propertyValues.append(" " + involvedOrganizationName.toString()); + }else{ + log.warn("involvedOrganizationName is null "); + } + + RDFNode positionForPerson = soln.get("positionForPerson"); + if(positionForPerson != null){ + propertyValues.append(" " + positionForPerson.toString()); + }else{ + log.warn("positionForPerson is null "); + } + + RDFNode positionInOrganization = soln.get("positionInOrganization"); + if(positionInOrganization != null){ + propertyValues.append(" " + positionInOrganization.toString()); + }else{ + log.warn("positionInOrganization is null "); + } + + RDFNode titleOrRole = soln.get("titleOrRole"); + if(titleOrRole != null){ + propertyValues.append(" " + titleOrRole.toString()); + }else{ + log.warn("titleOrRole is null "); + } + + RDFNode positionLabel = soln.get("positionLabel"); + if(positionLabel != null){ + propertyValues.append(" " + positionLabel.toString()); + }else{ + log.warn("positionLabel is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + + return propertyValues.toString(); + } + + public String getPropertiesAssociatedWithRelationship(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new 
QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT (str(?Advisee) as ?advisee) (str(?DegreeCandidacy) as ?degreeCandidacy) " + + " (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) WHERE {" + + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:Relationship . " + + + " OPTIONAL { ?c core:advisee ?d . ?d rdfs:label ?Advisee . } . " + + " OPTIONAL { ?c core:degreeCandidacy ?e . ?e rdfs:label ?DegreeCandidacy . } ." + + " OPTIONAL { ?c core:linkedAuthor ?f . ?f rdfs:label ?LinkedAuthor . } . " + + " OPTIONAL { ?c core:linkedInformationResource ?h . ?h rdfs:label ?LinkedInformationResource . } . " + + + " } "; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode advisee = soln.get("advisee"); + if(advisee != null){ + propertyValues.append(" " + advisee.toString()); + }else{ + log.warn("advisee is null "); + } + + RDFNode degreeCandidacy = soln.get("degreeCandidacy"); + if(degreeCandidacy != null){ + propertyValues.append(" " + degreeCandidacy.toString()); + }else{ + log.warn("degreeCandidacy is null "); + } + + RDFNode linkedAuthor = soln.get("linkedAuthor"); + if(linkedAuthor != null){ + propertyValues.append(" " + linkedAuthor.toString()); + }else{ + log.warn("linkedAuthor is null "); + } + + RDFNode linkedInformationResource = soln.get("linkedInformationResource"); + 
if(linkedInformationResource != null){ + propertyValues.append(" " + linkedInformationResource.toString()); + }else{ + log.warn("linkedInformationResource is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + + return propertyValues.toString(); + } + + + public String getPropertiesAssociatedWithAwardReceipt(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT (str(?AwardConferredBy) as ?awardConferredBy) (str(?AwardOrHonorFor) as ?awardOrHonorFor) " + + " (str(?Description) as ?description) (str(?AwardReceiptLabel) as ?awardReceiptLabel) WHERE {" + + + "?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:AwardReceipt . " + + + " OPTIONAL { ?c core:awardConferredBy ?d . ?d rdfs:label ?AwardConferredBy } . " + + " OPTIONAL { ?c core:awardOrHonorFor ?e . ?e rdfs:label ?AwardOrHonorFor } ." + + " OPTIONAL { ?c core:description ?Description . } . " + + " OPTIONAL { ?c rdfs:label ?AwardReceiptLabel . } . 
" + + + " } ORDER BY ?AwardReceiptLabel"; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode awardConferredBy = soln.get("awardConferredBy"); + if(awardConferredBy != null){ + propertyValues.append(" " + awardConferredBy.toString()); + }else{ + log.warn("awardConferredBy is null "); + } + + RDFNode awardOrHonorFor = soln.get("awardOrHonorFor"); + if(awardOrHonorFor != null){ + propertyValues.append(" " + awardOrHonorFor.toString()); + }else{ + log.warn("awardOrHonorFor is null "); + } + + RDFNode description = soln.get("description"); + if(description != null){ + propertyValues.append(" " + description.toString()); + }else{ + log.warn("description is null "); + } + + RDFNode awardReceiptLabel = soln.get("awardReceiptLabel"); + if(awardReceiptLabel != null){ + propertyValues.append(" " + awardReceiptLabel.toString()); + }else{ + log.warn("awardReceiptLabel is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + + return propertyValues.toString(); + } + + public String getPropertiesAssociatedWithRole(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix core: " + + " prefix foaf: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT DISTINCT (str(?OrganizationLabel) as ?organizationLabel) WHERE {" + + "?uri rdf:type foaf:Agent ; ?b ?c . 
" + + " ?c rdf:type core:Role ; core:roleIn ?Organization ." + + " ?Organization rdfs:label ?OrganizationLabel . " + + " } ORDER BY ?OrganizationLabel "; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode organizationLabel = soln.get("organizationLabel"); + if(organizationLabel != null){ + propertyValues.append(" " + organizationLabel.toString()); + }else{ + log.warn("organizationLabel is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + + return propertyValues.toString(); + } + + + + public String getPropertiesAssociatedWithEducationalTraining(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix foaf: " + + " prefix core: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT (str(?AcademicDegreeLabel) as ?academicDegreeLabel) (str(?AcademicDegreeAbbreviation) as ?academicDegreeAbbreviation) " + + "(str(?MajorField) as ?majorField) (str(?DepartmentOrSchool) as ?departmentOrSchool) " + + "(str(?TrainingAtOrganizationLabel) as ?trainingAtOrganizationLabel) WHERE {" + + + " ?uri rdf:type foaf:Agent ; ?b ?c . " + + " ?c rdf:type core:EducationalTraining . " + + + "OPTIONAL { ?c core:degreeEarned ?d . ?d rdfs:label ?AcademicDegreeLabel ; core:abbreviation ?AcademicDegreeAbbreviation . } . 
" + + "OPTIONAL { ?c core:majorField ?MajorField .} ." + + " OPTIONAL { ?c core:departmentOrSchool ?DepartmentOrSchool . }" + + " OPTIONAL { ?c core:trainingAtOrganization ?e . ?e rdfs:label ?TrainingAtOrganizationLabel . } . " + + +"}"; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode academicDegreeLabel = soln.get("academicDegreeLabel"); + if(academicDegreeLabel != null){ + propertyValues.append(" " + academicDegreeLabel.toString()); + }else{ + log.warn("academicDegreeLabel is null "); + } + + RDFNode academicDegreeAbbreviation = soln.get("academicDegreeAbbreviation"); + if(academicDegreeAbbreviation != null){ + propertyValues.append(" " + academicDegreeAbbreviation.toString()); + }else{ + log.warn("academicDegreeAbbreviation is null "); + } + + RDFNode majorField = soln.get("majorField"); + if(majorField != null){ + propertyValues.append(" " + majorField.toString()); + }else{ + log.warn("majorField is null "); + } + + RDFNode trainingAtDepartmentOrSchool = soln.get("departmentOrSchool"); + if(trainingAtDepartmentOrSchool != null){ + propertyValues.append(" " + trainingAtDepartmentOrSchool.toString()); + }else{ + log.warn("trainingAtDepartmentOrSchool is null "); + } + + RDFNode trainingAtOrganizationLabel = soln.get("trainingAtOrganizationLabel"); + if(trainingAtOrganizationLabel != null){ + propertyValues.append(" " + trainingAtOrganizationLabel.toString()); + }else{ + log.warn("trainingAtOrganizationLabel is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + return propertyValues.toString(); + + } + + public String 
getPropertiesAssociatedWithInformationResource(String uri){ + + StringBuffer propertyValues = new StringBuffer(); + + QuerySolutionMap initialBinding = new QuerySolutionMap(); + Resource uriResource = ResourceFactory.createResource(uri); + + initialBinding.add("uri", uriResource); + + String prefix = "prefix owl: " + + " prefix vitroDisplay: " + + " prefix rdf: " + + " prefix foaf: " + + " prefix core: " + + " prefix rdfs: " + + " prefix localNav: " + + " prefix bibo: "; + + String thisQuery = prefix + + "SELECT (str(?LinkedAuthor) as ?linkedAuthor) (str(?LinkedInformationResource) as ?linkedInformationResource) " + + "(str(?Editor) as ?editor) (str(?SubjectArea) as ?subjectArea) (str(?ResearchAreaOf) as ?researchAreaOf) " + + "(str(?Features) as ?features) WHERE {" + + + " ?uri rdf:type core:InformationResource . " + + + "OPTIONAL { ?uri core:informationResourceInAuthorship ?a . ?a core:linkedAuthor ?b ; core:linkedInformationResource ?d ." + + "?b rdfs:label ?LinkedAuthor . ?d rdfs:label ?LinkedInformationResource } . " + + "OPTIONAL { ?uri bibo:editor ?e . ?e rdfs:label ?Editor . } ." + + " OPTIONAL { ?uri core:hasSubjectArea ?f . ?f rdfs:label ?SubjectArea ; core:researchAreaOf ?h . ?h rdfs:label ?ResearchAreaOf . } " + + " OPTIONAL { ?uri core:features ?i . ?i rdfs:label ?Features . } . 
" + + +"}"; + + Query sparqlQuery = QueryFactory.create(thisQuery, Syntax.syntaxARQ); + fullModel.enterCriticalSection(Lock.READ); + + try{ + QueryExecution qExec = QueryExecutionFactory.create(sparqlQuery, fullModel, initialBinding); + try{ + + ResultSet results = qExec.execSelect(); + + while(results.hasNext()){ + + QuerySolution soln = results.nextSolution(); + + RDFNode linkedAuthor = soln.get("linkedAuthor"); + if(linkedAuthor != null){ + propertyValues.append(" " + linkedAuthor.toString()); + }else{ + log.warn("linkedAuthor is null "); + } + + RDFNode linkedInformationResource = soln.get("linkedInformationResource"); + if(linkedInformationResource != null){ + propertyValues.append(" " + linkedInformationResource.toString()); + }else{ + log.warn("linkedInformationResource is null "); + } + + RDFNode editor = soln.get("editor"); + if(editor != null){ + propertyValues.append(" " + editor.toString()); + }else{ + log.warn("editor is null "); + } + + RDFNode subjectArea = soln.get("subjectArea"); + if(subjectArea != null){ + propertyValues.append(" " + subjectArea.toString()); + }else{ + log.warn("subjectArea is null "); + } + + RDFNode researchAreaOf = soln.get("researchAreaOf"); + if(researchAreaOf != null){ + propertyValues.append(" " + researchAreaOf.toString()); + }else{ + log.warn("researchAreaOf is null "); + } + + RDFNode features = soln.get("features"); + if(features != null){ + propertyValues.append(" " + features.toString()); + }else{ + log.warn("features is null "); + } + + } + }catch(Throwable t){ + log.error(t,t); + } finally{ + qExec.close(); + } + }finally{ + fullModel.leaveCriticalSection(); + } + return propertyValues.toString(); + + } + + + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java index 30831f197..a0b48278f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java +++ 
b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java @@ -2,6 +2,8 @@ package edu.cornell.mannlib.vitro.webapp.search.lucene; +import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; @@ -21,9 +23,11 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement; import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.search.IndexingException; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; +import edu.cornell.mannlib.vitro.webapp.search.lucene.test.LuceneDocument; /** * This class expect that Entities passed to it will have @@ -31,6 +35,7 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; * be as full as possible. */ public class Entity2LuceneDoc implements Obj2DocIface{ + /** These are the terms for the lucene index */ public static class VitroLuceneTermNames{ /** Id of entity, vclass or tab */ @@ -61,6 +66,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{ /** text for 'full text' search, this is unstemmed for * use with wildcards and prefix queries */ public static String ALLTEXTUNSTEMMED = "ALLTEXTUNSTEMMED"; + /** class name for storing context nodes **/ + public static final String CONTEXTNODE = "contextNode"; /** keywords */ public static final String KEYWORDS = "KEYWORDS"; /** Does the individual have a thumbnail image? 
1=yes 0=no */ @@ -103,12 +110,19 @@ public class Entity2LuceneDoc implements Obj2DocIface{ private ProhibitedFromSearch classesProhibitedFromSearch; private IndividualProhibitedFromSearch individualProhibited; + + private ContextNodesInclusionFactory contextNodesInclusionFactory; + + private static HashMap IndividualURIToObjectProperties = new HashMap(); + + private static HashSet objectProperties = new HashSet(); public Entity2LuceneDoc( ProhibitedFromSearch classesProhibitedFromSearch, - IndividualProhibitedFromSearch individualProhibited){ + IndividualProhibitedFromSearch individualProhibited, ContextNodesInclusionFactory contextNodesInclusionFactory){ this.classesProhibitedFromSearch = classesProhibitedFromSearch; this.individualProhibited = individualProhibited; + this.contextNodesInclusionFactory = contextNodesInclusionFactory; } public boolean canTranslate(Object obj) { @@ -123,7 +137,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{ String value; Document doc = new Document(); String classPublicNames = ""; - + LuceneDocument document = new LuceneDocument(); //DocId String id = ent.getURI(); @@ -166,12 +180,14 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add( typeField); + document.setRDFTYPE(clz.getURI()); if(clz.getLocalName() != null){ Field classLocalName = new Field(term.CLASSLOCALNAME, clz.getLocalName(), Field.Store.YES, Field.Index.ANALYZED); Field classLocalNameLowerCase = new Field(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase(), Field.Store.YES, Field.Index.ANALYZED); doc.add(classLocalName); doc.add(classLocalNameLowerCase); + document.setCLASSLOCALNAME(clz.getLocalName()); } if( clz.getName() != null ) @@ -183,22 +199,29 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); // classGroupField.setBoost(FIELD_BOOST); doc.add(classGroupField); 
+ document.setCLASSGROUP_URI(clz.getGroupURI()); } } } doc.add( new Field(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0", Field.Store.NO,Field.Index.NOT_ANALYZED_NO_NORMS) ); + document.setPROHIBITED_FROM_TEXT_RESULTS(prohibited?"1":"0"); /* lucene DOCID */ doc.add( new Field(term.DOCID, entClassName + id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - + document.setDOCID(entClassName + id); + + //vitro Id doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + document.setURI(id); + //java class doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - + document.setJCLASS(entClassName); + // Individual label if( ent.getRdfsLabel() != null ) value=ent.getRdfsLabel(); @@ -208,10 +231,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{ log.debug("Using local name for individual with rdfs:label " + ent.getURI()); value = ent.getLocalName(); } - Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED); nameRaw.setBoost(NAME_BOOST); doc.add(nameRaw); + document.setNAME(value); // RY Not sure if we need to store this. For Solr, see schema.xml field definition. 
Field nameLowerCase = new Field(term.NAME_LOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED); @@ -224,14 +247,30 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field nameStemmed = new Field(term.AC_NAME_STEMMED, value, Field.Store.NO, Field.Index.ANALYZED); nameStemmed.setBoost(NAME_BOOST); - doc.add(nameStemmed); + doc.add(nameStemmed); + String contextNodePropertyValues; + +// if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ + contextNodePropertyValues = contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI()); + +// } + + Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED ); + doc.add(contextNodeInformation); + document.setCONTEXTNODE(contextNodePropertyValues); //Moniker if(ent.getMoniker() != null){ Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(moniker); + document.setMONIKER(ent.getMoniker()); } //boost for entity @@ -274,31 +313,45 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Object anon[] = { new Long((new DateTime() ).getMillis()) }; doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + document.setINDEXEDTIME(String.format("%019d", anon)); + if( ! 
prohibited ){ //ALLTEXT, all of the 'full text' + StringBuffer alltext = new StringBuffer(); + String t=null; - value =""; - value+= " "+( ((t=ent.getName()) == null)?"":t ); - value+= " "+( ((t=ent.getAnchor()) == null)?"":t); - value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t ); - value+= " "+ ( ((t=ent.getDescription()) == null)?"":t ); - value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t ); - value+= " "+ getKeyterms(ent); + value =""; + value+= " "+( ((t=ent.getName()) == null)?"":t ); + alltext.append("\t NAME: " + ( ((t=ent.getName()) == null)?"":t )); + value+= " "+( ((t=ent.getAnchor()) == null)?"":t); + alltext.append("\t ANCHOR: " + ( ((t=ent.getAnchor()) == null)?"":t)); + value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t ); + alltext.append("\t MONIKER: " + ( ((t=ent.getMoniker()) == null)?"":t )); + value+= " "+ ( ((t=ent.getDescription()) == null)?"":t ); + alltext.append("\t DESCRIPTION: " + ( ((t=ent.getDescription()) == null)?"":t )); + value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t ); + alltext.append("\t BLURB: " + ( ((t=ent.getBlurb()) == null)?"":t )); + value+= " "+ getKeyterms(ent); + alltext.append("\t KEYTERMS: " + getKeyterms(ent)); - value+= " " + classPublicNames; + value+= " " + classPublicNames; + alltext.append(" CLASSPUBLICNAMES: " + classPublicNames); List dataPropertyStatements = ent.getDataPropertyStatements(); if (dataPropertyStatements != null) { + alltext.append("\n DATA_PROPERTY_STATEMENTS \n -------------------------------- \n"); Iterator dataPropertyStmtIter = dataPropertyStatements.iterator(); while (dataPropertyStmtIter.hasNext()) { DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next(); value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t ); + alltext.append("\n " + ( ((t=dataPropertyStmt.getData()) == null)?"":t )); } } List objectPropertyStatements = ent.getObjectPropertyStatements(); if (objectPropertyStatements != null) { + alltext.append("\n OBJECT_PROPERTY_STATEMENTS \n 
-------------------------------- \n"); Iterator objectPropertyStmtIter = objectPropertyStatements.iterator(); while (objectPropertyStmtIter.hasNext()) { ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next(); @@ -306,17 +359,31 @@ public class Entity2LuceneDoc implements Obj2DocIface{ continue; try { value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ); + alltext.append("\n " +( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) + + " : " + ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t )); + + if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ + //IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) ); + objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t )); + } + } catch (Exception e) { log.debug("could not index name of related object: " + e.getMessage()); } } } //stemmed terms - doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED)); + doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED)); //unstemmed terms doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED)); + document.setALLTEXT(alltext.toString()); } + document.writeToLog(); + + // log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n"); + log.info(" \n Object Properties " + objectProperties.toString() + "\n\n"); + return doc; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java index 149fa4914..6872596fa 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java @@ -10,6 +10,7 @@ import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.Vi import static 
edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_STEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.AC_NAME_UNSTEMMED; import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.RDFTYPE; +import static edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc.VitroLuceneTermNames.CONTEXTNODE; import java.io.File; import java.io.IOException; @@ -41,6 +42,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup; @@ -111,10 +113,16 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { getAnalyzer()); context.setAttribute(ANALYZER, getAnalyzer()); + //bk392 adding another argument to Entity2LuceneDoc + // that takes care of sparql queries for context nodes. 
+ OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); Entity2LuceneDoc translator = new Entity2LuceneDoc( new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), - new IndividualProhibitedFromSearch(context) ); + new IndividualProhibitedFromSearch(context), + new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context) + + ); indexer.addObj2Doc(translator); context.setAttribute(LuceneIndexer.class.getName(), indexer); @@ -250,9 +258,11 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { analyzer.addAnalyzer(AC_NAME_STEMMED, new HtmlLowerStopStemAnalyzer()); analyzer.addAnalyzer(MONIKER, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(RDFTYPE, new StandardAnalyzer(Version.LUCENE_29)); + analyzer.addAnalyzer(CONTEXTNODE, new StandardAnalyzer(Version.LUCENE_29)); analyzer.addAnalyzer(CLASSLOCALNAME, new HtmlLowerStopAnalyzer()); analyzer.addAnalyzer(CLASSLOCALNAMELOWERCASE, new HtmlLowerStopAnalyzer()); + return analyzer; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java index 3e3949ed6..3323dabe5 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java @@ -1,7 +1,7 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.lucene; - +package edu.cornell.mannlib.vitro.webapp.search.lucene; + import java.io.File; import java.io.IOException; import java.util.ArrayList; @@ -27,91 +27,93 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import 
edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; - -/** - * Setup objects for lucene searching and indexing. - * - * The indexing and search objects, IndexBuilder and Searcher are found by the - * controllers IndexController and SearchController through the servletContext. - * This object will have the method contextInitialized() called when the tomcat - * server starts this webapp. - * - * The contextInitialized() will try to find the lucene index directory, - * make a LueceneIndexer and a LuceneSearcher. The LuceneIndexer will - * also get a list of Obj2Doc objects so it can translate object to lucene docs. - * - * To execute this at context creation put this in web.xml: - - - edu.cornell.mannlib.vitro.search.setup.LuceneSetup - - - - * @author bdc34 - * - */ -public class LuceneSetupCJK implements javax.servlet.ServletContextListener { + +/** + * Setup objects for lucene searching and indexing. + * + * The indexing and search objects, IndexBuilder and Searcher are found by the + * controllers IndexController and SearchController through the servletContext. + * This object will have the method contextInitialized() called when the tomcat + * server starts this webapp. + * + * The contextInitialized() will try to find the lucene index directory, + * make a LueceneIndexer and a LuceneSearcher. The LuceneIndexer will + * also get a list of Obj2Doc objects so it can translate object to lucene docs. 
+ * + * To execute this at context creation put this in web.xml: + + + edu.cornell.mannlib.vitro.search.setup.LuceneSetup + + + + * @author bdc34 + * + */ +public class LuceneSetupCJK implements javax.servlet.ServletContextListener { private static String indexDir = null; - private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName()); - private static final String PROPERTY_VITRO_HOME = "vitro.home.directory"; + private static final Log log = LogFactory.getLog(LuceneSetupCJK.class.getName()); + private static final String PROPERTY_VITRO_HOME = "vitro.home.directory"; private static final String LUCENE_SUBDIRECTORY_NAME = "luceneIndex"; - - /** - * Gets run to set up DataSource when the webapp servlet context gets created. - */ + + /** + * Gets run to set up DataSource when the webapp servlet context gets created. + */ @Override - @SuppressWarnings("unchecked") - public void contextInitialized(ServletContextEvent sce) { - ServletContext context = sce.getServletContext(); - log.info("**** Running "+this.getClass().getName()+".contextInitialized()"); - try{ - indexDir = getIndexDirName(sce); - log.info("Lucene indexDir: " + indexDir); - - setBoolMax(); - - HashSet dataPropertyBlacklist = new HashSet(); - context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, dataPropertyBlacklist); - - HashSet objectPropertyBlacklist = new HashSet(); - objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom"); - context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist); + @SuppressWarnings("unchecked") + public void contextInitialized(ServletContextEvent sce) { + ServletContext context = sce.getServletContext(); + log.info("**** Running "+this.getClass().getName()+".contextInitialized()"); + try{ + indexDir = getIndexDirName(sce); + log.info("Lucene indexDir: " + indexDir); + + setBoolMax(); + + HashSet dataPropertyBlacklist = new HashSet(); + context.setAttribute(LuceneSetup.SEARCH_DATAPROPERTY_BLACKLIST, 
dataPropertyBlacklist); + + HashSet objectPropertyBlacklist = new HashSet(); + objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom"); + context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist); //This is where to get a LucenIndex from. The indexer will //need to reference this to notify it of updates to the index LuceneIndexFactory lif = LuceneIndexFactory.setup(context, indexDir); String liveIndexDir = lif.getLiveIndexDir(context); - - //here we want to put the LuceneIndex object into the application scope - LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer()); + + //here we want to put the LuceneIndex object into the application scope + LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer()); context.setAttribute(LuceneSetup.ANALYZER, getAnalyzer()); OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); Entity2LuceneDoc translator = new Entity2LuceneDoc( new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), - new IndividualProhibitedFromSearch(context) ); + new IndividualProhibitedFromSearch(context), + new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); indexer.addObj2Doc(translator); indexer.setLuceneIndexFactory(lif); - - //This is where the builder gets the list of places to try to - //get objects to index. It is filtered so that non-public text - //does not get into the search index. - WebappDaoFactory wadf = - (WebappDaoFactory) context.getAttribute("webappDaoFactory"); + + //This is where the builder gets the list of places to try to + //get objects to index. It is filtered so that non-public text + //does not get into the search index. 
+ WebappDaoFactory wadf = + (WebappDaoFactory) context.getAttribute("webappDaoFactory"); VitroFilters vf = VitroFilterUtils.getPublicFilter(context); - wadf = new WebappDaoFactoryFiltering(wadf,vf); - - List sources = new ArrayList(); - sources.add(wadf.getIndividualDao()); - - IndexBuilder builder = new IndexBuilder(context,indexer,sources); - - // here we add the IndexBuilder with the LuceneIndexer - // to the servlet context so we can access it later in the webapp. + wadf = new WebappDaoFactoryFiltering(wadf,vf); + + List sources = new ArrayList(); + sources.add(wadf.getIndividualDao()); + + IndexBuilder builder = new IndexBuilder(context,indexer,sources); + + // here we add the IndexBuilder with the LuceneIndexer + // to the servlet context so we can access it later in the webapp. context.setAttribute(IndexBuilder.class.getName(),builder); //set up listeners so search index builder is notified of changes to model @@ -119,36 +121,36 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener { OntModel jenaOntModel = (OntModel)sce.getServletContext().getAttribute("jenaOntModel"); SearchReindexingListener srl = new SearchReindexingListener( builder ); ModelContext.registerListenerForChanges(sce.getServletContext(), srl); - - }catch(Exception ex){ - log.error("Could not setup lucene full text search." , ex); - } - - log.debug("**** End of "+this.getClass().getName()+".contextInitialized()"); - } - - /** - * Gets run when the webApp Context gets destroyed. - */ + + }catch(Exception ex){ + log.error("Could not setup lucene full text search." , ex); + } + + log.debug("**** End of "+this.getClass().getName()+".contextInitialized()"); + } + + /** + * Gets run when the webApp Context gets destroyed. 
+ */ @Override public void contextDestroyed(ServletContextEvent sce) { - + log.info("**** Running "+this.getClass().getName()+".contextDestroyed()"); IndexBuilder builder = (IndexBuilder)sce.getServletContext().getAttribute(IndexBuilder.class.getName()); - builder.stopIndexingThread(); - } - - /** - * In wild card searches the query is first broken into many boolean searches - * OR'ed together. So if there is a query that would match a lot of records - * we need a high max boolean limit for the lucene search. - * - * This sets some static method in the lucene library to achieve this. - */ - public static void setBoolMax() { - BooleanQuery.setMaxClauseCount(16384); - } - + builder.stopIndexingThread(); + } + + /** + * In wild card searches the query is first broken into many boolean searches + * OR'ed together. So if there is a query that would match a lot of records + * we need a high max boolean limit for the lucene search. + * + * This sets some static method in the lucene library to achieve this. + */ + public static void setBoolMax() { + BooleanQuery.setMaxClauseCount(16384); + } + /** * Gets the name of the directory to store the lucene index in. The * {@link ConfigurationProperties} should have a property named @@ -190,14 +192,14 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener { return luceneDir.getPath(); } - /** - * Gets the analyzer that will be used when building the indexing - * and when analyzing the incoming search terms. - * - * @return - */ - private Analyzer getAnalyzer() { - return new CJKAnalyzer(); - } - -} + /** + * Gets the analyzer that will be used when building the indexing + * and when analyzing the incoming search terms. 
+ * + * @return + */ + private Analyzer getAnalyzer() { + return new CJKAnalyzer(); + } + +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java new file mode 100644 index 000000000..ad2ec0114 --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java @@ -0,0 +1,209 @@ +package edu.cornell.mannlib.vitro.webapp.search.lucene.test; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +public class LuceneDocument { + + private static final Log log = LogFactory.getLog(LuceneDocument.class.getName()); + + String URI; + String DOCID; + String JCLASS; + String RDFTYPE; + String CLASSGROUP_URI; + String MODTIME; + String NAME; + String PORTAL; + String INDEXEDTIME; + String TIMEKEY; + String SUNSET; + String MONIKER; + String ALLTEXT; + String KEYWORDS; + String THUMBNAIL; + String PROHIBITED_FROM_TEXT_RESULTS; + String CLASSLOCALNAME; + String CONTEXTNODE; + + static final String FILE = "~/Desktop/LuceneIndividuals.txt"; + + + public String getURI() { + return URI; + } + + public void setURI(String uRI) { + URI = uRI; + } + + public String getDOCID() { + return DOCID; + } + + public void setDOCID(String dOCID) { + DOCID = dOCID; + } + + public String getJCLASS() { + return JCLASS; + } + + public void setJCLASS(String jCLASS) { + JCLASS = jCLASS; + } + + public String getRDFTYPE() { + return RDFTYPE; + } + + public void setRDFTYPE(String rDFTYPE) { + RDFTYPE = rDFTYPE; + } + + public String getCLASSGROUP_URI() { + return CLASSGROUP_URI; + } + + public void setCLASSGROUP_URI(String cLASSGROUP_URI) { + CLASSGROUP_URI = cLASSGROUP_URI; + } + + public String getMODTIME() { + return MODTIME; + } + + public void setMODTIME(String mODTIME) { + MODTIME = mODTIME; + } + + public String 
getNAME() { + return NAME; + } + + public void setNAME(String nAME) { + NAME = nAME; + } + + public String getPORTAL() { + return PORTAL; + } + + public void setPORTAL(String pORTAL) { + PORTAL = pORTAL; + } + + public String getINDEXEDTIME() { + return INDEXEDTIME; + } + + public void setINDEXEDTIME(String iNDEXEDTIME) { + INDEXEDTIME = iNDEXEDTIME; + } + + public String getTIMEKEY() { + return TIMEKEY; + } + + public void setTIMEKEY(String tIMEKEY) { + TIMEKEY = tIMEKEY; + } + + public String getSUNSET() { + return SUNSET; + } + + public void setSUNSET(String sUNSET) { + SUNSET = sUNSET; + } + + public String getMONIKER() { + return MONIKER; + } + + public void setMONIKER(String mONIKER) { + MONIKER = mONIKER; + } + + public String getALLTEXT() { + return ALLTEXT; + } + + public void setALLTEXT(String aLLTEXT) { + ALLTEXT = aLLTEXT; + } + + public String getKEYWORDS() { + return KEYWORDS; + } + + public void setKEYWORDS(String kEYWORDS) { + KEYWORDS = kEYWORDS; + } + + public String getTHUMBNAIL() { + return THUMBNAIL; + } + + public void setTHUMBNAIL(String tHUMBNAIL) { + THUMBNAIL = tHUMBNAIL; + } + + public String getPROHIBITED_FROM_TEXT_RESULTS() { + return PROHIBITED_FROM_TEXT_RESULTS; + } + + public void setPROHIBITED_FROM_TEXT_RESULTS(String pROHIBITED_FROM_TEXT_RESULTS) { + PROHIBITED_FROM_TEXT_RESULTS = pROHIBITED_FROM_TEXT_RESULTS; + } + + public String getCLASSLOCALNAME() { + return CLASSLOCALNAME; + } + + public void setCLASSLOCALNAME(String cLASSLOCALNAME) { + CLASSLOCALNAME = cLASSLOCALNAME; + } + + @Override + public String toString(){ + + StringBuffer result = new StringBuffer(); + + result.append("\n==================================\n"); + + result.append("URI : " + URI); + result.append("\nDOCID : " + DOCID); + result.append("\nJCLASS : " + JCLASS); + result.append("\nRDFTYPE : " + RDFTYPE); + result.append("\nCLASSGROUP_URI : " + CLASSGROUP_URI); + result.append("\nMODTIME : " + MODTIME); + result.append("\nNAME : " + NAME); + 
result.append("\nPORTAL : " + PORTAL); + result.append("\nINDEXEDTIME : " + INDEXEDTIME); + result.append("\nCONTEXTNODE : " + CONTEXTNODE); + result.append("\nTIMEKEY : " + TIMEKEY); + result.append("\nSUNSET : " + SUNSET); + result.append("\nMONIKER : " + MONIKER); + result.append("\nALLTEXT : " + ALLTEXT); + result.append("\nKEYWORDS : " + KEYWORDS); + result.append("\nTHUMBNAIL : " + THUMBNAIL); + result.append("\nPROHIBITED_FROM_TEXT_RESULTS : " + PROHIBITED_FROM_TEXT_RESULTS); + result.append("\nCLASSLOCALNAME : " + CLASSLOCALNAME); + + return result.toString(); + + } + + public void writeToLog(){ + log.info(this.toString()); + } + + public void setCONTEXTNODE(String contextNodePropertyValues) { + this.CONTEXTNODE = contextNodePropertyValues; + } +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java index 1bd7b2df8..797f05d88 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java @@ -2,23 +2,261 @@ package edu.cornell.mannlib.vitro.webapp.search.solr; -import org.apache.solr.common.SolrDocument; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.lucene.document.Document; +import org.apache.solr.client.solrj.beans.Field; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.SolrInputField; +import org.joda.time.DateTime; + +import com.hp.hpl.jena.vocabulary.OWL; + +import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement; import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl; +import 
edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement; +import edu.cornell.mannlib.vitro.webapp.beans.VClass; +import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.search.IndexingException; +import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; +import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; +import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; public class IndividualToSolrDocument implements Obj2DocIface { protected LuceneDocToSolrDoc luceneToSolr; - protected Entity2LuceneDoc entityToLucene; - public IndividualToSolrDocument(Entity2LuceneDoc e2d){ - entityToLucene = e2d; - luceneToSolr = new LuceneDocToSolrDoc(); + public static final Log log = LogFactory.getLog(IndividualToSolrDocument.class.getName()); + + public static VitroTermNames term = new VitroTermNames(); + + private static String entClassName = Individual.class.getName(); + + private ProhibitedFromSearch classesProhibitedFromSearch; + + private IndividualProhibitedFromSearch individualProhibitedFromSearch; + + private ContextNodesInclusionFactory contextNodesInclusionFactory; + + private static HashSet objectProperties = new HashSet(); + + + public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch, + IndividualProhibitedFromSearch individualProhibitedFromSearch, + ContextNodesInclusionFactory contextNodesInclusionFactory){ + this.classesProhibitedFromSearch = classesProhibitedFromSearch; + this.individualProhibitedFromSearch = individualProhibitedFromSearch; + this.contextNodesInclusionFactory = contextNodesInclusionFactory; } + @Override + public Object translate(Object obj) throws IndexingException{ + long tProhibited = 
System.currentTimeMillis(); + + if(!(obj instanceof Individual)) + return null; + + Individual ent = (Individual)obj; + String value; + String classPublicNames = ""; + SolrInputDocument doc = new SolrInputDocument(); + + //DocId + String id = ent.getURI(); + log.debug("translating " + id); + + if(id == null){ + log.debug("cannot add individuals without URIs to lucene Index"); + return null; + }else if( id.startsWith(VitroVocabulary.vitroURI) || + id.startsWith(VitroVocabulary.VITRO_PUBLIC) || + id.startsWith(VitroVocabulary.PSEUDO_BNODE_NS) || + id.startsWith(OWL.NS)){ + log.debug("not indexing because of namespace:" + id); + return null; + } + + //filter out class groups, owl:ObjectProperties etc.. + if(individualProhibitedFromSearch.isIndividualProhibited(id)){ + return null; + } + + log.debug("time to check if individual is prohibited:" + Long.toString(System.currentTimeMillis() - tProhibited)); + + // Types and classgroups + boolean prohibited = false; + List vclasses = ent.getVClasses(false); + long tClassgroup = System.currentTimeMillis(); + for(VClass clz : vclasses){ + if(clz.getURI() == null){ + continue; + }else if(OWL.Thing.getURI().equals(clz.getURI())){ + //index individuals of type owl:Thing, just don't add owl:Thing as the type field in the index + continue; + } else if(clz.getURI().startsWith(OWL.NS)){ + log.debug("not indexing " + id + " because of type " + clz.getURI()); + return null; + } else { + if( !prohibited && classesProhibitedFromSearch.isClassProhibited(clz.getURI())) + prohibited = true; + if( clz.getSearchBoost() != null) + doc.setDocumentBoost(doc.getDocumentBoost() + clz.getSearchBoost()); + + doc.addField(term.RDFTYPE, clz.getURI()); + + if(clz.getLocalName() != null){ + doc.addField(term.CLASSLOCALNAME, clz.getLocalName()); + doc.addField(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase()); + } + + if(clz.getName() != null) + classPublicNames += clz.getName(); + + //Classgroup URI + if(clz.getGroupURI() != null){ + 
doc.addField(term.CLASSGROUP_URI,clz.getGroupURI()); + } + + } + } + + log.debug("time to check if class is prohibited and adding classes, classgroups and type to the index: " + Long.toString(System.currentTimeMillis() - tClassgroup)); + + + doc.addField(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0"); + + //lucene DocID + doc.addField(term.DOCID, entClassName + id); + + //vitro id + doc.addField(term.URI, id); + + //java class + doc.addField(term.JCLASS, entClassName); + + //Individual Label + if(ent.getRdfsLabel() != null) + value = ent.getRdfsLabel(); + else{ + log.debug("Using local name for individual with rdfs:label " + ent.getURI()); + value = ent.getLocalName(); + } + + doc.addField(term.NAME_RAW, value, NAME_BOOST); + doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST); + doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST); + doc.addField(term.NAME_STEMMED, value, NAME_BOOST); + + long tContextNodes = System.currentTimeMillis(); + + String contextNodePropertyValues = ""; + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI()); + contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI()); + + + doc.addField(term.CONTEXTNODE, contextNodePropertyValues); + + log.debug("time to fire contextnode queries and include them in the index: " + Long.toString(System.currentTimeMillis() - tContextNodes)); + + + long tMoniker = System.currentTimeMillis(); + + //Moniker + 
if(ent.getMoniker() != null){ + doc.addField(term.MONIKER, ent.getMoniker()); + } + + //boost for entity + if(ent.getSearchBoost() != null && ent.getSearchBoost() != 0) + doc.setDocumentBoost(ent.getSearchBoost()); + + //thumbnail + try{ + value = null; + if(ent.hasThumb()) + doc.addField(term.THUMBNAIL, "1"); + else + doc.addField(term.THUMBNAIL, "0"); + }catch(Exception ex){ + log.debug("could not index thumbnail: " + ex); + } + + + //time of index in millis past epoc + Object anon[] = { new Long((new DateTime() ).getMillis()) }; + doc.addField(term.INDEXEDTIME, String.format("%019d", anon)); + + log.debug("time to include moniker , thumbnail and indexedtime in the index: " + Long.toString(System.currentTimeMillis() - tMoniker)); + + long tPropertyStatements = System.currentTimeMillis(); + if(!prohibited){ + //ALLTEXT, all of the 'full text' + String t=null; + value =""; + value+= " "+( ((t=ent.getName()) == null)?"":t ); + value+= " "+( ((t=ent.getAnchor()) == null)?"":t); + value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t ); + value+= " "+ ( ((t=ent.getDescription()) == null)?"":t ); + value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t ); + + value+= " " + classPublicNames; + + List dataPropertyStatements = ent.getDataPropertyStatements(); + if (dataPropertyStatements != null) { + Iterator dataPropertyStmtIter = dataPropertyStatements.iterator(); + while (dataPropertyStmtIter.hasNext()) { + DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next(); + value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t ); + } + } + + List objectPropertyStatements = ent.getObjectPropertyStatements(); + if (objectPropertyStatements != null) { + Iterator objectPropertyStmtIter = objectPropertyStatements.iterator(); + while (objectPropertyStmtIter.hasNext()) { + ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next(); + if( "http://www.w3.org/2002/07/owl#differentFrom".equals(objectPropertyStmt.getPropertyURI()) ) + continue; + try { + value+= 
" "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ); + if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ + //IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) ); + objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t )); + } + + } catch (Exception e) { + log.debug("could not index name of related object: " + e.getMessage()); + } + } + } + + log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements)); + + doc.addField(term.ALLTEXT, value); + doc.addField(term.ALLTEXTUNSTEMMED, value); + } + + return doc; + } + +// public IndividualToSolrDocument(Entity2LuceneDoc e2d){ +//// entityToLucene = e2d; +// luceneToSolr = new LuceneDocToSolrDoc(); +// } + @Override public boolean canTranslate(Object obj) { return obj != null && obj instanceof Individual; @@ -34,14 +272,23 @@ public class IndividualToSolrDocument implements Obj2DocIface { throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented"); } - @Override - public Object translate(Object obj) throws IndexingException { - return luceneToSolr.translate( entityToLucene.translate( obj ) ); - } +// @Override +// public Object translate(Object obj) throws IndexingException { +// return luceneToSolr.translate( entityToLucene.translate( obj ) ); +// } @Override public Object unTranslate(Object result) { - return luceneToSolr.unTranslate( result ); + Individual ent = null; + if( result != null && result instanceof Document){ + Document hit = (Document) result; + String id = hit.get(term.URI); + ent = new IndividualImpl(); + ent.setURI(id); + } + return ent; } + public static float NAME_BOOST = 3.0F; + } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java index ae89cd381..0f142c6bb 100644 --- 
a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java @@ -23,6 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; +import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; @@ -67,10 +68,15 @@ public class SolrSetup implements javax.servlet.ServletContextListener{ /* setup the individual to solr doc translation */ //first we need a ent2luceneDoc translator OntModel displayOntModel = (OntModel) sce.getServletContext().getAttribute("displayOntModel"); - Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc( - new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), - new IndividualProhibitedFromSearch(context) ); - IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc ); +// Entity2LuceneDoc ent2LuceneDoc = new Entity2LuceneDoc( +// new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), +// new IndividualProhibitedFromSearch(context), +// new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); +// IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( ent2LuceneDoc ); + IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( + new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), + new IndividualProhibitedFromSearch(context), + new 
ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); List o2d = new ArrayList(); o2d.add(indToSolrDoc); From f7eb16b7c807dcd96671dfaef124764f0c41856c Mon Sep 17 00:00:00 2001 From: deepakkoni Date: Thu, 19 May 2011 14:49:51 +0000 Subject: [PATCH 03/38] Removing LuceneDocument, that was intended to be used as a test utility to inspect Lucene Documents. and removing references to LuceneDocument from Entity2LuceneDoc --- .../search/lucene/Entity2LuceneDoc.java | 29 --- .../search/lucene/test/LuceneDocument.java | 209 ------------------ 2 files changed, 238 deletions(-) delete mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java index a0b48278f..1228ba45f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java @@ -27,7 +27,6 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactor import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; -import edu.cornell.mannlib.vitro.webapp.search.lucene.test.LuceneDocument; /** * This class expect that Entities passed to it will have @@ -137,7 +136,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{ String value; Document doc = new Document(); String classPublicNames = ""; - LuceneDocument document = new LuceneDocument(); //DocId String id = ent.getURI(); @@ -180,14 +178,12 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field typeField = new Field (term.RDFTYPE, clz.getURI(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add( 
typeField); - document.setRDFTYPE(clz.getURI()); if(clz.getLocalName() != null){ Field classLocalName = new Field(term.CLASSLOCALNAME, clz.getLocalName(), Field.Store.YES, Field.Index.ANALYZED); Field classLocalNameLowerCase = new Field(term.CLASSLOCALNAMELOWERCASE, clz.getLocalName().toLowerCase(), Field.Store.YES, Field.Index.ANALYZED); doc.add(classLocalName); doc.add(classLocalNameLowerCase); - document.setCLASSLOCALNAME(clz.getLocalName()); } if( clz.getName() != null ) @@ -199,28 +195,23 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); // classGroupField.setBoost(FIELD_BOOST); doc.add(classGroupField); - document.setCLASSGROUP_URI(clz.getGroupURI()); } } } doc.add( new Field(term.PROHIBITED_FROM_TEXT_RESULTS, prohibited?"1":"0", Field.Store.NO,Field.Index.NOT_ANALYZED_NO_NORMS) ); - document.setPROHIBITED_FROM_TEXT_RESULTS(prohibited?"1":"0"); /* lucene DOCID */ doc.add( new Field(term.DOCID, entClassName + id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - document.setDOCID(entClassName + id); //vitro Id doc.add( new Field(term.URI, id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - document.setURI(id); //java class doc.add( new Field(term.JCLASS, entClassName, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - document.setJCLASS(entClassName); // Individual label if( ent.getRdfsLabel() != null ) @@ -234,7 +225,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field nameRaw = new Field(term.NAME_RAW, value, Field.Store.YES, Field.Index.NOT_ANALYZED); nameRaw.setBoost(NAME_BOOST); doc.add(nameRaw); - document.setNAME(value); // RY Not sure if we need to store this. For Solr, see schema.xml field definition. 
Field nameLowerCase = new Field(term.NAME_LOWERCASE, value.toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED); @@ -263,14 +253,12 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED ); doc.add(contextNodeInformation); - document.setCONTEXTNODE(contextNodePropertyValues); //Moniker if(ent.getMoniker() != null){ Field moniker = new Field(term.MONIKER, ent.getMoniker(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); doc.add(moniker); - document.setMONIKER(ent.getMoniker()); } //boost for entity @@ -313,45 +301,32 @@ public class Entity2LuceneDoc implements Obj2DocIface{ Object anon[] = { new Long((new DateTime() ).getMillis()) }; doc.add( new Field(term.INDEXEDTIME, String.format( "%019d", anon ), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - document.setINDEXEDTIME(String.format("%019d", anon)); if( ! prohibited ){ //ALLTEXT, all of the 'full text' - StringBuffer alltext = new StringBuffer(); - String t=null; value =""; value+= " "+( ((t=ent.getName()) == null)?"":t ); - alltext.append("\t NAME: " + ( ((t=ent.getName()) == null)?"":t )); value+= " "+( ((t=ent.getAnchor()) == null)?"":t); - alltext.append("\t ANCHOR: " + ( ((t=ent.getAnchor()) == null)?"":t)); value+= " "+ ( ((t=ent.getMoniker()) == null)?"":t ); - alltext.append("\t MONIKER: " + ( ((t=ent.getMoniker()) == null)?"":t )); value+= " "+ ( ((t=ent.getDescription()) == null)?"":t ); - alltext.append("\t DESCRIPTION: " + ( ((t=ent.getDescription()) == null)?"":t )); value+= " "+ ( ((t=ent.getBlurb()) == null)?"":t ); - alltext.append("\t BLURB: " + ( ((t=ent.getBlurb()) == null)?"":t )); value+= " "+ getKeyterms(ent); - alltext.append("\t KEYTERMS: " + getKeyterms(ent)); value+= " " + classPublicNames; - alltext.append(" CLASSPUBLICNAMES: " + classPublicNames); List dataPropertyStatements = ent.getDataPropertyStatements(); if (dataPropertyStatements != 
null) { - alltext.append("\n DATA_PROPERTY_STATEMENTS \n -------------------------------- \n"); Iterator dataPropertyStmtIter = dataPropertyStatements.iterator(); while (dataPropertyStmtIter.hasNext()) { DataPropertyStatement dataPropertyStmt = dataPropertyStmtIter.next(); value+= " "+ ( ((t=dataPropertyStmt.getData()) == null)?"":t ); - alltext.append("\n " + ( ((t=dataPropertyStmt.getData()) == null)?"":t )); } } List objectPropertyStatements = ent.getObjectPropertyStatements(); if (objectPropertyStatements != null) { - alltext.append("\n OBJECT_PROPERTY_STATEMENTS \n -------------------------------- \n"); Iterator objectPropertyStmtIter = objectPropertyStatements.iterator(); while (objectPropertyStmtIter.hasNext()) { ObjectPropertyStatement objectPropertyStmt = objectPropertyStmtIter.next(); @@ -359,8 +334,6 @@ public class Entity2LuceneDoc implements Obj2DocIface{ continue; try { value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ); - alltext.append("\n " +( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) - + " : " + ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t )); if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ //IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) ); @@ -376,10 +349,8 @@ public class Entity2LuceneDoc implements Obj2DocIface{ doc.add( new Field(term.ALLTEXT, value , Field.Store.NO, Field.Index.ANALYZED)); //unstemmed terms doc.add( new Field(term.ALLTEXTUNSTEMMED, value, Field.Store.NO, Field.Index.ANALYZED)); - document.setALLTEXT(alltext.toString()); } - document.writeToLog(); // log.info("\n IndividualURItoObjectProperties " + IndividualURIToObjectProperties.toString() + " \n\n"); log.info(" \n Object Properties " + objectProperties.toString() + "\n\n"); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java 
b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java deleted file mode 100644 index ad2ec0114..000000000 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/test/LuceneDocument.java +++ /dev/null @@ -1,209 +0,0 @@ -package edu.cornell.mannlib.vitro.webapp.search.lucene.test; - -import java.io.BufferedWriter; -import java.io.FileWriter; -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -public class LuceneDocument { - - private static final Log log = LogFactory.getLog(LuceneDocument.class.getName()); - - String URI; - String DOCID; - String JCLASS; - String RDFTYPE; - String CLASSGROUP_URI; - String MODTIME; - String NAME; - String PORTAL; - String INDEXEDTIME; - String TIMEKEY; - String SUNSET; - String MONIKER; - String ALLTEXT; - String KEYWORDS; - String THUMBNAIL; - String PROHIBITED_FROM_TEXT_RESULTS; - String CLASSLOCALNAME; - String CONTEXTNODE; - - static final String FILE = "~/Desktop/LuceneIndividuals.txt"; - - - public String getURI() { - return URI; - } - - public void setURI(String uRI) { - URI = uRI; - } - - public String getDOCID() { - return DOCID; - } - - public void setDOCID(String dOCID) { - DOCID = dOCID; - } - - public String getJCLASS() { - return JCLASS; - } - - public void setJCLASS(String jCLASS) { - JCLASS = jCLASS; - } - - public String getRDFTYPE() { - return RDFTYPE; - } - - public void setRDFTYPE(String rDFTYPE) { - RDFTYPE = rDFTYPE; - } - - public String getCLASSGROUP_URI() { - return CLASSGROUP_URI; - } - - public void setCLASSGROUP_URI(String cLASSGROUP_URI) { - CLASSGROUP_URI = cLASSGROUP_URI; - } - - public String getMODTIME() { - return MODTIME; - } - - public void setMODTIME(String mODTIME) { - MODTIME = mODTIME; - } - - public String getNAME() { - return NAME; - } - - public void setNAME(String nAME) { - NAME = nAME; - } - - public String getPORTAL() { - return PORTAL; - } - - public void setPORTAL(String pORTAL) { 
- PORTAL = pORTAL; - } - - public String getINDEXEDTIME() { - return INDEXEDTIME; - } - - public void setINDEXEDTIME(String iNDEXEDTIME) { - INDEXEDTIME = iNDEXEDTIME; - } - - public String getTIMEKEY() { - return TIMEKEY; - } - - public void setTIMEKEY(String tIMEKEY) { - TIMEKEY = tIMEKEY; - } - - public String getSUNSET() { - return SUNSET; - } - - public void setSUNSET(String sUNSET) { - SUNSET = sUNSET; - } - - public String getMONIKER() { - return MONIKER; - } - - public void setMONIKER(String mONIKER) { - MONIKER = mONIKER; - } - - public String getALLTEXT() { - return ALLTEXT; - } - - public void setALLTEXT(String aLLTEXT) { - ALLTEXT = aLLTEXT; - } - - public String getKEYWORDS() { - return KEYWORDS; - } - - public void setKEYWORDS(String kEYWORDS) { - KEYWORDS = kEYWORDS; - } - - public String getTHUMBNAIL() { - return THUMBNAIL; - } - - public void setTHUMBNAIL(String tHUMBNAIL) { - THUMBNAIL = tHUMBNAIL; - } - - public String getPROHIBITED_FROM_TEXT_RESULTS() { - return PROHIBITED_FROM_TEXT_RESULTS; - } - - public void setPROHIBITED_FROM_TEXT_RESULTS(String pROHIBITED_FROM_TEXT_RESULTS) { - PROHIBITED_FROM_TEXT_RESULTS = pROHIBITED_FROM_TEXT_RESULTS; - } - - public String getCLASSLOCALNAME() { - return CLASSLOCALNAME; - } - - public void setCLASSLOCALNAME(String cLASSLOCALNAME) { - CLASSLOCALNAME = cLASSLOCALNAME; - } - - @Override - public String toString(){ - - StringBuffer result = new StringBuffer(); - - result.append("\n==================================\n"); - - result.append("URI : " + URI); - result.append("\nDOCID : " + DOCID); - result.append("\nJCLASS : " + JCLASS); - result.append("\nRDFTYPE : " + RDFTYPE); - result.append("\nCLASSGROUP_URI : " + CLASSGROUP_URI); - result.append("\nMODTIME : " + MODTIME); - result.append("\nNAME : " + NAME); - result.append("\nPORTAL : " + PORTAL); - result.append("\nINDEXEDTIME : " + INDEXEDTIME); - result.append("\nCONTEXTNODE : " + CONTEXTNODE); - result.append("\nTIMEKEY : " + TIMEKEY); - 
result.append("\nSUNSET : " + SUNSET); - result.append("\nMONIKER : " + MONIKER); - result.append("\nALLTEXT : " + ALLTEXT); - result.append("\nKEYWORDS : " + KEYWORDS); - result.append("\nTHUMBNAIL : " + THUMBNAIL); - result.append("\nPROHIBITED_FROM_TEXT_RESULTS : " + PROHIBITED_FROM_TEXT_RESULTS); - result.append("\nCLASSLOCALNAME : " + CLASSLOCALNAME); - - return result.toString(); - - } - - public void writeToLog(){ - log.info(this.toString()); - } - - public void setCONTEXTNODE(String contextNodePropertyValues) { - this.CONTEXTNODE = contextNodePropertyValues; - } -} From 5f8f5fa8f5335d8e5c7edfd27ed95d1f5277b0e2 Mon Sep 17 00:00:00 2001 From: deepakkoni Date: Thu, 19 May 2011 16:44:23 +0000 Subject: [PATCH 04/38] Fixing the code in ContextNodesInclusionFactory. getQueryFromModel() is commented out because there is nothing specified in search.n3 to be read into the model during the startup. Eventually, all the contextnode queries will be extracted out into search.n3 --- .../beans/ContextNodesInclusionFactory.java | 93 +++++++++++-------- 1 file changed, 52 insertions(+), 41 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java index 80805c030..4f95392f4 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java @@ -26,56 +26,67 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; public class ContextNodesInclusionFactory { private OntModel fullModel; - private String contextNodeURI; - private String query = ""; +// private String contextNodeURI; + //private String query = ""; - private static final String queryForEducationalTraining = "SELECT ?query WHERE {" + - "?searchConfig <"+ DisplayVocabulary.QUERY_FOR_EDUCATIONAL_TRAINING + "> ?query .
}"; +// private static final String queryForEducationalTraining = "SELECT ?query WHERE {" + +// "?searchConfig <"+ DisplayVocabulary.QUERY_FOR_EDUCATIONAL_TRAINING + "> ?query . }"; private static Log log = LogFactory.getLog(ContextNodesInclusionFactory.class); public ContextNodesInclusionFactory(String contextNodeURI, OntModel displayOntModel, ServletContext context) { this.fullModel = ModelContext.getJenaOntModel(context); - this.contextNodeURI = contextNodeURI; - query = getQueryFromModel(contextNodeURI, displayOntModel); - } - - private String getQueryFromModel(String uri, OntModel displayOntModel) { - - String resultQuery = ""; - QuerySolutionMap initialBinding = new QuerySolutionMap(); - Resource searchConfig = ResourceFactory.createResource(uri); - - initialBinding.add("searchConfig", searchConfig); - - Query query = QueryFactory.create(queryForEducationalTraining); - displayOntModel.enterCriticalSection(Lock.READ); - try{ - QueryExecution qExec = QueryExecutionFactory.create(query, displayOntModel, initialBinding); - try{ - ResultSet results = qExec.execSelect(); - while(results.hasNext()){ - QuerySolution soln = results.nextSolution(); - Literal node = soln.getLiteral("query"); - if(node.isLiteral()){ - resultQuery = node.toString(); - }else{ - log.warn("unexpected literal in the object position for context node queries " + node.toString()); - } - } - }catch(Throwable t){ - log.error(t,t); - } finally{ - qExec.close(); - } - }finally{ - displayOntModel.leaveCriticalSection(); - } - - return resultQuery.substring(0, resultQuery.length() - 3); +// this.contextNodeURI = contextNodeURI; + //query = getQueryFromModel(contextNodeURI, displayOntModel); } + /* + * bk392 : The original idea behind writing up this method was to check + * if I can read the queries from search.n3, write them to the displayOntModel(during startup) and + * read them in this class. + * + * Eventually, its going to be like that. 
All these hardcoded queries + * will go into search.n3 and will be written into the display model. + * ContextNodesInclusionFactors gets the queries out from the display Model + * and fires them, gets the values, concatenates them and passes them back to + * IndividualToSolrDoc. + */ +// private String getQueryFromModel(String uri, OntModel displayOntModel) { +// +// String resultQuery = ""; +// QuerySolutionMap initialBinding = new QuerySolutionMap(); +// Resource searchConfig = ResourceFactory.createResource(uri); +// +// initialBinding.add("searchConfig", searchConfig); +// +// Query query = QueryFactory.create(queryForEducationalTraining); +// displayOntModel.enterCriticalSection(Lock.READ); +// try{ +// QueryExecution qExec = QueryExecutionFactory.create(query, displayOntModel, initialBinding); +// try{ +// ResultSet results = qExec.execSelect(); +// while(results.hasNext()){ +// QuerySolution soln = results.nextSolution(); +// Literal node = soln.getLiteral("query"); +// if(node.isLiteral()){ +// resultQuery = node.toString(); +// }else{ +// log.warn("unexpected literal in the object position for context node queries " + node.toString()); +// } +// } +// }catch(Throwable t){ +// log.error(t,t); +// } finally{ +// qExec.close(); +// } +// }finally{ +// displayOntModel.leaveCriticalSection(); +// } +// +// return resultQuery.substring(0, resultQuery.length() - 3); +// } + // public List getFieldValues(String uri, Model modelToQuery, List queries){ From 17d7132222c07cdfa89dd30ca2e95df131a6ef60 Mon Sep 17 00:00:00 2001 From: deepakkoni Date: Thu, 19 May 2011 19:08:51 +0000 Subject: [PATCH 05/38] Removing objectproperties from IndividualToSolrDocument and uncommenting contextURI in ContextNodesInclusionFactory --- .../search/beans/ContextNodesInclusionFactory.java | 4 ++-- .../webapp/search/solr/IndividualToSolrDocument.java | 12 ++---------- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git 
a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java index 4f95392f4..8738a65c0 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java @@ -26,7 +26,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; public class ContextNodesInclusionFactory { private OntModel fullModel; -// private String contextNodeURI; + private String contextNodeURI; //private String query = ""; // private static final String queryForEducationalTraining = "SELECT ?query WHERE {" + @@ -37,7 +37,7 @@ public class ContextNodesInclusionFactory { public ContextNodesInclusionFactory(String contextNodeURI, OntModel displayOntModel, ServletContext context) { this.fullModel = ModelContext.getJenaOntModel(context); -// this.contextNodeURI = contextNodeURI; + this.contextNodeURI = contextNodeURI; //query = getQueryFromModel(contextNodeURI, displayOntModel); } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java index 797f05d88..bd6ac321e 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java @@ -9,10 +9,8 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.document.Document; -import org.apache.solr.client.solrj.beans.Field; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrInputDocument; -import org.apache.solr.common.SolrInputField; import org.joda.time.DateTime; import com.hp.hpl.jena.vocabulary.OWL; @@ -29,7 +27,6 @@ import 
edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactor import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; -import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc; public class IndividualToSolrDocument implements Obj2DocIface { @@ -47,7 +44,6 @@ public class IndividualToSolrDocument implements Obj2DocIface { private ContextNodesInclusionFactory contextNodesInclusionFactory; - private static HashSet objectProperties = new HashSet(); public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch, @@ -58,7 +54,8 @@ public class IndividualToSolrDocument implements Obj2DocIface { this.contextNodesInclusionFactory = contextNodesInclusionFactory; } - @Override + @SuppressWarnings("static-access") + @Override public Object translate(Object obj) throws IndexingException{ long tProhibited = System.currentTimeMillis(); @@ -232,11 +229,6 @@ public class IndividualToSolrDocument implements Obj2DocIface { continue; try { value+= " "+ ( ((t=objectPropertyStmt.getObject().getName()) == null)?"":t ); - if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ - //IndividualURIToObjectProperties.put(ent.getURI(), ( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t ) ); - objectProperties.add(( ((t=objectPropertyStmt.getProperty().getURI()) == null)?"":t )); - } - } catch (Exception e) { log.debug("could not index name of related object: " + e.getMessage()); } From 57882afebf0338413519679a4010ffcd7711e9e8 Mon Sep 17 00:00:00 2001 From: anupsawant Date: Fri, 20 May 2011 14:59:39 +0000 Subject: [PATCH 06/38] Fri 11 AM merged with latest copy of trunk From 96f138c247e628cee16b009e2d2d63f14f8b7eca Mon Sep 17 00:00:00 2001 From: anupsawant Date: Fri, 20 May 2011 19:01:53 +0000 Subject: [PATCH 07/38] made changes to web.xml to comment out 
pagesearchcontroller and use solrpagesearchcontroller --- webapp/config/web.xml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/webapp/config/web.xml b/webapp/config/web.xml index fc1c291b4..609194414 100644 --- a/webapp/config/web.xml +++ b/webapp/config/web.xml @@ -894,17 +894,18 @@ EntityURLController /entityurl/* - + + SearchController edu.cornell.mannlib.vitro.webapp.search.controller.SolrPagedSearchController - --> + SearchController /search From 51bb0611568c561c0467c5e8c94538142bb6af24 Mon Sep 17 00:00:00 2001 From: anupsawant Date: Mon, 23 May 2011 14:02:55 +0000 Subject: [PATCH 08/38] merged from trunk version From cf69ec009ac207080f84f571967369b2ec1d346d Mon Sep 17 00:00:00 2001 From: anupsawant Date: Mon, 23 May 2011 19:46:14 +0000 Subject: [PATCH 09/38] added dismax query parser, boost for alltext fields and changed name of ContextNodeFactory --- solr/exampleSolr/conf/schema.xml | 4 +-- solr/exampleSolr/conf/solrconfig.xml | 10 +++++-- .../vitro/webapp/search/VitroTermNames.java | 3 ++ ...onFactory.java => SearchQueryHandler.java} | 28 ++++++++++++++---- .../search/lucene/Entity2LuceneDoc.java | 20 ++++++------- .../webapp/search/lucene/LuceneSetup.java | 4 +-- .../webapp/search/lucene/LuceneSetupCJK.java | 4 +-- .../search/solr/IndividualToSolrDocument.java | 29 +++++++++++-------- .../vitro/webapp/search/solr/SolrSetup.java | 4 +-- 9 files changed, 68 insertions(+), 38 deletions(-) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/{ContextNodesInclusionFactory.java => SearchQueryHandler.java} (96%) diff --git a/solr/exampleSolr/conf/schema.xml b/solr/exampleSolr/conf/schema.xml index ede99f821..586eb76aa 100644 --- a/solr/exampleSolr/conf/schema.xml +++ b/solr/exampleSolr/conf/schema.xml @@ -498,7 +498,7 @@ - + @@ -573,7 +573,7 @@ DocId - ALLTEXT + diff --git a/solr/exampleSolr/conf/solrconfig.xml b/solr/exampleSolr/conf/solrconfig.xml index b37a027ca..30aff6ca6 100644 --- a/solr/exampleSolr/conf/solrconfig.xml 
+++ b/solr/exampleSolr/conf/solrconfig.xml @@ -707,6 +707,8 @@ will be overridden by parameters in the request --> + dismax + nameRaw nameLowercase nameUnstemmed nameStemmed ALLTEXT ALLTEXTUNSTEMMED explicit 10 @@ -784,20 +786,22 @@ layout Solritas - edismax + + *:* 10 *,score + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 text,features,name,sku,id,manu,cat 3 - + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 - + on cat manu_exact diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java index 74d719823..35613c02d 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/VitroTermNames.java @@ -54,4 +54,7 @@ public class VitroTermNames { /** rdfs:label lowercased, tokenized, stop words, stemmed **/ public static String NAME_STEMMED = "nameStemmed"; // was NAME + + /** field for beta values of all documents **/ + public static final String BETA = "BETA"; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java similarity index 96% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java index 8738a65c0..8feb00413 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/ContextNodesInclusionFactory.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java @@ -15,15 +15,17 @@ import com.hp.hpl.jena.query.QuerySolutionMap; import com.hp.hpl.jena.query.ResultSet; import com.hp.hpl.jena.query.Syntax; import com.hp.hpl.jena.rdf.model.Literal; +import com.hp.hpl.jena.rdf.model.Property; import com.hp.hpl.jena.rdf.model.RDFNode; import 
com.hp.hpl.jena.rdf.model.Resource; import com.hp.hpl.jena.rdf.model.ResourceFactory; +import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.shared.Lock; import edu.cornell.mannlib.vitro.webapp.dao.DisplayVocabulary; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; -public class ContextNodesInclusionFactory { +public class SearchQueryHandler { private OntModel fullModel; private String contextNodeURI; @@ -32,9 +34,9 @@ public class ContextNodesInclusionFactory { // private static final String queryForEducationalTraining = "SELECT ?query WHERE {" + // "?searchConfig <"+ DisplayVocabulary.QUERY_FOR_EDUCATIONAL_TRAINING + "> ?query . }"; - private static Log log = LogFactory.getLog(ContextNodesInclusionFactory.class); + private static Log log = LogFactory.getLog(SearchQueryHandler.class); - public ContextNodesInclusionFactory(String contextNodeURI, + public SearchQueryHandler(String contextNodeURI, OntModel displayOntModel, ServletContext context) { this.fullModel = ModelContext.getJenaOntModel(context); this.contextNodeURI = contextNodeURI; @@ -643,6 +645,22 @@ public class ContextNodesInclusionFactory { } - - + private int getTotalIndividuals(){ + return fullModel.listIndividuals().toList().size(); + } + + public float calculateBeta(String uri){ + float beta=0; + RDFNode node = (Resource) fullModel.getResource(uri); + StmtIterator stmtItr = fullModel.listStatements((Resource)null, (Property)null,node); + int Conn = 0; + while(stmtItr.hasNext()){ + stmtItr.next(); + Conn++; + } + + beta = (float)Conn/getTotalIndividuals(); + beta += 1; + return beta; + } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java index 1228ba45f..e13b823b3 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java @@ 
-23,7 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.ObjectPropertyStatement; import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.search.IndexingException; -import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; +import edu.cornell.mannlib.vitro.webapp.search.beans.SearchQueryHandler; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; @@ -110,7 +110,7 @@ public class Entity2LuceneDoc implements Obj2DocIface{ private IndividualProhibitedFromSearch individualProhibited; - private ContextNodesInclusionFactory contextNodesInclusionFactory; + private SearchQueryHandler searchQueryHandler; private static HashMap IndividualURIToObjectProperties = new HashMap(); @@ -118,10 +118,10 @@ public class Entity2LuceneDoc implements Obj2DocIface{ public Entity2LuceneDoc( ProhibitedFromSearch classesProhibitedFromSearch, - IndividualProhibitedFromSearch individualProhibited, ContextNodesInclusionFactory contextNodesInclusionFactory){ + IndividualProhibitedFromSearch individualProhibited, SearchQueryHandler searchQueryHandler){ this.classesProhibitedFromSearch = classesProhibitedFromSearch; this.individualProhibited = individualProhibited; - this.contextNodesInclusionFactory = contextNodesInclusionFactory; + this.searchQueryHandler = searchQueryHandler; } public boolean canTranslate(Object obj) { @@ -242,12 +242,12 @@ public class Entity2LuceneDoc implements Obj2DocIface{ String contextNodePropertyValues; // if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ - contextNodePropertyValues = contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); - contextNodePropertyValues += 
contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI()); - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI()); - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI()); - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI()); - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI()); + contextNodePropertyValues = searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()); // } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java index 6872596fa..08162714c 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java @@ -42,7 +42,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; -import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; +import 
edu.cornell.mannlib.vitro.webapp.search.beans.SearchQueryHandler; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.servlet.setup.AbortStartup; @@ -120,7 +120,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { Entity2LuceneDoc translator = new Entity2LuceneDoc( new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), new IndividualProhibitedFromSearch(context), - new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context) + new SearchQueryHandler(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context) ); indexer.addObj2Doc(translator); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java index 3323dabe5..83dc5076a 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetupCJK.java @@ -27,7 +27,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; -import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; +import edu.cornell.mannlib.vitro.webapp.search.beans.SearchQueryHandler; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; @@ -94,7 +94,7 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener { Entity2LuceneDoc translator = new Entity2LuceneDoc( new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, 
displayOntModel), new IndividualProhibitedFromSearch(context), - new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); + new SearchQueryHandler(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); indexer.addObj2Doc(translator); indexer.setLuceneIndexFactory(lif); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java index 4d2d1858d..df0b589eb 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java @@ -13,6 +13,10 @@ import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrInputDocument; import org.joda.time.DateTime; +import com.hp.hpl.jena.rdf.model.Property; +import com.hp.hpl.jena.rdf.model.RDFNode; +import com.hp.hpl.jena.rdf.model.Resource; +import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.vocabulary.OWL; import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement; @@ -23,7 +27,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.VClass; import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary; import edu.cornell.mannlib.vitro.webapp.search.IndexingException; import edu.cornell.mannlib.vitro.webapp.search.VitroTermNames; -import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; +import edu.cornell.mannlib.vitro.webapp.search.beans.SearchQueryHandler; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; @@ -42,16 +46,16 @@ public class IndividualToSolrDocument implements Obj2DocIface { private IndividualProhibitedFromSearch individualProhibitedFromSearch; - private 
ContextNodesInclusionFactory contextNodesInclusionFactory; + private SearchQueryHandler searchQueryHandler; public IndividualToSolrDocument(ProhibitedFromSearch classesProhibitedFromSearch, IndividualProhibitedFromSearch individualProhibitedFromSearch, - ContextNodesInclusionFactory contextNodesInclusionFactory){ + SearchQueryHandler searchQueryHandler){ this.classesProhibitedFromSearch = classesProhibitedFromSearch; this.individualProhibitedFromSearch = individualProhibitedFromSearch; - this.contextNodesInclusionFactory = contextNodesInclusionFactory; + this.searchQueryHandler = searchQueryHandler; } @SuppressWarnings("static-access") @@ -156,12 +160,12 @@ public class IndividualToSolrDocument implements Obj2DocIface { long tContextNodes = System.currentTimeMillis(); String contextNodePropertyValues = ""; - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRole(ent.getURI()); - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithPosition(ent.getURI()); - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithRelationship(ent.getURI()); - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithAwardReceipt(ent.getURI()); - contextNodePropertyValues += contextNodesInclusionFactory.getPropertiesAssociatedWithInformationResource(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI()); + contextNodePropertyValues += 
searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()); doc.addField(term.CONTEXTNODE, contextNodePropertyValues); @@ -237,8 +241,8 @@ public class IndividualToSolrDocument implements Obj2DocIface { log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements)); - doc.addField(term.ALLTEXT, value); - doc.addField(term.ALLTEXTUNSTEMMED, value); + doc.addField(term.ALLTEXT, value,ALL_TEXT_BOOST); + doc.addField(term.ALLTEXTUNSTEMMED, value,ALL_TEXT_BOOST); } return doc; @@ -282,5 +286,6 @@ public class IndividualToSolrDocument implements Obj2DocIface { } public static float NAME_BOOST = 3.0F; + public static float ALL_TEXT_BOOST = 2.0F; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java index 0f142c6bb..3d910fb45 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java @@ -23,7 +23,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener; -import edu.cornell.mannlib.vitro.webapp.search.beans.ContextNodesInclusionFactory; +import edu.cornell.mannlib.vitro.webapp.search.beans.SearchQueryHandler; import edu.cornell.mannlib.vitro.webapp.search.beans.IndividualProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; @@ -76,7 +76,7 @@ public class SolrSetup implements 
javax.servlet.ServletContextListener{ IndividualToSolrDocument indToSolrDoc = new IndividualToSolrDocument( new ProhibitedFromSearch(DisplayVocabulary.PRIMARY_LUCENE_INDEX_URI, displayOntModel), new IndividualProhibitedFromSearch(context), - new ContextNodesInclusionFactory(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); + new SearchQueryHandler(DisplayVocabulary.CONTEXT_NODES_URI, displayOntModel, context)); List o2d = new ArrayList(); o2d.add(indToSolrDoc); From 4943a966fec383e375fc42e3835e7d9a5c4fef2c Mon Sep 17 00:00:00 2001 From: anupsawant Date: Tue, 24 May 2011 18:29:50 +0000 Subject: [PATCH 10/38] --- .../webapp/search/beans/SearchQueryHandler.java | 10 ++++------ .../webapp/search/lucene/Entity2LuceneDoc.java | 8 ++++---- .../search/solr/IndividualToSolrDocument.java | 15 +++++++++------ 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java index 8feb00413..ded894f8f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java @@ -29,6 +29,7 @@ public class SearchQueryHandler { private OntModel fullModel; private String contextNodeURI; + private int totalInd; //private String query = ""; // private static final String queryForEducationalTraining = "SELECT ?query WHERE {" + @@ -40,6 +41,7 @@ public class SearchQueryHandler { OntModel displayOntModel, ServletContext context) { this.fullModel = ModelContext.getJenaOntModel(context); this.contextNodeURI = contextNodeURI; + this.totalInd = fullModel.listIndividuals().toList().size(); //query = getQueryFromModel(contextNodeURI, displayOntModel); } @@ -644,11 +646,7 @@ public class SearchQueryHandler { return propertyValues.toString(); } - - private int getTotalIndividuals(){ - return 
fullModel.listIndividuals().toList().size(); - } - + public float calculateBeta(String uri){ float beta=0; RDFNode node = (Resource) fullModel.getResource(uri); @@ -659,7 +657,7 @@ public class SearchQueryHandler { Conn++; } - beta = (float)Conn/getTotalIndividuals(); + beta = (float)Conn/totalInd; beta += 1; return beta; } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java index e13b823b3..37f8f2aba 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/Entity2LuceneDoc.java @@ -242,17 +242,17 @@ public class Entity2LuceneDoc implements Obj2DocIface{ String contextNodePropertyValues; // if(ent.isVClass("http://xmlns.com/foaf/0.1/Person")){ - contextNodePropertyValues = searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); + /*contextNodePropertyValues = searchQueryHandler.getPropertiesAssociatedWithEducationalTraining(ent.getURI()); contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRole(ent.getURI()); contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithPosition(ent.getURI()); contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithRelationship(ent.getURI()); contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithAwardReceipt(ent.getURI()); - contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()); + contextNodePropertyValues += searchQueryHandler.getPropertiesAssociatedWithInformationResource(ent.getURI()); */ // } - Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, Field.Store.YES, Field.Index.ANALYZED ); - doc.add(contextNodeInformation); + /* Field contextNodeInformation = new Field(term.CONTEXTNODE, contextNodePropertyValues, 
Field.Store.YES, Field.Index.ANALYZED ); + doc.add(contextNodeInformation);*/ //Moniker diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java index df0b589eb..873b65175 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java @@ -71,6 +71,9 @@ public class IndividualToSolrDocument implements Obj2DocIface { String classPublicNames = ""; SolrInputDocument doc = new SolrInputDocument(); + float beta = searchQueryHandler.calculateBeta(ent.getURI()); + doc.addField(term.BETA,beta); + //DocId String id = ent.getURI(); log.debug("translating " + id); @@ -152,10 +155,10 @@ public class IndividualToSolrDocument implements Obj2DocIface { value = ent.getLocalName(); } - doc.addField(term.NAME_RAW, value, NAME_BOOST); - doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),NAME_BOOST); - doc.addField(term.NAME_UNSTEMMED, value,NAME_BOOST); - doc.addField(term.NAME_STEMMED, value, NAME_BOOST); + doc.addField(term.NAME_RAW, value, (NAME_BOOST*beta)); + doc.addField(term.NAME_LOWERCASE, value.toLowerCase(),(NAME_BOOST*beta)); + doc.addField(term.NAME_UNSTEMMED, value,(NAME_BOOST*beta)); + doc.addField(term.NAME_STEMMED, value, (NAME_BOOST*beta)); long tContextNodes = System.currentTimeMillis(); @@ -241,8 +244,8 @@ public class IndividualToSolrDocument implements Obj2DocIface { log.debug("time to include data property statements, object property statements in the index: " + Long.toString(System.currentTimeMillis() - tPropertyStatements)); - doc.addField(term.ALLTEXT, value,ALL_TEXT_BOOST); - doc.addField(term.ALLTEXTUNSTEMMED, value,ALL_TEXT_BOOST); + doc.addField(term.ALLTEXT, value,(ALL_TEXT_BOOST*beta)); + doc.addField(term.ALLTEXTUNSTEMMED, value,(ALL_TEXT_BOOST*beta)); } return doc; From 
ecca050bbc8e9e20ab0f82e3debbed621ed1648b Mon Sep 17 00:00:00 2001 From: anupsawant Date: Tue, 24 May 2011 19:17:46 +0000 Subject: [PATCH 11/38] --- .../vitro/webapp/search/solr/IndividualToSolrDocument.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java index 873b65175..d748c077a 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java @@ -71,8 +71,9 @@ public class IndividualToSolrDocument implements Obj2DocIface { String classPublicNames = ""; SolrInputDocument doc = new SolrInputDocument(); - float beta = searchQueryHandler.calculateBeta(ent.getURI()); - doc.addField(term.BETA,beta); + //float beta = searchQueryHandler.calculateBeta(ent.getURI()); + //doc.addField(term.BETA,beta); + float beta =1; //DocId String id = ent.getURI(); From 7d14c52c122e301219eac523028846c0046c6b5e Mon Sep 17 00:00:00 2001 From: anupsawant Date: Wed, 25 May 2011 21:43:32 +0000 Subject: [PATCH 12/38] --- solr/exampleSolr/conf/schema.xml | 9 ++++--- solr/exampleSolr/conf/solrconfig.xml | 3 ++- .../vitro/webapp/search/VitroTermNames.java | 4 +++ .../search/beans/SearchQueryHandler.java | 1 + .../controller/SolrPagedSearchController.java | 1 + .../search/solr/IndividualToSolrDocument.java | 26 +++++++++++-------- .../vitro/webapp/search/solr/SolrIndexer.java | 2 +- 7 files changed, 30 insertions(+), 16 deletions(-) diff --git a/solr/exampleSolr/conf/schema.xml b/solr/exampleSolr/conf/schema.xml index 7cd9c70c8..89dd80135 100644 --- a/solr/exampleSolr/conf/schema.xml +++ b/solr/exampleSolr/conf/schema.xml @@ -381,7 +381,7 @@ - + @@ -493,9 +493,11 @@ + + @@ -577,7 +579,7 @@ DocId - + ALLTEXT @@ -600,7 +602,8 @@ - + + dismax - nameRaw nameLowercase nameUnstemmed 
nameStemmed ALLTEXT ALLTEXTUNSTEMMED + nameRaw nameLowercase nameUnstemmed nameStemmed ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC explicit + BETA 10 dismax - nameRaw nameLowercase nameUnstemmed nameStemmed ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC + nameRaw nameLowercase nameUnstemmed nameStemmed ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC contextNode explicit BETA 10 diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java index 1bf79ed59..43bd60293 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/beans/SearchQueryHandler.java @@ -168,42 +168,42 @@ public class SearchQueryHandler { if(hrJobTitle != null){ propertyValues.append(" " + hrJobTitle.toString()); }else{ - log.warn("hrJobTitle is null "); + log.debug("hrJobTitle is null "); } RDFNode involvedOrganizationName = soln.get("involvedOrganizationName"); if(involvedOrganizationName != null){ propertyValues.append(" " + involvedOrganizationName.toString()); }else{ - log.warn("involvedOrganizationName is null "); + log.debug("involvedOrganizationName is null "); } RDFNode positionForPerson = soln.get("positionForPerson"); if(positionForPerson != null){ propertyValues.append(" " + positionForPerson.toString()); }else{ - log.warn("positionForPerson is null "); + log.debug("positionForPerson is null "); } RDFNode positionInOrganization = soln.get("positionInOrganization"); if(positionInOrganization != null){ propertyValues.append(" " + positionInOrganization.toString()); }else{ - log.warn("positionInOrganization is null "); + log.debug("positionInOrganization is null "); } RDFNode titleOrRole = soln.get("titleOrRole"); if(titleOrRole != null){ propertyValues.append(" " + titleOrRole.toString()); }else{ - log.warn("titleOrRole is null "); + 
log.debug("titleOrRole is null "); } RDFNode positionLabel = soln.get("positionLabel"); if(positionLabel != null){ propertyValues.append(" " + positionLabel.toString()); }else{ - log.warn("positionLabel is null "); + log.debug("positionLabel is null "); } } @@ -268,28 +268,28 @@ public class SearchQueryHandler { if(advisee != null){ propertyValues.append(" " + advisee.toString()); }else{ - log.warn("advisee is null "); + log.debug("advisee is null "); } RDFNode degreeCandidacy = soln.get("degreeCandidacy"); if(degreeCandidacy != null){ propertyValues.append(" " + degreeCandidacy.toString()); }else{ - log.warn("degreeCandidacy is null "); + log.debug("degreeCandidacy is null "); } RDFNode linkedAuthor = soln.get("linkedAuthor"); if(linkedAuthor != null){ propertyValues.append(" " + linkedAuthor.toString()); }else{ - log.warn("linkedAuthor is null "); + log.debug("linkedAuthor is null "); } RDFNode linkedInformationResource = soln.get("linkedInformationResource"); if(linkedInformationResource != null){ propertyValues.append(" " + linkedInformationResource.toString()); }else{ - log.warn("linkedInformationResource is null "); + log.debug("linkedInformationResource is null "); } } @@ -355,28 +355,28 @@ public class SearchQueryHandler { if(awardConferredBy != null){ propertyValues.append(" " + awardConferredBy.toString()); }else{ - log.warn("awardConferredBy is null "); + log.debug("awardConferredBy is null "); } RDFNode awardOrHonorFor = soln.get("awardOrHonorFor"); if(awardOrHonorFor != null){ propertyValues.append(" " + awardOrHonorFor.toString()); }else{ - log.warn("awardOrHonorFor is null "); + log.debug("awardOrHonorFor is null "); } RDFNode description = soln.get("description"); if(description != null){ propertyValues.append(" " + description.toString()); }else{ - log.warn("description is null "); + log.debug("description is null "); } RDFNode awardReceiptLabel = soln.get("awardReceiptLabel"); if(awardReceiptLabel != null){ propertyValues.append(" " + 
awardReceiptLabel.toString()); }else{ - log.warn("awardReceiptLabel is null "); + log.debug("awardReceiptLabel is null "); } } @@ -502,35 +502,35 @@ public class SearchQueryHandler { if(academicDegreeLabel != null){ propertyValues.append(" " + academicDegreeLabel.toString()); }else{ - log.warn("academicDegreeLabel is null "); + log.debug("academicDegreeLabel is null "); } RDFNode academicDegreeAbbreviation = soln.get("academicDegreeAbbreviation"); if(academicDegreeAbbreviation != null){ propertyValues.append(" " + academicDegreeAbbreviation.toString()); }else{ - log.warn("academicDegreeAbbreviation is null "); + log.debug("academicDegreeAbbreviation is null "); } RDFNode majorField = soln.get("majorField"); if(majorField != null){ propertyValues.append(" " + majorField.toString()); }else{ - log.warn("majorField is null "); + log.debug("majorField is null "); } RDFNode trainingAtDepartmentOrSchool = soln.get("departmentOrSchool"); if(trainingAtDepartmentOrSchool != null){ propertyValues.append(" " + trainingAtDepartmentOrSchool.toString()); }else{ - log.warn("trainingAtDepartmentOrSchool is null "); + log.debug("trainingAtDepartmentOrSchool is null "); } RDFNode trainingAtOrganizationLabel = soln.get("trainingAtOrganizationLabel"); if(trainingAtOrganizationLabel != null){ propertyValues.append(" " + trainingAtOrganizationLabel.toString()); }else{ - log.warn("trainingAtOrganizationLabel is null "); + log.debug("trainingAtOrganizationLabel is null "); } } @@ -596,42 +596,42 @@ public class SearchQueryHandler { if(linkedAuthor != null){ propertyValues.append(" " + linkedAuthor.toString()); }else{ - log.warn("linkedAuthor is null "); + log.debug("linkedAuthor is null "); } RDFNode linkedInformationResource = soln.get("linkedInformationResource"); if(linkedInformationResource != null){ propertyValues.append(" " + linkedInformationResource.toString()); }else{ - log.warn("linkedInformationResource is null "); + log.debug("linkedInformationResource is null "); } RDFNode 
editor = soln.get("editor"); if(editor != null){ propertyValues.append(" " + editor.toString()); }else{ - log.warn("editor is null "); + log.debug("editor is null "); } RDFNode subjectArea = soln.get("subjectArea"); if(subjectArea != null){ propertyValues.append(" " + subjectArea.toString()); }else{ - log.warn("subjectArea is null "); + log.debug("subjectArea is null "); } RDFNode researchAreaOf = soln.get("researchAreaOf"); if(researchAreaOf != null){ propertyValues.append(" " + researchAreaOf.toString()); }else{ - log.warn("researchAreaOf is null "); + log.debug("researchAreaOf is null "); } RDFNode features = soln.get("features"); if(features != null){ propertyValues.append(" " + features.toString()); }else{ - log.warn("features is null "); + log.debug("features is null "); } } From b6666dd03914010b3fd9308791dce1e5755d5891 Mon Sep 17 00:00:00 2001 From: anupsawant Date: Thu, 26 May 2011 04:34:14 +0000 Subject: [PATCH 14/38] --- solr/exampleSolr/conf/schema.xml | 8 +- solr/exampleSolr/conf/syn.txt | 44722 +++++++++++++++++++++++++++++ 2 files changed, 44726 insertions(+), 4 deletions(-) create mode 100644 solr/exampleSolr/conf/syn.txt diff --git a/solr/exampleSolr/conf/schema.xml b/solr/exampleSolr/conf/schema.xml index 89dd80135..6cb0c82ee 100644 --- a/solr/exampleSolr/conf/schema.xml +++ b/solr/exampleSolr/conf/schema.xml @@ -222,9 +222,9 @@ - + + + + @@ -251,6 +251,7 @@ + @@ -501,8 +502,10 @@ - + + + --> diff --git a/solr/exampleSolr/conf/solrconfig.xml b/solr/exampleSolr/conf/solrconfig.xml index e3e2a7ab7..03378f9d6 100644 --- a/solr/exampleSolr/conf/solrconfig.xml +++ b/solr/exampleSolr/conf/solrconfig.xml @@ -708,9 +708,10 @@ --> dismax - nameRaw nameLowercase nameUnstemmed nameStemmed ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC contextNode + nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo + targetInfo explicit - BETA + 1 10 @@ -582,7 +582,7 @@ DocId - ALLTEXT + diff --git 
a/solr/exampleSolr/conf/solrconfig.xml b/solr/exampleSolr/conf/solrconfig.xml index 03378f9d6..a8e7b22da 100644 --- a/solr/exampleSolr/conf/solrconfig.xml +++ b/solr/exampleSolr/conf/solrconfig.xml @@ -711,7 +711,8 @@ nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo targetInfo explicit - 1 + 2 + 2 10 - dismax + edismax nameRaw nameLowercase ALLTEXT ALLTEXTUNSTEMMED ALLTEXT_PHONETIC NAME_PHONETIC nameUnstemmed nameStemmed targetInfo targetInfo explicit 2 2 10 + *:* + *,score