From 8bd0990e85e89c62cf3a3869053313f3774a3093 Mon Sep 17 00:00:00 2001 From: briancaruso Date: Tue, 26 Jun 2012 17:00:08 +0000 Subject: [PATCH] Index all labels for an individual, to support searching on multiple labels and other-language versions of a label NIHVIVO-3811 Fixed number of threads used during a solr index update Changed nameLowercaseSingleValue to not be a copy field Changed logging in solr index building so that errors from the solr server are logged --- solr/homeDirectoryTemplate/conf/schema.xml | 4 +- .../rdfservice/impl/RDFServiceUtils.java | 6 +- .../webapp/search/indexing/IndexBuilder.java | 32 ++++----- .../search/indexing/IndexWorkerThread.java | 6 +- .../vitro/webapp/search/solr/SolrIndexer.java | 1 + .../vitro/webapp/search/solr/SolrSetup.java | 18 ++++- .../CalculateParameters.java | 2 +- .../ContextNodeFields.java | 2 +- .../DocumentModifier.java | 3 +- .../ExcludeBasedOnNamespace.java | 2 +- .../ExcludeBasedOnType.java | 2 +- .../ExcludeBasedOnTypeNamespace.java | 2 +- .../ExcludeNonFlagVitro.java | 2 +- .../IndividualToSolrDocument.java | 45 ++++++------ .../{ => documentBuilding}/NameBoost.java | 2 +- .../solr/documentBuilding/NameFields.java | 72 +++++++++++++++++++ .../SearchIndexExcluder.java | 2 +- .../SkipIndividualException.java | 4 +- .../SourceInstitution.java | 2 +- .../SyncingExcludeBasedOnType.java | 2 +- .../ThumbnailImageURL.java | 2 +- .../search/solr/ThumbnailImageURLTest.java | 4 +- 22 files changed, 153 insertions(+), 64 deletions(-) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/CalculateParameters.java (99%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/ContextNodeFields.java (98%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/DocumentModifier.java (88%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/ExcludeBasedOnNamespace.java (92%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/ExcludeBasedOnType.java (95%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/ExcludeBasedOnTypeNamespace.java (96%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/ExcludeNonFlagVitro.java (95%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/IndividualToSolrDocument.java (96%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/NameBoost.java (95%) create mode 100644 webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/NameFields.java rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/SearchIndexExcluder.java (87%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/SkipIndividualException.java (58%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/SourceInstitution.java (93%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/SyncingExcludeBasedOnType.java (98%) rename webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/{ => documentBuilding}/ThumbnailImageURL.java (98%) diff --git a/solr/homeDirectoryTemplate/conf/schema.xml b/solr/homeDirectoryTemplate/conf/schema.xml index d979805c6..3e3aef549 100644 --- a/solr/homeDirectoryTemplate/conf/schema.xml +++ b/solr/homeDirectoryTemplate/conf/schema.xml @@ -246,9 +246,9 @@ - - + + diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/RDFServiceUtils.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/RDFServiceUtils.java index 673de350f..97a98252c 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/RDFServiceUtils.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/rdfservice/impl/RDFServiceUtils.java @@ -23,10 +23,11 @@ import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ModelSerialization import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; -import edu.cornell.mannlib.vitro.webapp.search.solr.ContextNodeFields; public class RDFServiceUtils { + static Log log = LogFactory.getLog(RDFServiceUtils.class); + private static final String RDFSERVICEFACTORY_ATTR = RDFServiceUtils.class.getName() + ".RDFServiceFactory"; private static final String RDFSERVICEFACTORY_FILTERING_ATTR = @@ -97,8 +98,7 @@ public class RDFServiceUtils { InputStream resultStream = rdfService.sparqlSelectQuery(query, RDFService.ResultFormat.JSON); resultSet = ResultSetFactory.fromJSON(resultStream); return resultSet; - } catch (RDFServiceException e) { - Log log = LogFactory.getLog(ContextNodeFields.class); + } catch (RDFServiceException e) { log.error("error executing sparql select query: " + e.getMessage()); } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java index 8c5cb302d..67b8419e6 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java @@ -72,15 +72,17 @@ public class IndexBuilder extends VitroBackgroundThread { public static final String FLAG_REBUILDING = "rebuilding"; /** Number of threads to use during indexing. */ - protected int numberOfThreads = 10; + //protected int numberOfThreads = 10; /** List of IndexingEventListeners */ protected LinkedList indexingEventListeners = new LinkedList(); - public static final int MAX_REINDEX_THREADS= 10; - public static final int MAX_UPDATE_THREADS= 10; - public static final int MAX_THREADS = Math.max( MAX_UPDATE_THREADS, MAX_REINDEX_THREADS); + /** number of threads to use during a full index rebuild. */ + public static final int REINDEX_THREADS= 1; + + /** Max threads to use during an update. Smaller updates will use fewer threads. */ + public static final int MAX_UPDATE_THREADS= 1; private static final Log log = LogFactory.getLog(IndexBuilder.class); @@ -285,9 +287,8 @@ public class IndexBuilder extends VitroBackgroundThread { log.debug("Getting all URIs in the model"); Iterator uris = wdf.getIndividualDao().getAllOfThisTypeIterator(); - - this.numberOfThreads = MAX_REINDEX_THREADS; - doBuild(uris, Collections.emptyList() ); + + doBuild(uris, Collections.emptyList(), REINDEX_THREADS ); if( log != null ) //log might be null if system is shutting down. log.info("Rebuild of search index is complete."); @@ -298,8 +299,10 @@ public class IndexBuilder extends VitroBackgroundThread { UriLists uriLists = makeAddAndDeleteLists( changedStatementsToUris() ); - this.numberOfThreads = Math.max( MAX_UPDATE_THREADS, uriLists.updatedUris.size() / 20); - doBuild( uriLists.updatedUris.iterator(), uriLists.deletedUris ); + int numberOfThreads = + Math.min( MAX_UPDATE_THREADS, + Math.max( uriLists.updatedUris.size() / 100, 1)); + doBuild( uriLists.updatedUris.iterator(), uriLists.deletedUris , numberOfThreads); log.debug("Ending updateIndex()"); } @@ -318,7 +321,7 @@ public class IndexBuilder extends VitroBackgroundThread { * to false, and a check is made before adding, it will work fine; but * checking if an object is on the index is slow. */ - private void doBuild(Iterator updates, Collection deletes ){ + private void doBuild(Iterator updates, Collection deletes, int numberOfThreads ){ boolean updateRequested = ! reindexRequested; try { @@ -341,7 +344,7 @@ public class IndexBuilder extends VitroBackgroundThread { } } - indexUriList(updates); + indexUriList(updates, numberOfThreads); } catch (Exception e) { if( log != null) log.debug("Exception during indexing",e); @@ -354,12 +357,7 @@ public class IndexBuilder extends VitroBackgroundThread { * Use the back end indexer to index each object that the Iterator returns. * @throws AbortIndexing */ - private void indexUriList(Iterator updateUris ) { - //make a copy of numberOfThreads so the local copy is safe during this method. - int numberOfThreads = this.numberOfThreads; - if( numberOfThreads > MAX_THREADS ) - numberOfThreads = MAX_THREADS; - + private void indexUriList(Iterator updateUris , int numberOfThreads) { //make lists of work URIs for workers List> workLists = makeWorkerUriLists(updateUris, numberOfThreads); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexWorkerThread.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexWorkerThread.java index d7f75d184..6008d330f 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexWorkerThread.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexWorkerThread.java @@ -11,7 +11,7 @@ import org.apache.commons.logging.LogFactory; import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.search.IndexingException; import edu.cornell.mannlib.vitro.webapp.search.beans.IndexerIface; -import edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.IndividualToSolrDocument; class IndexWorkerThread extends Thread{ @@ -83,8 +83,8 @@ class IndexWorkerThread extends Thread{ } }catch(Throwable th){ //on tomcat shutdown odd exceptions get thrown and log can be null - if( log != null ) - log.debug("Exception during index building",th); + if( log != null && ! stopRequested ) + log.error("Exception during index building",th); } } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrIndexer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrIndexer.java index f2e6b3e8d..100620926 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrIndexer.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrIndexer.java @@ -20,6 +20,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.search.IndexingException; import edu.cornell.mannlib.vitro.webapp.search.beans.IndexerIface; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.IndividualToSolrDocument; public class SolrIndexer implements IndexerIface { diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java index a3e4ad92e..6dc2a7d8d 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SolrSetup.java @@ -30,6 +30,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils; import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters; import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext; +import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils; import edu.cornell.mannlib.vitro.webapp.search.beans.FileBasedProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch; import edu.cornell.mannlib.vitro.webapp.search.beans.StatementToURIsToUpdate; @@ -40,6 +41,17 @@ import edu.cornell.mannlib.vitro.webapp.search.indexing.AdditionalURIsForTypeSta import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder; import edu.cornell.mannlib.vitro.webapp.search.indexing.SearchReindexingListener; import edu.cornell.mannlib.vitro.webapp.search.indexing.URIsForClassGroupChange; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.DocumentModifier; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnNamespace; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnType; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnTypeNamespace; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeNonFlagVitro; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.IndividualToSolrDocument; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.NameBoost; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.NameFields; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.SearchIndexExcluder; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.SyncingExcludeBasedOnType; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ThumbnailImageURL; import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus; public class SolrSetup implements javax.servlet.ServletContextListener{ @@ -120,9 +132,9 @@ public class SolrSetup implements javax.servlet.ServletContextListener{ if( modifiers == null ) modifiers = new ArrayList(); - - modifiers.add(new NameBoost( 1.2f )); - modifiers.add(new ThumbnailImageURL(jenaOntModel)); + modifiers.add( new NameFields( RDFServiceUtils.getRDFServiceFactory(context))); + modifiers.add( new NameBoost( 1.2f )); + modifiers.add( new ThumbnailImageURL(jenaOntModel)); /* try to get context attribute SearchIndexExcludes * and use that as the start of the list of exclude diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateParameters.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/CalculateParameters.java similarity index 99% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateParameters.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/CalculateParameters.java index efb0ec413..0ae3eca19 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/CalculateParameters.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/CalculateParameters.java @@ -1,6 +1,6 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import java.util.ArrayList; import java.util.HashSet; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ContextNodeFields.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ContextNodeFields.java similarity index 98% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ContextNodeFields.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ContextNodeFields.java index b4e3967c9..579c7bd1e 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ContextNodeFields.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ContextNodeFields.java @@ -1,5 +1,5 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import java.util.ArrayList; import java.util.Collection; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/DocumentModifier.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/DocumentModifier.java similarity index 88% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/DocumentModifier.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/DocumentModifier.java index b3b960451..28259ece6 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/DocumentModifier.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/DocumentModifier.java @@ -1,11 +1,10 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import org.apache.solr.common.SolrInputDocument; import edu.cornell.mannlib.vitro.webapp.beans.Individual; - /** * This interface represents an object that can add to a SolrInputDocument. */ diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeBasedOnNamespace.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeBasedOnNamespace.java similarity index 92% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeBasedOnNamespace.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeBasedOnNamespace.java index 8bf0e2aae..088fe6fb2 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeBasedOnNamespace.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeBasedOnNamespace.java @@ -1,5 +1,5 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import java.util.Arrays; import java.util.List; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeBasedOnType.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeBasedOnType.java similarity index 95% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeBasedOnType.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeBasedOnType.java index e61e6f4ce..3d9515143 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeBasedOnType.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeBasedOnType.java @@ -1,5 +1,5 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import java.util.ArrayList; import java.util.Arrays; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeBasedOnTypeNamespace.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeBasedOnTypeNamespace.java similarity index 96% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeBasedOnTypeNamespace.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeBasedOnTypeNamespace.java index a2219d8a6..9e864f898 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeBasedOnTypeNamespace.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeBasedOnTypeNamespace.java @@ -1,5 +1,5 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import java.util.Arrays; import java.util.Collections; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeNonFlagVitro.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeNonFlagVitro.java similarity index 95% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeNonFlagVitro.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeNonFlagVitro.java index ef9a431d9..3381423d0 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ExcludeNonFlagVitro.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ExcludeNonFlagVitro.java @@ -1,5 +1,5 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import java.util.List; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/IndividualToSolrDocument.java similarity index 96% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/IndividualToSolrDocument.java index 0064b5b03..a08d4238c 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/IndividualToSolrDocument.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/IndividualToSolrDocument.java @@ -1,6 +1,7 @@ + /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import java.util.ArrayList; import java.util.HashMap; @@ -60,10 +61,10 @@ public class IndividualToSolrDocument { //vitro id doc.addField(term.URI, ind.getURI()); - //Individual Label - addLabel( ind, doc ); - - //add classes, classgroups get if prohibied becasue of its class + //get label from ind + addLabel(ind, doc); + + //add classes, classgroups get if prohibited because of its class StringBuffer classPublicNames = new StringBuffer(""); addClasses(ind, doc, classPublicNames); @@ -72,7 +73,7 @@ public class IndividualToSolrDocument { StringBuffer addUri = new StringBuffer(""); addObjectPropertyText(ind, doc, objectNames, addUri); - //time of index in millis past epoc + //time of index in msec past epoch doc.addField(term.INDEXEDTIME, new Long( (new DateTime()).getMillis() ) ); addAllText( ind, doc, classPublicNames, objectNames ); @@ -89,10 +90,10 @@ public class IndividualToSolrDocument { //indicates that this individual should not be indexed by returning null log.debug(ex); return null; - }catch(Throwable th){ + }catch(Exception th){ //Odd exceptions can get thrown on shutdown if( log != null ) - log.debug(th); + log.error(th,th); return null; } } @@ -190,19 +191,7 @@ public class IndividualToSolrDocument { doc.addField(term.ALLTEXT_PHONETIC, alltext); } - protected void addLabel(Individual ind, SolrInputDocument doc) { - String value = ""; - String label = ind.getRdfsLabel(); - if (label != null) { - value = label; - } else { - value = ind.getLocalName(); - } - doc.addField(term.NAME_RAW, value); - // NAME_RAW will be copied by solr into the following fields: - // NAME_LOWERCASE, NAME_UNSTEMMED, NAME_STEMMED, NAME_PHONETIC, AC_NAME_UNTOKENIZED, AC_NAME_STEMMED - } @@ -278,6 +267,22 @@ public class IndividualToSolrDocument { } } } + + protected void addLabel(Individual ind, SolrInputDocument doc) { + String value = ""; + String label = ind.getRdfsLabel(); + if (label != null) { + value = label; + } else { + value = ind.getLocalName(); + } + + doc.addField(term.NAME_RAW, value); + doc.addField(term.NAME_LOWERCASE_SINGLE_VALUED,value); + + // NAME_RAW will be copied by solr into the following fields: + // NAME_LOWERCASE, NAME_UNSTEMMED, NAME_STEMMED, NAME_PHONETIC, AC_NAME_UNTOKENIZED, AC_NAME_STEMMED + } public Object getIndexId(Object obj) { throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented"); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/NameBoost.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/NameBoost.java similarity index 95% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/NameBoost.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/NameBoost.java index 63aaa8698..1b0613b99 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/NameBoost.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/NameBoost.java @@ -1,6 +1,6 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import org.apache.solr.common.SolrInputDocument; import org.apache.solr.common.SolrInputField; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/NameFields.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/NameFields.java new file mode 100644 index 000000000..3a76ac9ef --- /dev/null +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/NameFields.java @@ -0,0 +1,72 @@ +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.solr.common.SolrInputDocument; + +import edu.cornell.mannlib.vitro.webapp.beans.Individual; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException; +import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory; +import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames; + +/** + * Adds all labels to name fields, not just the one returned by Indivdiual.getName(). + */ +public class NameFields implements DocumentModifier { + RDFServiceFactory rsf; + + public static final VitroSearchTermNames term = new VitroSearchTermNames(); + public static final Log log = LogFactory.getLog(NameFields.class.getName()); + + public NameFields( RDFServiceFactory rsf){ + this.rsf = rsf; + } + + @Override + public void modifyDocument(Individual ind, SolrInputDocument doc, + StringBuffer addUri) throws SkipIndividualException { + if( ind == null || ind.getURI() == null ){ + return; + } + + //also run SPARQL query to get rdfs:label values + String query = + "SELECT ?label WHERE { " + + "<" + ind.getURI() + "> " + + " ?label }"; + + try { + RDFService rdfService = rsf.getRDFService(); + BufferedReader stream = + new BufferedReader(new InputStreamReader(rdfService.sparqlSelectQuery(query, ResultFormat.CSV))); + + StringBuffer buffer = new StringBuffer(); + String line; + + //throw out first line since it is just a header + stream.readLine(); + + while( (line = stream.readLine()) != null ){ + buffer.append(line).append(' '); + } + + log.debug("Adding labels for " + ind.getURI() + " \"" + buffer.toString() + "\""); + doc.addField(term.NAME_RAW, buffer.toString()); + + } catch (RDFServiceException e) { + log.error("could not get the rdfs:label for " + ind.getURI(), e); + } catch (IOException e) { + log.error("could not get the rdfs:label for " + ind.getURI(), e); + } + + } + + @Override + public void shutdown() { /*nothing to do */ } +} diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SearchIndexExcluder.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SearchIndexExcluder.java similarity index 87% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SearchIndexExcluder.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SearchIndexExcluder.java index 1507d9397..83678288a 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SearchIndexExcluder.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SearchIndexExcluder.java @@ -1,5 +1,5 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import edu.cornell.mannlib.vitro.webapp.beans.Individual; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SkipIndividualException.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SkipIndividualException.java similarity index 58% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SkipIndividualException.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SkipIndividualException.java index 5d2848ce6..ef5bc7a79 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SkipIndividualException.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SkipIndividualException.java @@ -1,8 +1,8 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; -class SkipIndividualException extends Exception{ +public class SkipIndividualException extends Exception{ public SkipIndividualException(String string) { super(string); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SourceInstitution.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SourceInstitution.java similarity index 93% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SourceInstitution.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SourceInstitution.java index 62fd2b841..23e96c08e 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SourceInstitution.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SourceInstitution.java @@ -1,6 +1,6 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import org.apache.solr.common.SolrInputDocument; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SyncingExcludeBasedOnType.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SyncingExcludeBasedOnType.java similarity index 98% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SyncingExcludeBasedOnType.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SyncingExcludeBasedOnType.java index a6ed8161f..cccc6fe6e 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/SyncingExcludeBasedOnType.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/SyncingExcludeBasedOnType.java @@ -1,5 +1,5 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import java.util.ArrayList; import java.util.List; diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ThumbnailImageURL.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ThumbnailImageURL.java similarity index 98% rename from webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ThumbnailImageURL.java rename to webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ThumbnailImageURL.java index 0cdfbe786..3fc9d32d6 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/ThumbnailImageURL.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/solr/documentBuilding/ThumbnailImageURL.java @@ -1,6 +1,6 @@ /* $This file is distributed under the terms of the license in /doc/license.txt$ */ -package edu.cornell.mannlib.vitro.webapp.search.solr; +package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding; import java.util.Iterator; diff --git a/webapp/test/edu/cornell/mannlib/vitro/webapp/search/solr/ThumbnailImageURLTest.java b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/solr/ThumbnailImageURLTest.java index 968f6e3b8..39fc0b418 100644 --- a/webapp/test/edu/cornell/mannlib/vitro/webapp/search/solr/ThumbnailImageURLTest.java +++ b/webapp/test/edu/cornell/mannlib/vitro/webapp/search/solr/ThumbnailImageURLTest.java @@ -23,6 +23,8 @@ import edu.cornell.mannlib.vitro.testing.AbstractTestClass; import edu.cornell.mannlib.vitro.webapp.beans.Individual; import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl; import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.SkipIndividualException; +import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ThumbnailImageURL; /** * @author bdc34 @@ -49,7 +51,7 @@ public class ThumbnailImageURLTest extends AbstractTestClass{ } /** - * Test method for {@link edu.cornell.mannlib.vitro.webapp.search.solr.ThumbnailImageURL#modifyDocument(edu.cornell.mannlib.vitro.webapp.beans.Individual, org.apache.solr.common.SolrInputDocument, java.lang.StringBuffer)}. + * Test method for {@link edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ThumbnailImageURL#modifyDocument(edu.cornell.mannlib.vitro.webapp.beans.Individual, org.apache.solr.common.SolrInputDocument, java.lang.StringBuffer)}. */ @Test public void testModifyDocument() {