Index all labels for an individual, to support searching on multiple labels and other-language versions of a label NIHVIVO-3811

Fixed number of threads used during a solr index update
Changed nameLowercaseSingleValue to not be a copy field
Changed logging in solr index building so that errors from the solr server are logged
This commit is contained in:
briancaruso 2012-06-26 17:00:08 +00:00
parent 1a30302241
commit 8bd0990e85
22 changed files with 153 additions and 64 deletions

View file

@ -246,9 +246,9 @@
<copyField source="nameRaw" dest="nameLowercase" />
<copyField source="nameRaw" dest="NAME_PHONETIC" />
<copyField source="nameRaw" dest="acNameUntokenized" />
<copyField source="nameRaw" dest="acNameStemmed" />
<copyField source="nameRaw" dest="nameLowercaseSingleValued" />
<copyField source="nameRaw" dest="acNameStemmed" />
<copyField source="nameRaw" dest="nameText" />
<!-- nameLowercaseSingleValued is not copied from nameRaw because nameRaw might have multiple values -->
<!-- **************************** End Vitro Fields *************************** -->

View file

@ -23,10 +23,11 @@ import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ModelSerialization
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.search.solr.ContextNodeFields;
public class RDFServiceUtils {
static Log log = LogFactory.getLog(RDFServiceUtils.class);
private static final String RDFSERVICEFACTORY_ATTR =
RDFServiceUtils.class.getName() + ".RDFServiceFactory";
private static final String RDFSERVICEFACTORY_FILTERING_ATTR =
@ -97,8 +98,7 @@ public class RDFServiceUtils {
InputStream resultStream = rdfService.sparqlSelectQuery(query, RDFService.ResultFormat.JSON);
resultSet = ResultSetFactory.fromJSON(resultStream);
return resultSet;
} catch (RDFServiceException e) {
Log log = LogFactory.getLog(ContextNodeFields.class);
} catch (RDFServiceException e) {
log.error("error executing sparql select query: " + e.getMessage());
}

View file

@ -72,15 +72,17 @@ public class IndexBuilder extends VitroBackgroundThread {
public static final String FLAG_REBUILDING = "rebuilding";
/** Number of threads to use during indexing. */
protected int numberOfThreads = 10;
//protected int numberOfThreads = 10;
/** List of IndexingEventListeners */
protected LinkedList<IndexingEventListener> indexingEventListeners =
new LinkedList<IndexingEventListener>();
public static final int MAX_REINDEX_THREADS= 10;
public static final int MAX_UPDATE_THREADS= 10;
public static final int MAX_THREADS = Math.max( MAX_UPDATE_THREADS, MAX_REINDEX_THREADS);
/** number of threads to use during a full index rebuild. */
public static final int REINDEX_THREADS= 1;
/** Max threads to use during an update. Smaller updates will use fewer threads. */
public static final int MAX_UPDATE_THREADS= 1;
private static final Log log = LogFactory.getLog(IndexBuilder.class);
@ -285,9 +287,8 @@ public class IndexBuilder extends VitroBackgroundThread {
log.debug("Getting all URIs in the model");
Iterator<String> uris = wdf.getIndividualDao().getAllOfThisTypeIterator();
this.numberOfThreads = MAX_REINDEX_THREADS;
doBuild(uris, Collections.<String>emptyList() );
doBuild(uris, Collections.<String>emptyList(), REINDEX_THREADS );
if( log != null ) //log might be null if system is shutting down.
log.info("Rebuild of search index is complete.");
@ -298,8 +299,10 @@ public class IndexBuilder extends VitroBackgroundThread {
UriLists uriLists = makeAddAndDeleteLists( changedStatementsToUris() );
this.numberOfThreads = Math.max( MAX_UPDATE_THREADS, uriLists.updatedUris.size() / 20);
doBuild( uriLists.updatedUris.iterator(), uriLists.deletedUris );
int numberOfThreads =
Math.min( MAX_UPDATE_THREADS,
Math.max( uriLists.updatedUris.size() / 100, 1));
doBuild( uriLists.updatedUris.iterator(), uriLists.deletedUris , numberOfThreads);
log.debug("Ending updateIndex()");
}
@ -318,7 +321,7 @@ public class IndexBuilder extends VitroBackgroundThread {
* to false, and a check is made before adding, it will work fine; but
* checking if an object is on the index is slow.
*/
private void doBuild(Iterator<String> updates, Collection<String> deletes ){
private void doBuild(Iterator<String> updates, Collection<String> deletes, int numberOfThreads ){
boolean updateRequested = ! reindexRequested;
try {
@ -341,7 +344,7 @@ public class IndexBuilder extends VitroBackgroundThread {
}
}
indexUriList(updates);
indexUriList(updates, numberOfThreads);
} catch (Exception e) {
if( log != null) log.debug("Exception during indexing",e);
@ -354,12 +357,7 @@ public class IndexBuilder extends VitroBackgroundThread {
* Use the back end indexer to index each object that the Iterator returns.
* @throws AbortIndexing
*/
private void indexUriList(Iterator<String> updateUris ) {
//make a copy of numberOfThreads so the local copy is safe during this method.
int numberOfThreads = this.numberOfThreads;
if( numberOfThreads > MAX_THREADS )
numberOfThreads = MAX_THREADS;
private void indexUriList(Iterator<String> updateUris , int numberOfThreads) {
//make lists of work URIs for workers
List<List<String>> workLists = makeWorkerUriLists(updateUris, numberOfThreads);

View file

@ -11,7 +11,7 @@ import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndexerIface;
import edu.cornell.mannlib.vitro.webapp.search.solr.IndividualToSolrDocument;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.IndividualToSolrDocument;
class IndexWorkerThread extends Thread{
@ -83,8 +83,8 @@ class IndexWorkerThread extends Thread{
}
}catch(Throwable th){
//on tomcat shutdown odd exceptions get thrown and log can be null
if( log != null )
log.debug("Exception during index building",th);
if( log != null && ! stopRequested )
log.error("Exception during index building",th);
}
}
}

View file

@ -20,6 +20,7 @@ import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.beans.IndexerIface;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.IndividualToSolrDocument;
public class SolrIndexer implements IndexerIface {

View file

@ -30,6 +30,7 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.rdfservice.impl.RDFServiceUtils;
import edu.cornell.mannlib.vitro.webapp.search.beans.FileBasedProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.StatementToURIsToUpdate;
@ -40,6 +41,17 @@ import edu.cornell.mannlib.vitro.webapp.search.indexing.AdditionalURIsForTypeSta
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
import edu.cornell.mannlib.vitro.webapp.search.indexing.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.indexing.URIsForClassGroupChange;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.DocumentModifier;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnNamespace;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnType;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeBasedOnTypeNamespace;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ExcludeNonFlagVitro;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.IndividualToSolrDocument;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.NameBoost;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.NameFields;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.SearchIndexExcluder;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.SyncingExcludeBasedOnType;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ThumbnailImageURL;
import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus;
public class SolrSetup implements javax.servlet.ServletContextListener{
@ -120,9 +132,9 @@ public class SolrSetup implements javax.servlet.ServletContextListener{
if( modifiers == null )
modifiers = new ArrayList<DocumentModifier>();
modifiers.add(new NameBoost( 1.2f ));
modifiers.add(new ThumbnailImageURL(jenaOntModel));
modifiers.add( new NameFields( RDFServiceUtils.getRDFServiceFactory(context)));
modifiers.add( new NameBoost( 1.2f ));
modifiers.add( new ThumbnailImageURL(jenaOntModel));
/* try to get context attribute SearchIndexExcludes
* and use that as the start of the list of exclude

View file

@ -1,6 +1,6 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.util.ArrayList;
import java.util.HashSet;

View file

@ -1,5 +1,5 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.util.ArrayList;
import java.util.Collection;

View file

@ -1,11 +1,10 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
/**
* This interface represents an object that can add to a SolrInputDocument.
*/

View file

@ -1,5 +1,5 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.util.Arrays;
import java.util.List;

View file

@ -1,5 +1,5 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.util.ArrayList;
import java.util.Arrays;

View file

@ -1,5 +1,5 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.util.Arrays;
import java.util.Collections;

View file

@ -1,5 +1,5 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.util.List;

View file

@ -1,6 +1,7 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.util.ArrayList;
import java.util.HashMap;
@ -60,10 +61,10 @@ public class IndividualToSolrDocument {
//vitro id
doc.addField(term.URI, ind.getURI());
//Individual Label
addLabel( ind, doc );
//add classes, classgroups get if prohibied becasue of its class
//get label from ind
addLabel(ind, doc);
//add classes, classgroups get if prohibited because of its class
StringBuffer classPublicNames = new StringBuffer("");
addClasses(ind, doc, classPublicNames);
@ -72,7 +73,7 @@ public class IndividualToSolrDocument {
StringBuffer addUri = new StringBuffer("");
addObjectPropertyText(ind, doc, objectNames, addUri);
//time of index in millis past epoc
//time of index in msec past epoch
doc.addField(term.INDEXEDTIME, new Long( (new DateTime()).getMillis() ) );
addAllText( ind, doc, classPublicNames, objectNames );
@ -89,10 +90,10 @@ public class IndividualToSolrDocument {
//indicates that this individual should not be indexed by returning null
log.debug(ex);
return null;
}catch(Throwable th){
}catch(Exception th){
//Odd exceptions can get thrown on shutdown
if( log != null )
log.debug(th);
log.error(th,th);
return null;
}
}
@ -190,19 +191,7 @@ public class IndividualToSolrDocument {
doc.addField(term.ALLTEXT_PHONETIC, alltext);
}
/**
 * Writes the name of the individual into the document's NAME_RAW field.
 * The rdfs:label is used when the individual has one; otherwise the
 * local name of the individual is used.
 *
 * @param ind the individual whose name is being indexed
 * @param doc the Solr document that receives the name field
 */
protected void addLabel(Individual ind, SolrInputDocument doc) {
    String rdfsLabel = ind.getRdfsLabel();
    if (rdfsLabel == null) {
        // no rdfs:label available, fall back to the local name
        doc.addField(term.NAME_RAW, ind.getLocalName());
    } else {
        doc.addField(term.NAME_RAW, rdfsLabel);
    }
    // Solr copies NAME_RAW into these fields:
    // NAME_LOWERCASE, NAME_UNSTEMMED, NAME_STEMMED, NAME_PHONETIC, AC_NAME_UNTOKENIZED, AC_NAME_STEMMED
}
@ -278,6 +267,22 @@ public class IndividualToSolrDocument {
}
}
}
/**
 * Writes the name of the individual into the document. The rdfs:label is
 * used when the individual has one; otherwise the local name is used.
 * The same value is written to both NAME_RAW and
 * NAME_LOWERCASE_SINGLE_VALUED.
 *
 * @param ind the individual whose name is being indexed
 * @param doc the Solr document that receives the name fields
 */
protected void addLabel(Individual ind, SolrInputDocument doc) {
    String rdfsLabel = ind.getRdfsLabel();
    String name = (rdfsLabel != null) ? rdfsLabel : ind.getLocalName();

    doc.addField(term.NAME_RAW, name);
    // set explicitly here, so this field gets exactly one value per document
    doc.addField(term.NAME_LOWERCASE_SINGLE_VALUED, name);

    // Solr copies NAME_RAW into these fields:
    // NAME_LOWERCASE, NAME_UNSTEMMED, NAME_STEMMED, NAME_PHONETIC, AC_NAME_UNTOKENIZED, AC_NAME_STEMMED
}
public Object getIndexId(Object obj) {
throw new Error("IndiviudalToSolrDocument.getIndexId() is unimplemented");

View file

@ -1,6 +1,6 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;

View file

@ -0,0 +1,72 @@
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.common.SolrInputDocument;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFService.ResultFormat;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceException;
import edu.cornell.mannlib.vitro.webapp.rdfservice.RDFServiceFactory;
import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
/**
 * Adds all labels to name fields, not just the one returned by Individual.getName().
 *
 * For each individual a SPARQL SELECT for its rdfs:label values is run
 * against an RDFService obtained from the factory. The result is requested
 * as CSV, the header line is dropped, and the remaining lines are joined
 * with single spaces and added to the document as one more NAME_RAW value.
 */
public class NameFields implements DocumentModifier {
    /** Factory used to obtain the RDFService that the label query runs against. */
    RDFServiceFactory rsf;

    public static final VitroSearchTermNames term = new VitroSearchTermNames();
    public static final Log log = LogFactory.getLog(NameFields.class.getName());

    /**
     * Charset used to decode the CSV result stream. The SPARQL 1.1 CSV
     * results format is defined as UTF-8; relying on the platform default
     * could corrupt labels that contain non-ASCII characters.
     */
    private static final String RESULT_CHARSET = "UTF-8";

    /**
     * @param rsf factory used to get an RDFService for each label query
     */
    public NameFields( RDFServiceFactory rsf){
        this.rsf = rsf;
    }

    /**
     * Queries for every rdfs:label of the individual and adds them, joined
     * by spaces, to the NAME_RAW field of the document. Nothing is done when
     * the individual or its URI is null. Failures while querying or reading
     * the result are logged and do not prevent the individual from being
     * indexed.
     *
     * @param ind the individual being indexed
     * @param doc the Solr document to add the labels to
     * @param addUri not used by this modifier
     */
    @Override
    public void modifyDocument(Individual ind, SolrInputDocument doc,
            StringBuffer addUri) throws SkipIndividualException {
        if( ind == null || ind.getURI() == null ){
            return;
        }

        //also run SPARQL query to get rdfs:label values
        String query =
            "SELECT ?label WHERE { " +
            "<" + ind.getURI() + "> " +
            "<http://www.w3.org/2000/01/rdf-schema#label> ?label }";

        try {
            RDFService rdfService = rsf.getRDFService();
            BufferedReader stream =
                new BufferedReader(new InputStreamReader(
                        rdfService.sparqlSelectQuery(query, ResultFormat.CSV),
                        RESULT_CHARSET));

            StringBuilder buffer = new StringBuilder();
            try {
                //throw out first line since it is just a header
                stream.readLine();

                // NOTE(review): lines are used verbatim, so any CSV quoting of
                // values containing commas or quotes ends up in the field.
                String line;
                while( (line = stream.readLine()) != null ){
                    buffer.append(line).append(' ');
                }
            } finally {
                // always release the result stream, even when reading fails
                try {
                    stream.close();
                } catch (IOException e) {
                    log.debug("could not close label result stream for " + ind.getURI(), e);
                }
            }

            log.debug("Adding labels for " + ind.getURI() + " \"" + buffer.toString() + "\"");

            // do not add an empty value when the query returned no labels
            if( buffer.length() > 0 ){
                doc.addField(term.NAME_RAW, buffer.toString());
            }

        } catch (RDFServiceException e) {
            log.error("could not get the rdfs:label for " + ind.getURI(), e);
        } catch (IOException e) {
            log.error("could not get the rdfs:label for " + ind.getURI(), e);
        }
    }

    @Override
    public void shutdown() { /*nothing to do */ }
}

View file

@ -1,5 +1,5 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;

View file

@ -1,8 +1,8 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
class SkipIndividualException extends Exception{
public class SkipIndividualException extends Exception{
public SkipIndividualException(String string) {
super(string);

View file

@ -1,6 +1,6 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import org.apache.solr.common.SolrInputDocument;

View file

@ -1,5 +1,5 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.util.ArrayList;
import java.util.List;

View file

@ -1,6 +1,6 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.solr;
package edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding;
import java.util.Iterator;

View file

@ -23,6 +23,8 @@ import edu.cornell.mannlib.vitro.testing.AbstractTestClass;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
import edu.cornell.mannlib.vitro.webapp.search.VitroSearchTermNames;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.SkipIndividualException;
import edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ThumbnailImageURL;
/**
* @author bdc34
@ -49,7 +51,7 @@ public class ThumbnailImageURLTest extends AbstractTestClass{
}
/**
* Test method for {@link edu.cornell.mannlib.vitro.webapp.search.solr.ThumbnailImageURL#modifyDocument(edu.cornell.mannlib.vitro.webapp.beans.Individual, org.apache.solr.common.SolrInputDocument, java.lang.StringBuffer)}.
* Test method for {@link edu.cornell.mannlib.vitro.webapp.search.solr.documentBuilding.ThumbnailImageURL#modifyDocument(edu.cornell.mannlib.vitro.webapp.beans.Individual, org.apache.solr.common.SolrInputDocument, java.lang.StringBuffer)}.
*/
@Test
public void testModifyDocument() {