Manually copy IndexBuilder.java from branch - manual merge of the remainder of 4725 (other files already merged).

This commit is contained in:
jeb228 2010-04-14 17:58:15 +00:00
parent 846aad6d50
commit d0756fa6ce

View file

@ -1,213 +1,333 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */ /* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.search.indexing; package edu.cornell.mannlib.vitro.webapp.search.indexing;
import java.util.Iterator; import java.util.Collection;
import java.util.LinkedList; import java.util.Iterator;
import java.util.List; import java.util.LinkedList;
import java.util.List;
import javax.servlet.ServletContext;
import javax.servlet.ServletContext;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl; import edu.cornell.mannlib.vitro.webapp.beans.Individual;
import edu.cornell.mannlib.vitro.webapp.search.IndexingException; import edu.cornell.mannlib.vitro.webapp.beans.IndividualImpl;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface; import edu.cornell.mannlib.vitro.webapp.beans.VClass;
import edu.cornell.mannlib.vitro.webapp.utils.EntityChangeListener; import edu.cornell.mannlib.vitro.webapp.dao.VClassDao;
import edu.cornell.mannlib.vitro.webapp.dao.VitroVocabulary;
/** import edu.cornell.mannlib.vitro.webapp.dao.WebappDaoFactory;
* The IndexBuilder is used to rebuild or update a search index. import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
* It uses an implementation of a backend through an object that import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
* implements IndexerIface. An example of a backend is LuceneIndexer. import edu.cornell.mannlib.vitro.webapp.utils.EntityChangeListener;
*
* The IndexBuilder implements the EntityChangeListener so it can /**
* be registered for Entity changes from the GenericDB classes. * The IndexBuilder is used to rebuild or update a search index.
* * It uses an implementation of a backend through an object that
* There should be an IndexBuilder in the servlet context, try: * implements IndexerIface. An example of a backend is LuceneIndexer.
* *
IndexBuilder builder = (IndexBuilder)getServletContext().getAttribute(IndexBuilder.class.getName()); * The IndexBuilder implements the EntityChangeListener so it can
if( request.getParameter("update") != null ) * be registered for Entity changes from the GenericDB classes.
builder.doUpdateIndex(); *
* There should be an IndexBuilder in the servlet context, try:
* @author bdc34 *
* IndexBuilder builder = (IndexBuilder)getServletContext().getAttribute(IndexBuilder.class.getName());
*/ if( request.getParameter("update") != null )
public class IndexBuilder implements Runnable, EntityChangeListener{ builder.doUpdateIndex();
List sourceList = new LinkedList();
IndexerIface indexer = null; * @author bdc34
*
public static final boolean UPDATE_DOCS = false; */
public static final boolean NEW_DOCS = true; public class IndexBuilder implements Runnable, EntityChangeListener{
List<ObjectSourceIface> sourceList = new LinkedList<ObjectSourceIface>();
private static final Log log = LogFactory.getLog(IndexBuilder.class.getName()); IndexerIface indexer = null;
ServletContext context = null;
public IndexBuilder(ServletContext context,
IndexerIface indexer, long lastRun = 0;
List /*ObjectSourceIface*/ sources ){ List<String> changedUris = null;
this.indexer = indexer;
this.sourceList = sources; public static final boolean UPDATE_DOCS = false;
public static final boolean NEW_DOCS = true;
//add this to the context as a EntityChangeListener so that we can
//be notified of entity changes. private static final Log log = LogFactory.getLog(IndexBuilder.class.getName());
context.setAttribute(EntityChangeListener.class.getName(), this);
} public IndexBuilder(ServletContext context,
IndexerIface indexer,
public void addObjectSource(ObjectSourceIface osi) { List /*ObjectSourceIface*/ sources ){
if (osi != null) this.indexer = indexer;
sourceList.add(osi); this.sourceList = sources;
} this.context = context;
public boolean isIndexing(){ changedUris = new LinkedList<String>();
return indexer.isIndexing();
} //add this to the context as a EntityChangeListener so that we can
//be notified of entity changes.
public List getObjectSourceList() { context.setAttribute(EntityChangeListener.class.getName(), this);
return sourceList; }
}
public void addObjectSource(ObjectSourceIface osi) {
public void doIndexBuild() throws IndexingException { if (osi != null)
log.debug(this.getClass().getName() sourceList.add(osi);
+ " performing doFullRebuildIndex()\n"); }
Iterator sources = sourceList.iterator(); public boolean isIndexing(){
List listOfIterators = new LinkedList(); return indexer.isIndexing();
while(sources.hasNext()){ }
Object obj = sources.next();
if( obj != null && obj instanceof ObjectSourceIface ) public List<ObjectSourceIface> getObjectSourceList() {
listOfIterators.add((((ObjectSourceIface) obj) return sourceList;
.getAllOfThisTypeIterator())); }
else
log.debug("\tskipping object of class " public void doIndexBuild() throws IndexingException {
+ obj.getClass().getName() + "\n" log.debug(this.getClass().getName()
+ "\tIt doesn not implement ObjectSourceIface.\n"); + " performing doFullRebuildIndex()\n");
}
if( listOfIterators.size() == 0){ log.debug("Warning: no ObjectSources found.");} Iterator<ObjectSourceIface> sources = sourceList.iterator();
doBuild( listOfIterators, true, NEW_DOCS ); List listOfIterators = new LinkedList();
log.debug(this.getClass().getName() + ".doFullRebuildIndex() Done \n"); while(sources.hasNext()){
} Object obj = sources.next();
if( obj != null && obj instanceof ObjectSourceIface )
public void run() { listOfIterators.add((((ObjectSourceIface) obj)
doUpdateIndex(); .getAllOfThisTypeIterator()));
} else
log.debug("\tskipping object of class "
public void doUpdateIndex() { + obj.getClass().getName() + "\n"
long since = indexer.getModified() - 60000; + "\tIt doesn not implement ObjectSourceIface.\n");
}
Iterator<ObjectSourceIface> sources = sourceList.iterator();
List<Iterator<ObjectSourceIface>> listOfIterators = //clear out changed uris since we are doing a full index rebuild
new LinkedList<Iterator<ObjectSourceIface>>(); getAndEmptyChangedUris();
while (sources.hasNext()) {
Object obj = sources.next(); if( listOfIterators.size() == 0){ log.debug("Warning: no ObjectSources found.");}
if (obj != null && obj instanceof ObjectSourceIface) doBuild( listOfIterators, true, NEW_DOCS );
listOfIterators.add((((ObjectSourceIface) obj) log.debug(this.getClass().getName() + ".doFullRebuildIndex() Done \n");
.getUpdatedSinceIterator(since))); }
else
log.debug("\tskipping object of class " public void run() {
+ obj.getClass().getName() + "\n" doUpdateIndex();
+ "\tIt doesn not implement " + "ObjectSourceIface.\n"); }
}
doBuild( listOfIterators, false, UPDATE_DOCS ); public void doUpdateIndex() {
} long since = indexer.getModified() - 60000;
public void clearIndex(){ Iterator<ObjectSourceIface> sources = sourceList.iterator();
try { List<Iterator<ObjectSourceIface>> listOfIterators =
indexer.clearIndex(); new LinkedList<Iterator<ObjectSourceIface>>();
} catch (IndexingException e) { while (sources.hasNext()) {
log.error("error while clearing index", e); Object obj = sources.next();
} if (obj != null && obj instanceof ObjectSourceIface)
} listOfIterators.add((((ObjectSourceIface) obj)
.getUpdatedSinceIterator(since)));
/** else
* For each sourceIterator, get all of the objects and attempt to log.debug("\tskipping object of class "
* index them. + obj.getClass().getName() + "\n"
* + "\tIt doesn not implement " + "ObjectSourceIface.\n");
* This takes a list of source Iterators and, for each of these, }
* calls indexForSource.
* List<Individual> changedInds = addDepResourceClasses(checkForDeletes(getAndEmptyChangedUris()));
* @param sourceIterators listOfIterators.add( (new IndexBuilder.BuilderObjectSource(changedInds)).getUpdatedSinceIterator(0) );
* @param newDocs true if we know that the document is new. Set
* to false if we want to attempt to remove the object from the index before doBuild( listOfIterators, false, UPDATE_DOCS );
* attempting to index it. If an object is not on the list but you set this }
* to false, and a check is made before adding, it will work fine; but
* checking if an object is on the index is slow. private List<Individual> addDepResourceClasses(List<Individual> inds) {
*/ WebappDaoFactory wdf = (WebappDaoFactory)context.getAttribute("webappDaoFactory");
private void doBuild(List sourceIterators, boolean wipeIndexFirst, boolean newDocs ){ VClassDao vClassDao = wdf.getVClassDao();
try { java.util.ListIterator<Individual> it = inds.listIterator();
indexer.startIndexing(); VClass depResVClass = new VClass(VitroVocabulary.DEPENDENT_RESORUCE);
while(it.hasNext()){
if( wipeIndexFirst ) Individual ind = it.next();
indexer.clearIndex(); List<VClass> classes = ind.getVClasses();
boolean isDepResource = false;
//get an iterator for all of the sources of indexable objects for( VClass clazz : classes){
Iterator sourceIters = sourceIterators.iterator(); if( !isDepResource && VitroVocabulary.DEPENDENT_RESORUCE.equals( clazz.getURI() ) ){
Object obj = null; isDepResource = true;
while (sourceIters.hasNext()) { break;
obj = sourceIters.next(); }
if (obj == null || !(obj instanceof Iterator)) { }
log.debug("\tskipping object of class " if( ! isDepResource ){
+ obj.getClass().getName() + "\n" for( VClass clazz : classes){
+ "\tIt doesn not implement " List<String> superClassUris = vClassDao.getAllSuperClassURIs(clazz.getURI());
+ "Iterator.\n"); for( String uri : superClassUris){
continue; if( VitroVocabulary.DEPENDENT_RESORUCE.equals( uri ) ){
} isDepResource = true;
indexForSource((Iterator)obj, newDocs); break;
} }
} catch (IndexingException ex) { }
log.error("\t" + ex.getMessage(),ex); if( isDepResource )
} catch (Exception e) { break;
log.error("\t"+e.getMessage(),e); }
} finally { }
indexer.endIndexing(); if( isDepResource){
} classes.add(depResVClass);
} ind.setVClasses(classes, true);
}
/** }
* Use the back end indexer to index each object that the Iterator returns. return inds;
* @param items }
* @return
*/ public void clearIndex(){
protected void indexForSource(Iterator items , boolean newDocs){ try {
if( items == null ) return; indexer.clearIndex();
while(items.hasNext()){ } catch (IndexingException e) {
indexItem(items.next(), newDocs); log.error("error while clearing index", e);
} }
} }
/** /**
* Use the backend indexer to index a single item. * For each sourceIterator, get all of the objects and attempt to
* @param item * index them.
* @return *
*/ * This takes a list of source Iterators and, for each of these,
protected void indexItem( Object item, boolean newDoc){ * calls indexForSource.
try{ *
indexer.index(item, newDoc); * @param sourceIterators
}catch(Throwable ex){ * @param newDocs true if we know that the document is new. Set
log.debug("IndexBuilder.indexItem() Error indexing " * to false if we want to attempt to remove the object from the index before
+ item + "\n" +ex); * attempting to index it. If an object is not on the list but you set this
} * to false, and a check is made before adding, it will work fine; but
return ; * checking if an object is on the index is slow.
} */
private void doBuild(List sourceIterators, boolean wipeIndexFirst, boolean newDocs ){
/* These methods are so that the IndexBuilder may register for entity changes */ try {
public void entityAdded(String entityURI) { indexer.startIndexing();
log.debug("IndexBuilder.entityAdded() " + entityURI);
(new Thread(this)).start(); if( wipeIndexFirst )
} indexer.clearIndex();
public void entityDeleted(String entityURI) { //get an iterator for all of the sources of indexable objects
log.debug("IndexBuilder.entityDeleted() " + entityURI); Iterator sourceIters = sourceIterators.iterator();
Individual ent = new IndividualImpl(entityURI); Object obj = null;
try { while (sourceIters.hasNext()) {
indexer.removeFromIndex(ent); obj = sourceIters.next();
} catch (IndexingException e) { if (obj == null || !(obj instanceof Iterator)) {
log.debug("IndexBuilder.entityDeleted failed: " + e); log.debug("\tskipping object of class "
} + obj.getClass().getName() + "\n"
} + "\tIt doesn not implement "
+ "Iterator.\n");
public void entityUpdated(String entityURI) { continue;
log.debug("IndexBuilder.entityUpdate() " + entityURI); }
(new Thread(this)).start(); indexForSource((Iterator)obj, newDocs);
} }
} } catch (IndexingException ex) {
log.error("\t" + ex.getMessage(),ex);
} catch (Exception e) {
log.error("\t"+e.getMessage(),e);
} finally {
indexer.endIndexing();
}
}
/**
 * Hands every object produced by the given iterator to indexItem().
 * A null iterator is silently ignored.
 *
 * @param items iterator over the objects to index; may be null
 * @param newDocs forwarded unchanged to indexItem() for each object
 */
protected void indexForSource(Iterator items , boolean newDocs){
    if( items != null ){
        while( items.hasNext() ){
            Object current = items.next();
            indexItem(current, newDocs);
        }
    }
}
/**
 * Splits a list of changed URIs into individuals that still exist and
 * individuals that have been deleted.
 *
 * Each non-null URI is looked up through the IndividualDao of the
 * WebappDaoFactory stored in the servlet context under "webappDaoFactory".
 * URIs that resolve are collected and returned; URIs that do not resolve
 * are passed to entityDeleted(). Null URIs are skipped.
 *
 * @param uris URIs to check
 * @return the individuals that could still be loaded, in input order
 */
private List<Individual> checkForDeletes(List<String> uris){
    WebappDaoFactory daoFactory = (WebappDaoFactory)context.getAttribute("webappDaoFactory");
    List<Individual> stillPresent = new LinkedList<Individual>();

    Iterator<String> uriIt = uris.iterator();
    while( uriIt.hasNext() ){
        String uri = uriIt.next();
        if( uri == null ){
            continue;
        }

        Individual found = daoFactory.getIndividualDao().getIndividualByURI(uri);
        if( found == null ){
            // nothing is stored under this URI any more: treat it as a delete
            log.debug("found delete in changed uris");
            entityDeleted(uri);
            continue;
        }
        stillPresent.add(found);
    }
    return stillPresent;
}
/**
 * Use the backend indexer to index a single item.
 *
 * Indexing is best-effort: anything thrown by the indexer is caught and
 * logged here, so a failure on one item does not stop indexForSource()
 * from moving on to the next item.
 *
 * @param item the object handed to the indexer
 * @param newDoc forwarded unchanged to the indexer's index() call
 */
protected void indexItem( Object item, boolean newDoc){
    try{
        indexer.index(item, newDoc);
    }catch(Throwable ex){
        // Log at error level and pass the throwable itself so the stack
        // trace is preserved; at debug level a failed item would go
        // unnoticed in a normal logging configuration.
        log.error("IndexBuilder.indexItem() Error indexing " + item, ex);
    }
}
/* These methods are so that the IndexBuilder may register for entity changes */

/**
 * Notification that an entity was added. The URI is queued in the list of
 * changed URIs and a new thread is started that runs this builder.
 *
 * @param entityURI URI of the added entity
 */
public void entityAdded(String entityURI) {
    if( log.isDebugEnabled()){
        log.debug("IndexBuilder.entityAdded() " + entityURI);
    }
    addToChangedUris(entityURI);

    Thread updater = new Thread(this);
    updater.start();
}
/**
 * Notification that an entity was deleted. An IndividualImpl carrying the
 * given URI is handed to the indexer for removal from the index. An
 * IndexingException raised by the removal is logged at debug level and
 * not rethrown.
 *
 * @param entityURI URI of the deleted entity
 */
public void entityDeleted(String entityURI) {
    if( log.isDebugEnabled()){
        log.debug("IndexBuilder.entityDeleted() " + entityURI);
    }

    try {
        Individual removed = new IndividualImpl(entityURI);
        indexer.removeFromIndex(removed);
    } catch (IndexingException removeFailure) {
        log.debug("IndexBuilder.entityDeleted failed: " + removeFailure);
    }
}
/**
 * Notification that an entity was updated. The URI is queued in the list
 * of changed URIs and a new thread is started that runs this builder.
 *
 * @param entityURI URI of the updated entity
 */
public void entityUpdated(String entityURI) {
    if( log.isDebugEnabled()){
        log.debug("IndexBuilder.entityUpdate() " + entityURI);
    }
    addToChangedUris(entityURI);

    Thread updater = new Thread(this);
    updater.start();
}
/**
 * Appends a single URI to the list of changed URIs. Synchronized on this
 * builder, like the other methods that touch the list.
 *
 * @param uri URI of the changed entity
 */
public synchronized void addToChangedUris(String uri){
    this.changedUris.add(uri);
}
/**
 * Appends every URI in the given collection to the list of changed URIs.
 * Synchronized on this builder, like the other methods that touch the list.
 *
 * @param uris URIs of the changed entities
 */
public synchronized void addToChangedUris(Collection<String> uris){
    this.changedUris.addAll(uris);
}
/**
 * Returns a copy of the changed URIs collected so far and replaces the
 * collected list with a fresh, empty one. Synchronized on this builder so
 * the copy and the reset happen as one step relative to addToChangedUris().
 *
 * @return the URIs that were queued at the time of the call
 */
private synchronized List<String> getAndEmptyChangedUris(){
    LinkedList<String> snapshot = new LinkedList<String>(changedUris);
    changedUris = new LinkedList<String>();
    return snapshot;
}
private class BuilderObjectSource implements ObjectSourceIface {
private final List<Individual> individuals;
public BuilderObjectSource( List<Individual> individuals){
this.individuals=individuals;
}
public Iterator getAllOfThisTypeIterator() {
return new Iterator(){
final Iterator it = individuals.iterator();
public boolean hasNext() {
return it.hasNext();
}
public Object next() {
return it.next();
}
public void remove() { /* not implemented */}
};
}
public Iterator getUpdatedSinceIterator(long msSinceEpoc) {
return getAllOfThisTypeIterator();
}
}
}