Working on improving error messages for lucene at startup NIHVIVO-1483

This commit is contained in:
bdc34 2011-01-13 21:24:51 +00:00
parent 86efdf09e9
commit 709b1fc534
5 changed files with 159 additions and 92 deletions

View file

@ -37,4 +37,3 @@ log4j.logger.edu.cornell.mannlib.vitro.webapp.auth.policy.ServletPolicyList=WARN
log4j.logger.edu.cornell.mannlib.vitro.webapp.controller.freemarker.BrowseController=WARN log4j.logger.edu.cornell.mannlib.vitro.webapp.controller.freemarker.BrowseController=WARN
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.pellet.PelletListener=WARN log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.pellet.PelletListener=WARN
log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.RDBGraphGenerator=WARN log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.RDBGraphGenerator=WARN
log4j.logger.edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexer=WARN

View file

@ -365,6 +365,7 @@ public class VClassGroupCache implements ServletContextListener{
@Override @Override
public void contextDestroyed(ServletContextEvent arg0) { public void contextDestroyed(ServletContextEvent arg0) {
if( _cacheRebuildThread != null )
_cacheRebuildThread.kill(); _cacheRebuildThread.kill();
} }

View file

@ -151,7 +151,6 @@ public class IndexBuilder {
} }
protected void indexRebuild() throws IndexingException { protected void indexRebuild() throws IndexingException {
setReindexRequested(false);
log.info("Rebuild of search index is starting."); log.info("Rebuild of search index is starting.");
Iterator<ObjectSourceIface> sources = sourceList.iterator(); Iterator<ObjectSourceIface> sources = sourceList.iterator();
@ -172,6 +171,7 @@ public class IndexBuilder {
if( listOfIterators.size() == 0){ log.debug("Warning: no ObjectSources found.");} if( listOfIterators.size() == 0){ log.debug("Warning: no ObjectSources found.");}
setReindexRequested(false);
doBuild( listOfIterators, Collections.EMPTY_LIST, true, NEW_DOCS ); doBuild( listOfIterators, Collections.EMPTY_LIST, true, NEW_DOCS );
log.info("Rebuild of search index is complete."); log.info("Rebuild of search index is complete.");
} }

View file

@ -11,14 +11,17 @@ import java.util.Iterator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
@ -35,7 +38,8 @@ import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
*/ */
public class LuceneIndexer implements IndexerIface { public class LuceneIndexer implements IndexerIface {
private final static Log log = LogFactory.getLog(LuceneIndexer.class.getName()); private final static Log log = LogFactory.getLog(LuceneIndexer.class);
LinkedList<Obj2DocIface> obj2DocList = new LinkedList<Obj2DocIface>(); LinkedList<Obj2DocIface> obj2DocList = new LinkedList<Obj2DocIface>();
String baseIndexDir = null; String baseIndexDir = null;
String liveIndexDir = null; String liveIndexDir = null;
@ -80,42 +84,9 @@ public class LuceneIndexer implements IndexerIface {
this.analyzer = analyzer; this.analyzer = analyzer;
if( searchers != null ) if( searchers != null )
this.searchers = searchers; this.searchers = searchers;
makeIndexIfNone();
}
private synchronized void makeIndexIfNone() throws IOException { updateTo1p2();
if( !liveIndexExists() ){ makeEmptyIndexIfNone();
log.debug("Making new index dir and initially empty lucene index at " + liveIndexDir);
closeWriter();
File baseDir = new File(baseIndexDir);
baseDir.mkdirs();
File dir = new File(liveIndexDir);
dir.mkdirs();
writer = new IndexWriter(liveIndexDir,analyzer,true,MAX_FIELD_LENGTH);
closeWriter();
}
}
private boolean liveIndexExists(){
Directory fsDir = null;
IndexSearcher isearcher = null ;
try{
fsDir = FSDirectory.getDirectory(liveIndexDir);
isearcher = new IndexSearcher(fsDir);
return true;
}catch(Exception ex){
return false;
}finally{
try{
if( isearcher != null )
isearcher.close();
if( fsDir != null)
fsDir.close();
}catch(Exception ex){}
}
} }
public synchronized void addObj2Doc(Obj2DocIface o2d) { public synchronized void addObj2Doc(Obj2DocIface o2d) {
@ -134,6 +105,14 @@ public class LuceneIndexer implements IndexerIface {
searchers.add( s ); searchers.add( s );
} }
/**
 * Flags the next indexing run as a full rebuild.
 * <p>
 * The flag is only set while no indexing run is in progress. If indexing
 * has already started, the request is rejected: an error is logged and
 * the flag is left untouched.
 */
@Override
public synchronized void prepareForRebuild() throws IndexingException {
    if (!this.indexing) {
        this.fullRebuild = true;
        return;
    }
    log.error("Only an update will be performed, must call prepareForRebuild() before startIndexing()");
}
/** /**
* Checks to see if indexing is currently happening. * Checks to see if indexing is currently happening.
*/ */
@ -143,12 +122,12 @@ public class LuceneIndexer implements IndexerIface {
public synchronized void startIndexing() throws IndexingException{ public synchronized void startIndexing() throws IndexingException{
while( indexing ){ //wait for indexing to end. while( indexing ){ //wait for indexing to end.
log.info("LuceneIndexer.startIndexing() waiting..."); log.debug("LuceneIndexer.startIndexing() waiting...");
try{ wait(); } catch(InterruptedException ex){} try{ wait(); } catch(InterruptedException ex){}
} }
checkStartPreconditions(); checkStartPreconditions();
try { try {
log.info("Starting to index"); log.debug("Starting to index");
if( this.fullRebuild ){ if( this.fullRebuild ){
String offLineDir = getOffLineBuildDir(); String offLineDir = getOffLineBuildDir();
this.currentOffLineDir = offLineDir; this.currentOffLineDir = offLineDir;
@ -166,16 +145,6 @@ public class LuceneIndexer implements IndexerIface {
} }
} }
/**
 * Reports, at info level, every piece of indexer state that is not in the
 * condition expected before an indexing run starts: a writer that is
 * still set, an off-line build directory that is still set, or the
 * indexing flag already being true.
 * <p>
 * Purely diagnostic: no state is modified and nothing is thrown.
 */
private void checkStartPreconditions() {
    final boolean writerStillSet = this.writer != null;
    if (writerStillSet) {
        log.info("it is expected that the writer would be null but it isn't");
    }

    final boolean offLineDirStillSet = this.currentOffLineDir != null;
    if (offLineDirStillSet) {
        log.info("it is expected that the currentOffLineDir would be null but it is "
                + currentOffLineDir);
    }

    if (indexing) {
        log.info("indexing should not be set to true just yet");
    }
}
public synchronized void endIndexing() { public synchronized void endIndexing() {
if( ! indexing ){ if( ! indexing ){
@ -184,7 +153,7 @@ public class LuceneIndexer implements IndexerIface {
} }
try { try {
urisIndexed = null; urisIndexed = null;
log.info("ending index"); log.debug("ending index");
if( writer != null ) if( writer != null )
writer.optimize(); writer.optimize();
@ -209,16 +178,8 @@ public class LuceneIndexer implements IndexerIface {
} }
} }
private synchronized void bringRebuildOnLine() { public void setLuceneIndexFactory(LuceneIndexFactory lif) {
closeWriter(); luceneIndexFactory = lif;
deleteDir(new File(liveIndexDir));
File offLineDir = new File(currentOffLineDir);
File liveDir = new File(liveIndexDir);
boolean success = offLineDir.renameTo( liveDir );
if( ! success )
log.error("could not move off line index at "
+ offLineDir.getAbsolutePath() + " to live index directory "
+ liveDir.getAbsolutePath());
} }
public synchronized Analyzer getAnalyzer(){ public synchronized Analyzer getAnalyzer(){
@ -314,6 +275,18 @@ public class LuceneIndexer implements IndexerIface {
writer = null; writer = null;
} }
/**
 * Swaps the off-line build into the live index location.
 * <p>
 * The writer is closed, the live index directory is deleted, and the
 * current off-line build directory is then renamed to the live path.
 * A failed rename is reported through the error log only.
 */
private synchronized void bringRebuildOnLine() {
    closeWriter();

    final File liveLocation = new File(liveIndexDir);
    deleteDir(liveLocation);

    final File rebuiltIndex = new File(currentOffLineDir);
    if (rebuiltIndex.renameTo(liveLocation)) {
        return;
    }
    log.error("could not move off line index at "
            + rebuiltIndex.getAbsolutePath() + " to live index directory "
            + liveLocation.getAbsolutePath());
}
private synchronized String getOffLineBuildDir(){ private synchronized String getOffLineBuildDir(){
File baseDir = new File(baseIndexDir); File baseDir = new File(baseIndexDir);
baseDir.mkdirs(); baseDir.mkdirs();
@ -356,16 +329,98 @@ public class LuceneIndexer implements IndexerIface {
return dir.delete(); return dir.delete();
} }
public void setLuceneIndexFactory(LuceneIndexFactory lif) { private void checkStartPreconditions() {
luceneIndexFactory = lif; if( this.writer != null )
log.error("it is expected that the writer would " +
"be null but it isn't");
if( this.currentOffLineDir != null)
log.error("it is expected that the current" +
"OffLineDir would be null but it is " + currentOffLineDir);
if( indexing )
log.error("indexing should not be set to true just yet");
} }
@Override
public synchronized void prepareForRebuild() throws IndexingException { private synchronized void makeEmptyIndexIfNone() throws IOException {
if( this.indexing ) if( !liveIndexExists() ){
log.error("Only an update will be performed, must call prepareForRebuild() before startIndexing()"); log.debug("Making new index dir and initially empty lucene index at " + liveIndexDir);
closeWriter();
makeIndexDirs();
writer = new IndexWriter(liveIndexDir,analyzer,true,MAX_FIELD_LENGTH);
closeWriter();
}
}
/**
 * Creates the base index directory and the live index directory if they
 * do not exist yet.
 * <p>
 * A path that already exists is left alone. A directory that cannot be
 * created is reported immediately instead of surfacing later as a less
 * specific failure when the index is opened.
 *
 * @throws IOException if one of the directories is missing and could not
 *         be created
 */
private synchronized void makeIndexDirs() throws IOException {
    for (String path : new String[] { baseIndexDir, liveIndexDir }) {
        File dir = new File(path);
        // mkdirs() also returns false when the directory appeared in the
        // meantime, so re-check before treating it as a failure.
        if (!dir.exists() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("could not create search index directory "
                    + dir.getAbsolutePath());
        }
    }
}
/**
 * Reports whether an index exists at the live index directory.
 *
 * @return the result of {@code indexExistsAt} for the live index directory
 */
private boolean liveIndexExists() {
    final boolean present = indexExistsAt(liveIndexDir);
    return present;
}
/**
 * Reports whether a Lucene index is present in the given directory.
 * <p>
 * This is a best-effort probe: any exception raised while opening or
 * checking the directory is treated as "no index". The cause is logged
 * at debug level so that an unexpected {@code false} can be diagnosed.
 *
 * @param dirName path of the directory to probe
 * @return {@code true} if {@code IndexReader.indexExists} reports an
 *         index there; {@code false} if it does not or the probe failed
 */
private boolean indexExistsAt(String dirName) {
    Directory fsDir = null;
    try {
        fsDir = FSDirectory.getDirectory(dirName);
        return IndexReader.indexExists(fsDir);
    } catch (Exception ex) {
        log.debug("could not check for a lucene index at " + dirName, ex);
        return false;
    } finally {
        if (fsDir != null) {
            try {
                fsDir.close();
            } catch (Exception ex) {
                // Closing is cleanup only; the probe result stands.
                log.debug("could not close lucene directory " + dirName, ex);
            }
        }
    }
}
/**
 * If needed, creates the vitro 1.2 style index directories and moves an
 * existing index from the base index directory into the live index
 * directory.
 * <p>
 * The migration runs only when the live index directory does not exist
 * yet (only the directory is checked, not whether it holds an index) and
 * an index is found directly in the base index directory. Each plain
 * file in the base directory is copied into the live directory and then
 * deleted; sub-directories are skipped. A file that cannot be deleted
 * after copying is logged as an error and the migration continues.
 *
 * @throws IOException if the directories cannot be created, the base
 *         directory cannot be listed, or a file cannot be copied
 */
private synchronized void updateTo1p2() throws IOException {
    File liveDir = new File(this.liveIndexDir);
    if (liveDir.exists() || !indexExistsAt(baseIndexDir)) {
        return;
    }

    log.info("Updating to vitro 1.2 search index directory structure");
    makeIndexDirs();

    // listFiles() returns null when the path is not a directory or an
    // I/O error occurs; fail with a clear message rather than an NPE.
    File[] oldIndexFiles = new File(baseIndexDir).listFiles();
    if (oldIndexFiles == null) {
        throw new IOException("could not list files in search index directory "
                + baseIndexDir);
    }

    for (File file : oldIndexFiles) {
        if (file.isDirectory() || liveDir.getName().equals(file.getName())) {
            continue;
        }
        FileUtils.copyFile(file, new File(liveDir, file.getName()));
        if (!file.delete()) {
            log.error("could not delete " + file.getAbsolutePath());
        }
    }
    log.info("Done updating to vitro 1.2 search index directory structure.");
}
public boolean isIndexEmpty() throws CorruptIndexException, IOException{
TermDocs td = null;
try{
IndexReader reader = IndexReader.open(new File( this.liveIndexDir ));
td = reader.termDocs(new Term( Entity2LuceneDoc.VitroLuceneTermNames.DOCID) );
if( td.next() )
return false;
else else
this.fullRebuild = true; return true;
}finally{
if (td != null) td.close();
}
} }
/**
 * Placeholder for an index corruption check; no check is performed yet.
 *
 * @return always {@code false}
 */
public boolean isIndexCorroupt(){
// Not implemented. Noted intent: if the index is corrupt, clear it out but don't rebuild.
return false;
}
} }

View file

@ -98,6 +98,14 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
context.setAttribute(LuceneIndexer.class.getName(), indexer); context.setAttribute(LuceneIndexer.class.getName(), indexer);
indexer.setLuceneIndexFactory(lif); indexer.setLuceneIndexFactory(lif);
if( indexer.isIndexCorroupt() ){
log.info("index is corrupt, requesting rebuild");
}
if( indexer.isIndexEmpty() ){
log.info("index is empty, requesting rebuild");
sce.getServletContext().setAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE);
}
// This is where the builder gets the list of places to try to // This is where the builder gets the list of places to try to
// get objects to index. It is filtered so that non-public text // get objects to index. It is filtered so that non-public text
// does not get into the search index. // does not get into the search index.
@ -130,20 +138,22 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
if( (Boolean)sce.getServletContext().getAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP) instanceof Boolean && if( (Boolean)sce.getServletContext().getAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP) instanceof Boolean &&
(Boolean)sce.getServletContext().getAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP) ){ (Boolean)sce.getServletContext().getAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP) ){
builder.doIndexRebuild();
log.info("Rebuild of search index required before startup."); log.info("Rebuild of search index required before startup.");
int n = 0; builder.doIndexRebuild();
while( builder.isIndexing() ){
Thread.currentThread().sleep(500); Thread.currentThread().sleep(500);
int n = 0;
while( builder.isReindexRequested() || builder.isIndexing() ){
n++;
if( n % 20 == 0 ) //output message every 10 sec. if( n % 20 == 0 ) //output message every 10 sec.
log.info("Still rebulding search index"); log.info("Still rebulding search index");
Thread.currentThread().sleep(500);
} }
log.info("Search index rebuild completed.");
} }
log.debug("**** End of " + this.getClass().getName() + ".contextInitialized()"); log.debug("**** End of " + this.getClass().getName() + ".contextInitialized()");
} catch (Throwable t) { } catch (Throwable t) {
log.error("***** Error setting up Lucene search *****", t); log.error("***** Error setting up Lucene search *****", t);
throw new RuntimeException("Startup of vitro application was prevented by errors in the lucene configuration");
} }
} }
@ -153,8 +163,10 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
public void contextDestroyed(ServletContextEvent sce) { public void contextDestroyed(ServletContextEvent sce) {
log.debug("**** Running " + this.getClass().getName() + ".contextDestroyed()"); log.debug("**** Running " + this.getClass().getName() + ".contextDestroyed()");
IndexBuilder builder = (IndexBuilder) sce.getServletContext().getAttribute(IndexBuilder.class.getName()); IndexBuilder builder = (IndexBuilder) sce.getServletContext().getAttribute(IndexBuilder.class.getName());
if( builder != null){
builder.killIndexingThread(); builder.killIndexingThread();
} }
}
/** /**
* In wild card searches the query is first broken into many boolean * In wild card searches the query is first broken into many boolean