From 709b1fc5346c39d8fe319fa3f312049c16351555 Mon Sep 17 00:00:00 2001 From: bdc34 Date: Thu, 13 Jan 2011 21:24:51 +0000 Subject: [PATCH] Working on improving error messages for lucene at startup NIHVIVO-1483 --- webapp/config/default.log4j.properties | 1 - .../webapp/dao/jena/VClassGroupCache.java | 3 +- .../webapp/search/indexing/IndexBuilder.java | 8 +- .../webapp/search/lucene/LuceneIndexer.java | 213 +++++++++++------- .../webapp/search/lucene/LuceneSetup.java | 26 ++- 5 files changed, 159 insertions(+), 92 deletions(-) diff --git a/webapp/config/default.log4j.properties b/webapp/config/default.log4j.properties index c9d51e482..75c0575e5 100644 --- a/webapp/config/default.log4j.properties +++ b/webapp/config/default.log4j.properties @@ -37,4 +37,3 @@ log4j.logger.edu.cornell.mannlib.vitro.webapp.auth.policy.ServletPolicyList=WARN log4j.logger.edu.cornell.mannlib.vitro.webapp.controller.freemarker.BrowseController=WARN log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.pellet.PelletListener=WARN log4j.logger.edu.cornell.mannlib.vitro.webapp.dao.jena.RDBGraphGenerator=WARN -log4j.logger.edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexer=WARN diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/VClassGroupCache.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/VClassGroupCache.java index 9c142b0f5..6dade2858 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/VClassGroupCache.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/dao/jena/VClassGroupCache.java @@ -365,7 +365,8 @@ public class VClassGroupCache implements ServletContextListener{ @Override public void contextDestroyed(ServletContextEvent arg0) { - _cacheRebuildThread.kill(); + if( _cacheRebuildThread != null ) + _cacheRebuildThread.kill(); } @Override diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java index 73476958d..becadbbab 
100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/indexing/IndexBuilder.java @@ -150,8 +150,7 @@ public class IndexBuilder { return out; } - protected void indexRebuild() throws IndexingException { - setReindexRequested(false); + protected void indexRebuild() throws IndexingException { log.info("Rebuild of search index is starting."); Iterator sources = sourceList.iterator(); @@ -171,7 +170,8 @@ public class IndexBuilder { getAndEmptyChangedUris(); if( listOfIterators.size() == 0){ log.debug("Warning: no ObjectSources found.");} - + + setReindexRequested(false); doBuild( listOfIterators, Collections.EMPTY_LIST, true, NEW_DOCS ); log.info("Rebuild of search index is complete."); } @@ -291,7 +291,7 @@ public class IndexBuilder { for(Individual deleteMe : deletes ){ indexer.removeFromIndex(deleteMe); } - } + } //get an iterator for all of the sources of indexable objects Iterator sourceIters = sourceIterators.iterator(); diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java index de8359647..1fcdb0bbf 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneIndexer.java @@ -11,14 +11,17 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.index.TermDocs; +import 
org.apache.lucene.index.TermEnum; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -35,7 +38,8 @@ import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface; */ public class LuceneIndexer implements IndexerIface { - private final static Log log = LogFactory.getLog(LuceneIndexer.class.getName()); + private final static Log log = LogFactory.getLog(LuceneIndexer.class); + LinkedList obj2DocList = new LinkedList(); String baseIndexDir = null; String liveIndexDir = null; @@ -79,45 +83,12 @@ public class LuceneIndexer implements IndexerIface { this.liveIndexDir = liveIndexDir; this.analyzer = analyzer; if( searchers != null ) - this.searchers = searchers; - makeIndexIfNone(); - } - - private synchronized void makeIndexIfNone() throws IOException { - if( !liveIndexExists() ){ - log.debug("Making new index dir and initially empty lucene index at " + liveIndexDir); - closeWriter(); - - File baseDir = new File(baseIndexDir); - baseDir.mkdirs(); - - File dir = new File(liveIndexDir); - dir.mkdirs(); - - writer = new IndexWriter(liveIndexDir,analyzer,true,MAX_FIELD_LENGTH); - closeWriter(); - } - } - - private boolean liveIndexExists(){ - Directory fsDir = null; - IndexSearcher isearcher = null ; - try{ - fsDir = FSDirectory.getDirectory(liveIndexDir); - isearcher = new IndexSearcher(fsDir); - return true; - }catch(Exception ex){ - return false; - }finally{ - try{ - if( isearcher != null ) - isearcher.close(); - if( fsDir != null) - fsDir.close(); - }catch(Exception ex){} - } - } - + this.searchers = searchers; + + updateTo1p2(); + makeEmptyIndexIfNone(); + } + public synchronized void addObj2Doc(Obj2DocIface o2d) { if (o2d != null) obj2DocList.add(o2d); @@ -133,6 +104,14 @@ public class LuceneIndexer implements IndexerIface { } searchers.add( s ); } + + @Override + public synchronized void prepareForRebuild() throws IndexingException { + if( this.indexing ) + log.error("Only an update will be performed, must call 
prepareForRebuild() before startIndexing()"); + else + this.fullRebuild = true; + } /** * Checks to see if indexing is currently happening. @@ -143,12 +122,12 @@ public class LuceneIndexer implements IndexerIface { public synchronized void startIndexing() throws IndexingException{ while( indexing ){ //wait for indexing to end. - log.info("LuceneIndexer.startIndexing() waiting..."); + log.debug("LuceneIndexer.startIndexing() waiting..."); try{ wait(); } catch(InterruptedException ex){} } checkStartPreconditions(); try { - log.info("Starting to index"); + log.debug("Starting to index"); if( this.fullRebuild ){ String offLineDir = getOffLineBuildDir(); this.currentOffLineDir = offLineDir; @@ -166,16 +145,6 @@ public class LuceneIndexer implements IndexerIface { } } - private void checkStartPreconditions() { - if( this.writer != null ) - log.info("it is expected that the writer would " + - "be null but it isn't"); - if( this.currentOffLineDir != null) - log.info("it is expected that the current" + - "OffLineDir would be null but it is " + currentOffLineDir); - if( indexing ) - log.info("indexing should not be set to true just yet"); - } public synchronized void endIndexing() { if( ! indexing ){ @@ -184,7 +153,7 @@ public class LuceneIndexer implements IndexerIface { } try { urisIndexed = null; - log.info("ending index"); + log.debug("ending index"); if( writer != null ) writer.optimize(); @@ -208,23 +177,15 @@ public class LuceneIndexer implements IndexerIface { notifyAll(); } } - - private synchronized void bringRebuildOnLine() { - closeWriter(); - deleteDir(new File(liveIndexDir)); - File offLineDir = new File(currentOffLineDir); - File liveDir = new File(liveIndexDir); - boolean success = offLineDir.renameTo( liveDir ); - if( ! 
success ) - log.error("could not move off line index at " - + offLineDir.getAbsolutePath() + " to live index directory " - + liveDir.getAbsolutePath()); + + public void setLuceneIndexFactory(LuceneIndexFactory lif) { + luceneIndexFactory = lif; + } + + public synchronized Analyzer getAnalyzer(){ + return analyzer; } - - public synchronized Analyzer getAnalyzer(){ - return analyzer; - } - + /** * Indexes an object. startIndexing() must be called before this method * to setup the modifier. @@ -314,6 +275,18 @@ public class LuceneIndexer implements IndexerIface { writer = null; } + private synchronized void bringRebuildOnLine() { + closeWriter(); + deleteDir(new File(liveIndexDir)); + File offLineDir = new File(currentOffLineDir); + File liveDir = new File(liveIndexDir); + boolean success = offLineDir.renameTo( liveDir ); + if( ! success ) + log.error("could not move off line index at " + + offLineDir.getAbsolutePath() + " to live index directory " + + liveDir.getAbsolutePath()); + } + private synchronized String getOffLineBuildDir(){ File baseDir = new File(baseIndexDir); baseDir.mkdirs(); @@ -354,18 +327,100 @@ public class LuceneIndexer implements IndexerIface { } // The directory is now empty so delete it return dir.delete(); + } + + private void checkStartPreconditions() { + if( this.writer != null ) + log.error("it is expected that the writer would " + + "be null but it isn't"); + if( this.currentOffLineDir != null) + log.error("it is expected that the current" + + "OffLineDir would be null but it is " + currentOffLineDir); + if( indexing ) + log.error("indexing should not be set to true just yet"); } - public void setLuceneIndexFactory(LuceneIndexFactory lif) { - luceneIndexFactory = lif; + + private synchronized void makeEmptyIndexIfNone() throws IOException { + if( !liveIndexExists() ){ + log.debug("Making new index dir and initially empty lucene index at " + liveIndexDir); + closeWriter(); + makeIndexDirs(); + writer = new 
IndexWriter(liveIndexDir,analyzer,true,MAX_FIELD_LENGTH); + closeWriter(); + } + } + + private synchronized void makeIndexDirs() throws IOException{ + File baseDir = new File(baseIndexDir); + if( ! baseDir.exists()) + baseDir.mkdirs(); + + File dir = new File(liveIndexDir); + if( ! dir.exists() ) + dir.mkdirs(); + } + + private boolean liveIndexExists(){ + return indexExistsAt(liveIndexDir); } - @Override - public synchronized void prepareForRebuild() throws IndexingException { - if( this.indexing ) - log.error("Only an update will be performed, must call prepareForRebuild() before startIndexing()"); - else - this.fullRebuild = true; + private boolean indexExistsAt(String dirName){ + Directory fsDir = null; + try{ + fsDir = FSDirectory.getDirectory(dirName); + return IndexReader.indexExists(fsDir); + }catch(Exception ex){ + return false; + }finally{ + try{ + if( fsDir != null) + fsDir.close(); + }catch(Exception ex){} + } } - + + /* + * If needed, create new 1.2 style index directories and copy old index to new dirs. + */ + private synchronized void updateTo1p2() throws IOException { + //check if live index directory exists, don't check for a lucene index. + File liveDirF = new File(this.liveIndexDir); + if( ! liveDirF.exists() && indexExistsAt(baseIndexDir)){ + log.info("Updating to vitro 1.2 search index directory structure"); + makeIndexDirs(); + File live = new File(liveIndexDir); + + //copy existing index to live index directory + File baseDir = new File(baseIndexDir); + for( File file : baseDir.listFiles()){ + if( ! file.isDirectory() && ! live.getName().equals(file.getName() ) ){ + FileUtils.copyFile(file, new File(liveIndexDir+File.separator+file.getName())); + boolean success = file.delete(); + if( ! 
success ) + log.error("could not delete "+ baseIndexDir + file.getName()); + } + } + log.info("Done updating to vitro 1.2 search index directory structure."); + } + } + + public boolean isIndexEmpty() throws CorruptIndexException, IOException{ + TermDocs td = null; + try{ + IndexReader reader = IndexReader.open(new File( this.liveIndexDir )); + td = reader.termDocs(new Term( Entity2LuceneDoc.VitroLuceneTermNames.DOCID) ); + if( td.next() ) + return false; + else + return true; + }finally{ + if (td != null) td.close(); + } + } + + public boolean isIndexCorroupt(){ + //if it is, clear it out but don't rebuild. + return false; + } } diff --git a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java index 600e66295..cdc06eabb 100644 --- a/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java +++ b/webapp/src/edu/cornell/mannlib/vitro/webapp/search/lucene/LuceneSetup.java @@ -98,6 +98,14 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { context.setAttribute(LuceneIndexer.class.getName(), indexer); indexer.setLuceneIndexFactory(lif); + if( indexer.isIndexCorroupt() ){ + log.info("index is corrupt, requesting rebuild"); + } + if( indexer.isIndexEmpty() ){ + log.info("index is empty, requesting rebuild"); + sce.getServletContext().setAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP, Boolean.TRUE); + } + // This is where the builder gets the list of places to try to // get objects to index. It is filtered so that non-public text // does not get into the search index. 
@@ -130,20 +138,22 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { if( (Boolean)sce.getServletContext().getAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP) instanceof Boolean && (Boolean)sce.getServletContext().getAttribute(INDEX_REBUILD_REQUESTED_AT_STARTUP) ){ - builder.doIndexRebuild(); - log.info("Rebuild of search index required before startup."); + log.info("Rebuild of search index required before startup."); + builder.doIndexRebuild(); + Thread.currentThread().sleep(500); int n = 0; - while( builder.isIndexing() ){ - Thread.currentThread().sleep(500); + while( builder.isReindexRequested() || builder.isIndexing() ){ + n++; if( n % 20 == 0 ) //output message every 10 sec. log.info("Still rebulding search index"); - } - log.info("Search index rebuild completed."); + Thread.currentThread().sleep(500); + } } log.debug("**** End of " + this.getClass().getName() + ".contextInitialized()"); } catch (Throwable t) { log.error("***** Error setting up Lucene search *****", t); + throw new RuntimeException("Startup of vitro application was prevented by errors in the lucene configuration"); } } @@ -153,7 +163,9 @@ public class LuceneSetup implements javax.servlet.ServletContextListener { public void contextDestroyed(ServletContextEvent sce) { log.debug("**** Running " + this.getClass().getName() + ".contextDestroyed()"); IndexBuilder builder = (IndexBuilder) sce.getServletContext().getAttribute(IndexBuilder.class.getName()); - builder.killIndexingThread(); + if( builder != null){ + builder.killIndexingThread(); + } } /**