Adding off line lucene index rebuild NIHVIVO-1483

This commit is contained in:
bdc34 2011-01-13 01:35:24 +00:00
parent f312a15571
commit daedd8a961
8 changed files with 166 additions and 159 deletions

View file

@ -89,12 +89,10 @@ public class AutocompleteController extends FreemarkerHttpServlet{
int maxHitSize = defaultMaxSearchSize;
String indexDir = getIndexDir(getServletContext());
String qtxt = vreq.getParameter(QUERY_PARAMETER_NAME);
Analyzer analyzer = getAnalyzer(getServletContext());
Query query = getQuery(vreq, portalFlag, analyzer, indexDir, qtxt);
Query query = getQuery(vreq, portalFlag, analyzer, qtxt);
log.debug("query for '" + qtxt +"' is " + query.toString());
if (query == null ) {
@ -161,13 +159,13 @@ public class AutocompleteController extends FreemarkerHttpServlet{
}
}
private String getIndexDir(ServletContext servletContext) throws SearchException {
Object obj = servletContext.getAttribute(LuceneSetup.INDEX_DIR);
if( obj == null || !(obj instanceof String) )
throw new SearchException("Could not get IndexDir for lucene index");
else
return (String)obj;
}
// private String getIndexDir(ServletContext servletContext) throws SearchException {
// Object obj = servletContext.getAttribute(LuceneSetup.INDEX_DIR);
// if( obj == null || !(obj instanceof String) )
// throw new SearchException("Could not get IndexDir for lucene index");
// else
// return (String)obj;
// }
private Analyzer getAnalyzer(ServletContext servletContext) throws SearchException {
Object obj = servletContext.getAttribute(LuceneSetup.ANALYZER);
@ -178,7 +176,7 @@ public class AutocompleteController extends FreemarkerHttpServlet{
}
private Query getQuery(VitroRequest request, PortalFlag portalState,
Analyzer analyzer, String indexDir, String querystr) throws SearchException{
Analyzer analyzer, String querystr) throws SearchException{
Query query = null;
try {

View file

@ -16,7 +16,6 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.servlet.ServletConfig;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
@ -38,8 +37,6 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import edu.cornell.mannlib.vitro.webapp.beans.DataProperty;
import edu.cornell.mannlib.vitro.webapp.beans.DataPropertyStatement;
@ -71,7 +68,6 @@ import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQuery;
import edu.cornell.mannlib.vitro.webapp.search.beans.VitroQueryFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.Entity2LuceneDoc;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexFactory;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneIndexer;
import edu.cornell.mannlib.vitro.webapp.search.lucene.LuceneSetup;
import edu.cornell.mannlib.vitro.webapp.search.lucene.SimpleLuceneHighlighter;
import edu.cornell.mannlib.vitro.webapp.utils.FlagMathUtils;
@ -564,18 +560,10 @@ public class FreemarkerPagedSearchController extends FreemarkerHttpServlet imple
return typesInHits;
}
private String getIndexDir(ServletContext servletContext) throws SearchException {
Object obj = servletContext.getAttribute(LuceneSetup.INDEX_DIR);
if( obj == null || !(obj instanceof String) )
throw new SearchException("Could not get IndexDir for luecene index");
else
return (String)obj;
}
private Analyzer getAnalyzer(ServletContext servletContext) throws SearchException {
Object obj = servletContext.getAttribute(LuceneSetup.ANALYZER);
if( obj == null || !(obj instanceof Analyzer) )
throw new SearchException("Could not get anlyzer");
throw new SearchException("Could not get analyzer");
else
return (Analyzer)obj;
}
@ -720,22 +708,6 @@ public class FreemarkerPagedSearchController extends FreemarkerHttpServlet imple
}
}
private synchronized IndexSearcher getIndexSearcher(String indexDir) {
if( searcher == null ){
try {
Directory fsDir = FSDirectory.getDirectory(indexDir);
searcher = new IndexSearcher(fsDir);
} catch (IOException e) {
log.error("LuceneSearcher: could not make indexSearcher "+e);
log.error("It is likely that you have not made a directory for the lucene index. "+
"Create the directory indicated in the error and set permissions/ownership so"+
" that the tomcat server can read/write to it.");
//The index directory is created by LuceneIndexer.makeNewIndex()
}
}
return searcher;
}
private List<Individual> highlightBeans(List<Individual> beans,
DataPropertyDao dpDao, ObjectPropertyDao opDao, VitroHighlighter highlighter) {
if( beans == null ){

View file

@ -280,13 +280,14 @@ public class IndexBuilder {
* to false, and a check is made before adding, it will work fine; but
* checking if an object is on the index is slow.
*/
private void doBuild(List sourceIterators, Collection<Individual> deletes, boolean wipeIndexFirst, boolean newDocs ){
private void doBuild(List sourceIterators, Collection<Individual> deletes, boolean forceNewIndex, boolean newDocs ){
try {
if( forceNewIndex )
indexer.prepareForRebuild();
indexer.startIndexing();
if( wipeIndexFirst )
indexer.clearIndex();
else{
if( ! forceNewIndex ){
for(Individual deleteMe : deletes ){
indexer.removeFromIndex(deleteMe);
}

View file

@ -48,11 +48,7 @@ public interface IndexerIface {
*/
public void removeFromIndex(Individual ind) throws IndexingException;
/**
* Removes all documents from the index.
* @throws IndexingException
*/
public void clearIndex()throws IndexingException;
public void prepareForRebuild() throws IndexingException;
public void startIndexing() throws IndexingException;
public void endIndexing();

View file

@ -2,6 +2,7 @@
package edu.cornell.mannlib.vitro.webapp.search.lucene;
import java.io.File;
import java.io.IOException;
import java.util.List;
@ -18,10 +19,16 @@ import edu.cornell.mannlib.vitro.webapp.search.SearchException;
public class LuceneIndexFactory {
IndexSearcher searcher = null;
String baseIndexDirName = null;
private static final Log log = LogFactory.getLog(LuceneIndexFactory.class.getName());
public static final String LUCENE_INDEX_FACTORY= "LuceneIndexFactory";
/**
 * Creates a factory rooted at the given base index directory.
 * The directory that is actually searched is the "live" subdirectory
 * of this base directory (see getLiveIndexDir()).
 *
 * @param baseIndexDirName base directory of the lucene index; if this is
 * null, getBaseIndexDir() logs an error and no IndexSearcher can be made.
 */
public LuceneIndexFactory(String baseIndexDirName){
this.baseIndexDirName = baseIndexDirName;
}
/**
* Get a lucene IndexSearch. This may return null.
*/
@ -31,10 +38,6 @@ public class LuceneIndexFactory {
public static LuceneIndexFactory getLuceneIndexFactoryFromContext(ServletContext context){
Object obj = context.getAttribute(LUCENE_INDEX_FACTORY);
if( obj == null ){
setup(context);
obj = context.getAttribute(LUCENE_INDEX_FACTORY);
}
if( obj == null ){
log.error("cannot get LuceneIndexFactory from context. Search is not setup correctly");
return null;
@ -48,11 +51,13 @@ public class LuceneIndexFactory {
}
public static void setup(ServletContext context){
public static LuceneIndexFactory setup(ServletContext context, String baseIndexDirName){
LuceneIndexFactory lif = (LuceneIndexFactory)context.getAttribute(LuceneIndexFactory.LUCENE_INDEX_FACTORY);
if( lif == null ){
context.setAttribute(LuceneIndexFactory.LUCENE_INDEX_FACTORY, new LuceneIndexFactory());
lif = new LuceneIndexFactory(baseIndexDirName);
context.setAttribute(LuceneIndexFactory.LUCENE_INDEX_FACTORY, lif);
}
return lif;
}
/**
@ -68,16 +73,17 @@ public class LuceneIndexFactory {
private synchronized IndexSearcher innerGetIndexSearcher(ServletContext context) {
if (searcher == null ) {
String indexDir = getIndexDir( context );
if( indexDir != null ){
String liveDir = getLiveIndexDir( context );
if( liveDir != null ){
try {
Directory fsDir = FSDirectory.getDirectory(indexDir);
Directory fsDir = FSDirectory.getDirectory(liveDir);
searcher = new IndexSearcher(fsDir);
} catch (IOException e) {
log.error("could not make indexSearcher " + e);
String base = getBaseIndexDir();
log.error("could not make IndexSearcher " + e);
log.error("It is likely that you have not made a directory for the lucene index. "
+ "Create the directory indicated in the error and set permissions/ownership so"
+ " that the tomcat server can read and write to it.");
+ "Create the directory " + base + " and set permissions/ownership so"
+ " that the tomcat process can read and write to it.");
}
}else{
log.error("Could not create IndexSearcher because index directory was null. It may be that the LucenSetup.indexDir is " +
@ -87,16 +93,20 @@ public class LuceneIndexFactory {
return searcher;
}
private String getIndexDir(ServletContext servletContext){
Object obj = servletContext.getAttribute(LuceneSetup.INDEX_DIR);
if (obj == null ){
log.error("could not find " + LuceneSetup.INDEX_DIR + " in context. Search is not configured correctly.");
return null;
}else if ( !(obj instanceof String) ){
log.error( LuceneSetup.INDEX_DIR + " from context was not a String. Search is not configured correctly.");
return null;
}else
return (String) obj;
/**
 * Gets the base directory that this factory was constructed with.
 * An error is logged when no base directory was supplied; the null
 * is still returned so that callers can decide how to react.
 *
 * @return the base index directory name, may be null.
 */
protected String getBaseIndexDir(){
    if( this.baseIndexDirName == null ){
        // Spell the class name correctly so that the message can be found
        // when searching the logs for LuceneIndexFactory.
        log.error("LuceneIndexFactory was not setup correctly, it must have a value for baseIndexDir");
    }
    return this.baseIndexDirName;
}
/**
 * Gets the directory of the live index, which is the "live"
 * subdirectory of the base index directory.
 *
 * @param servletContext not consulted by this implementation.
 * @return path of the live index directory, or null when the base
 * index directory is not set.
 */
protected String getLiveIndexDir(ServletContext servletContext){
    final String baseDir = getBaseIndexDir();
    return ( baseDir != null ) ? baseDir + File.separator + "live" : null;
}
}

View file

@ -37,13 +37,17 @@ public class LuceneIndexer implements IndexerIface {
private final static Log log = LogFactory.getLog(LuceneIndexer.class.getName());
LinkedList<Obj2DocIface> obj2DocList = new LinkedList<Obj2DocIface>();
String indexDir = null;
String baseIndexDir = null;
String liveIndexDir = null;
Analyzer analyzer = null;
List<Searcher> searchers = Collections.EMPTY_LIST;
IndexWriter writer = null;
boolean indexing = false;
boolean fullRebuild = false;
HashSet<String> urisIndexed;
private LuceneIndexFactory luceneIndexFactory;
private String currentOffLineDir;
//JODA timedate library can use java date format strings.
//http://java.sun.com/j2se/1.3/docs/api/java/text/SimpleDateFormat.html
@ -70,8 +74,9 @@ public class LuceneIndexer implements IndexerIface {
private static final IndexWriter.MaxFieldLength MAX_FIELD_LENGTH =
IndexWriter.MaxFieldLength.UNLIMITED;
public LuceneIndexer(String indexDir, List<Searcher> searchers, Analyzer analyzer ) throws IOException{
this.indexDir = indexDir;
public LuceneIndexer(String baseIndexDir, String liveIndexDir, List<Searcher> searchers, Analyzer analyzer ) throws IOException{
this.baseIndexDir = baseIndexDir;
this.liveIndexDir = liveIndexDir;
this.analyzer = analyzer;
if( searchers != null )
this.searchers = searchers;
@ -79,15 +84,26 @@ public class LuceneIndexer implements IndexerIface {
}
private synchronized void makeIndexIfNone() throws IOException {
if( !indexExists( indexDir ) )
makeNewIndex();
if( !liveIndexExists() ){
log.debug("Making new index dir and initially empty lucene index at " + liveIndexDir);
closeWriter();
File baseDir = new File(baseIndexDir);
baseDir.mkdirs();
File dir = new File(liveIndexDir);
dir.mkdirs();
writer = new IndexWriter(liveIndexDir,analyzer,true,MAX_FIELD_LENGTH);
closeWriter();
}
}
private boolean indexExists(String dir){
private boolean liveIndexExists(){
Directory fsDir = null;
IndexSearcher isearcher = null ;
try{
fsDir = FSDirectory.getDirectory(indexDir);
fsDir = FSDirectory.getDirectory(liveIndexDir);
isearcher = new IndexSearcher(fsDir);
return true;
}catch(Exception ex){
@ -102,10 +118,6 @@ public class LuceneIndexer implements IndexerIface {
}
}
public synchronized void setIndexDir(String dirName) {
indexDir = dirName;
}
public synchronized void addObj2Doc(Obj2DocIface o2d) {
if (o2d != null)
obj2DocList.add(o2d);
@ -124,7 +136,6 @@ public class LuceneIndexer implements IndexerIface {
/**
* Checks to see if indexing is currently happening.
* @return
*/
public synchronized boolean isIndexing(){
return indexing;
@ -135,26 +146,37 @@ public class LuceneIndexer implements IndexerIface {
log.info("LuceneIndexer.startIndexing() waiting...");
try{ wait(); } catch(InterruptedException ex){}
}
checkStartPreconditions();
try {
log.info("Starting to index");
if( writer == null )
writer =
new IndexWriter(indexDir,analyzer,false, MAX_FIELD_LENGTH);
if( this.fullRebuild ){
String offLineDir = getOffLineBuildDir();
this.currentOffLineDir = offLineDir;
writer = new IndexWriter(offLineDir, analyzer, true, MAX_FIELD_LENGTH);
}else{
writer = new IndexWriter(this.liveIndexDir, analyzer, false, MAX_FIELD_LENGTH);
}
indexing = true;
urisIndexed = new HashSet<String>();
} catch(Throwable ioe){
try{
makeNewIndex();
indexing = true;
}catch(Throwable ioe2){
throw new IndexingException("LuceneIndexer.startIndexing() unable " +
"to make indexModifier " + ioe2.getMessage());
}
} catch(Throwable th){
throw new IndexingException("startIndexing() unable " +
"to make IndexWriter:" + th.getMessage());
}finally{
notifyAll();
}
}
/**
 * Reports, at info level, any state that is unexpectedly still set when
 * indexing is about to start: an open writer, a recorded off line
 * directory, or the indexing flag. Nothing is changed and nothing is
 * thrown; the checks only write to the log.
 */
private void checkStartPreconditions() {
    final boolean writerStillSet = this.writer != null;
    if( writerStillSet ){
        log.info("it is expected that the writer would be null but it isn't");
    }

    final boolean offLineDirStillSet = this.currentOffLineDir != null;
    if( offLineDirStillSet ){
        log.info("it is expected that the currentOffLineDir would be null but it is " + currentOffLineDir);
    }

    if( indexing ){
        log.info("indexing should not be set to true just yet");
    }
}
public synchronized void endIndexing() {
if( ! indexing ){
notifyAll();
@ -166,6 +188,9 @@ public class LuceneIndexer implements IndexerIface {
if( writer != null )
writer.optimize();
if( this.fullRebuild )
bringRebuildOnLine();
//close the searcher so it will find the newly indexed documents
for( Searcher s : searchers){
s.close();
@ -177,12 +202,25 @@ public class LuceneIndexer implements IndexerIface {
log.error("LuceneIndexer.endIndexing() - "
+ "unable to optimize lucene index: \n" + e);
}finally{
closeModifier();
fullRebuild = false;
closeWriter();
indexing = false;
notifyAll();
}
}
/**
 * Replaces the live index with the index that was just built off line.
 *
 * The writer is closed first, then the off line directory recorded by
 * startIndexing() is renamed to the live index directory. The live index
 * is only removed once the off line directory is known to exist, so a
 * missing or unrecorded off line build leaves the current live index in
 * place instead of leaving no index at all.
 *
 * The recorded off line directory is always cleared on the way out so
 * that the next startIndexing() begins from a clean state.
 */
private synchronized void bringRebuildOnLine() {
    // commit and release the writer before any directory is moved
    closeWriter();
    try {
        if( currentOffLineDir == null ){
            log.error("could not bring rebuilt index on line because no off line "
                    + "index directory was recorded, live index directory "
                    + liveIndexDir + " was not modified");
            return;
        }

        File offLineDir = new File(currentOffLineDir);
        File liveDir = new File(liveIndexDir);

        if( ! offLineDir.isDirectory() ){
            log.error("could not bring rebuilt index on line because off line index at "
                    + offLineDir.getAbsolutePath() + " is not a directory, live index directory "
                    + liveDir.getAbsolutePath() + " was not modified");
            return;
        }

        // the rename target must be out of the way before the move
        deleteDir(liveDir);
        boolean success = offLineDir.renameTo( liveDir );
        if( ! success )
            log.error("could not move off line index at "
                    + offLineDir.getAbsolutePath() + " to live index directory "
                    + liveDir.getAbsolutePath());
    } finally {
        // this rebuild is over whether or not the move worked; without this
        // reset checkStartPreconditions() complains on every later start
        currentOffLineDir = null;
    }
}
public synchronized Analyzer getAnalyzer(){
return analyzer;
}
@ -254,41 +292,14 @@ public class LuceneIndexer implements IndexerIface {
}
}
/**
* clear the index by deleting the directory and make a new empty index.
*/
public synchronized void clearIndex() throws IndexingException{
log.debug("Clearing the index at "+indexDir);
closeModifier();
deleteDir(new File(indexDir));
try {
makeNewIndex();
for(Searcher s : searchers){
s.close();
}
//this is the call that replaces Searcher.close()
luceneIndexFactory.forceNewIndexSearcher();
} catch (IOException e) {
throw new IndexingException(e.getMessage());
}
notifyAll();
}
/**
* This will make a new directory and create a lucene index in it.
*/
private synchronized void makeNewIndex() throws IOException{
log.debug("Making new index dir and initially empty lucene index at " + indexDir);
closeModifier();
File dir = new File(indexDir);
dir.mkdirs();
//This will wipe out an existing index because of the true flag
writer = new IndexWriter(indexDir,analyzer,true,MAX_FIELD_LENGTH);
}
private synchronized void closeModifier(){
private synchronized void closeWriter(){
if( writer != null )try{
writer.commit();
writer.close();
@ -303,10 +314,23 @@ public class LuceneIndexer implements IndexerIface {
writer = null;
}
/**
 * Makes a directory for an off line rebuild and returns its path.
 * The directory is created at
 * baseIndexDir/tmp/offLineRebuild&lt;current time in milliseconds&gt;;
 * the base and tmp directories are created as well when needed.
 * The results of the directory creation calls are not checked.
 *
 * @return absolute path of the off line build directory, always ending
 * with the file separator.
 */
private synchronized String getOffLineBuildDir(){
    // make sure the base directory and its tmp subdirectory are there
    new File(baseIndexDir).mkdirs();
    String tmpPath = baseIndexDir + File.separator + "tmp";
    new File(tmpPath).mkdir();

    // the time stamp in the name keeps successive rebuilds apart
    File rebuildDir = new File( tmpPath + File.separator + "offLineRebuild" + System.currentTimeMillis());
    rebuildDir.mkdir();

    String absolutePath = rebuildDir.getAbsolutePath();
    return absolutePath.endsWith(File.separator)
            ? absolutePath
            : absolutePath + File.separator;
}
public long getModified() {
long rv = 0;
try{
FSDirectory d = FSDirectory.getDirectory(indexDir);
FSDirectory d = FSDirectory.getDirectory(liveIndexDir);
rv = IndexReader.lastModified(d);
}catch(IOException ex){
log.error("LuceneIndexer.getModified() - could not get modified time "+ ex);
@ -336,4 +360,12 @@ public class LuceneIndexer implements IndexerIface {
luceneIndexFactory = lif;
}
/**
 * Requests that the next indexing run be a full rebuild.
 * This only has an effect when called before startIndexing(): if
 * indexing is already under way an error is logged and the rebuild
 * flag is left alone, so the run in progress remains an update.
 */
@Override
public synchronized void prepareForRebuild() throws IndexingException {
    if( ! this.indexing ){
        this.fullRebuild = true;
        return;
    }
    log.error("Only an update will be performed, must call prepareForRebuild() before startIndexing()");
}
}

View file

@ -32,11 +32,9 @@ import edu.cornell.mannlib.vitro.webapp.dao.filtering.WebappDaoFactoryFiltering;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilterUtils;
import edu.cornell.mannlib.vitro.webapp.dao.filtering.filters.VitroFilters;
import edu.cornell.mannlib.vitro.webapp.dao.jena.ModelContext;
import edu.cornell.mannlib.vitro.webapp.dao.jena.OntModelSelector;
import edu.cornell.mannlib.vitro.webapp.dao.jena.SearchReindexingListener;
import edu.cornell.mannlib.vitro.webapp.search.beans.ObjectSourceIface;
import edu.cornell.mannlib.vitro.webapp.search.beans.ProhibitedFromSearch;
import edu.cornell.mannlib.vitro.webapp.search.beans.Searcher;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexBuilder;
import edu.cornell.mannlib.vitro.webapp.web.DisplayVocabulary;
@ -63,8 +61,7 @@ import edu.cornell.mannlib.vitro.webapp.web.DisplayVocabulary;
*
*/
public class LuceneSetup implements javax.servlet.ServletContextListener {
private static String indexDir = null;
private static final Log log = LogFactory.getLog(LuceneSetup.class.getName());
private static final Log log = LogFactory.getLog(LuceneSetup.class.getName());
/**
* Gets run to set up DataSource when the webapp servlet context gets
@ -75,8 +72,8 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
ServletContext context = sce.getServletContext();
log.debug("**** Running " + this.getClass().getName() + ".contextInitialized()");
indexDir = getIndexDirName();
log.info("Directory of full text index: " + indexDir);
String baseIndexDir = getBaseIndexDirName();
log.info("Base directory of lucene full text index: " + baseIndexDir);
setBoolMax();
@ -87,17 +84,18 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
context.setAttribute(SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
//This is where to get a LuceneIndex from. The indexer will
//need to reference this to notify it of updates to the index.
context.setAttribute(BASE_INDEX_DIR, baseIndexDir);
LuceneIndexFactory lif = LuceneIndexFactory.setup(context, baseIndexDir);
String liveIndexDir = lif.getLiveIndexDir(context);
// Here we want to put the LuceneIndex object into the application scope.
// This will attempt to create a new directory and empty index if there is none.
LuceneIndexer indexer = new LuceneIndexer(indexDir, null, getAnalyzer());
LuceneIndexer indexer = new LuceneIndexer(getBaseIndexDirName(),liveIndexDir, null, getAnalyzer());
context.setAttribute(ANALYZER, getAnalyzer());
context.setAttribute(INDEX_DIR, indexDir);
indexer.addObj2Doc(new Entity2LuceneDoc());
context.setAttribute(LuceneIndexer.class.getName(), indexer);
//This is where to get a LucenIndex from. The indexer will
//need to reference this to notify it of updates to the index
LuceneIndexFactory lif = LuceneIndexFactory.getLuceneIndexFactoryFromContext(context);
indexer.setLuceneIndexFactory(lif);
// This is where the builder gets the list of places to try to
@ -182,7 +180,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
* @throws IOException
* if the directory doesn't exist and we fail to create it.
*/
private String getIndexDirName()
private String getBaseIndexDirName()
throws IOException {
String dirName = ConfigurationProperties
.getProperty("LuceneSetup.indexDir");
@ -221,7 +219,7 @@ public class LuceneSetup implements javax.servlet.ServletContextListener {
public static final String INDEX_REBUILD_REQUESTED_AT_STARTUP = "LuceneSetup.indexRebuildRequestedAtStarup";
public static final String ANALYZER= "lucene.analyzer";
public static final String INDEX_DIR = "lucene.indexDir";
public static final String BASE_INDEX_DIR = "lucene.indexDir";
public static final String SEARCH_DATAPROPERTY_BLACKLIST =
"search.dataproperty.blacklist";
public static final String SEARCH_OBJECTPROPERTY_BLACKLIST =

View file

@ -75,15 +75,15 @@ public class LuceneSetupCJK implements javax.servlet.ServletContextListener {
objectPropertyBlacklist.add("http://www.w3.org/2002/07/owl#differentFrom");
context.setAttribute(LuceneSetup.SEARCH_OBJECTPROPERTY_BLACKLIST, objectPropertyBlacklist);
//here we want to put the LuceneIndex object into the application scope
LuceneIndexer indexer = new LuceneIndexer(indexDir, null, getAnalyzer());
context.setAttribute(LuceneSetup.ANALYZER, getAnalyzer());
context.setAttribute(LuceneSetup.INDEX_DIR, indexDir);
indexer.addObj2Doc(new Entity2LuceneDoc());
//This is where to get a LucenIndex from. The indexer will
//need to reference this to notify it of updates to the index
LuceneIndexFactory lif = LuceneIndexFactory.getLuceneIndexFactoryFromContext(context);
LuceneIndexFactory lif = LuceneIndexFactory.setup(context, indexDir);
String liveIndexDir = lif.getLiveIndexDir(context);
//here we want to put the LuceneIndex object into the application scope
LuceneIndexer indexer = new LuceneIndexer(indexDir, liveIndexDir, null, getAnalyzer());
context.setAttribute(LuceneSetup.ANALYZER, getAnalyzer());
indexer.addObj2Doc(new Entity2LuceneDoc());
indexer.setLuceneIndexFactory(lif);
//This is where the builder gets the list of places to try to