Full reindex should remove non-existent individuals. NIHVIVO-2699

This commit is contained in:
briancaruso 2011-07-13 20:32:25 +00:00
parent 3fa716dbd1
commit a325e885f1
4 changed files with 58 additions and 85 deletions

View file

@ -244,11 +244,12 @@ public class IndexBuilder extends Thread {
reindexRequested = false; reindexRequested = false;
if( ! forceNewIndex ){ if( ! forceNewIndex ){
//if this is not a full reindex, deleted individuals
//need to be removed from the index
for(String deleteMe : deletes ){ for(String deleteMe : deletes ){
try{ try{
indexer.removeFromIndex(deleteMe); indexer.removeFromIndex(deleteMe);
}catch(Exception ex){ }catch(Exception ex){
log.debug(ex.getMessage());
log.debug("could not remove individual " + deleteMe log.debug("could not remove individual " + deleteMe
+ " from index, usually this is harmless",ex); + " from index, usually this is harmless",ex);
} }
@ -315,9 +316,6 @@ public class IndexBuilder extends Thread {
IndexWorkerThread.resetCount(); IndexWorkerThread.resetCount();
} }
/* maybe ObjectSourceIface should be replaced with just an iterator. */ /* maybe ObjectSourceIface should be replaced with just an iterator. */
protected class UriToIndividualIterator implements Iterator<Individual>{ protected class UriToIndividualIterator implements Iterator<Individual>{
private final Iterator<String> uris; private final Iterator<String> uris;

View file

@ -21,12 +21,8 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
*/ */
public interface IndexerIface { public interface IndexerIface {
public void addObj2Doc(Obj2DocIface o2d);
public List<Obj2DocIface> getObj2DocList();
/** /**
* Check if indexing is currently running in a different thread. * Check if indexing is currently running.
* @return
*/ */
public boolean isIndexing(); public boolean isIndexing();

View file

@ -98,7 +98,7 @@ public class IndividualToSolrDocument {
addThumbnailExistance(ind, doc); addThumbnailExistance(ind, doc);
//time of index in millis past epoch //time of index in millis past epoch
doc.addField(term.INDEXEDTIME,(new DateTime()).getMillis()); doc.addField(term.INDEXEDTIME, new Long( (new DateTime()).getMillis() ) );
if(!prohibited){ if(!prohibited){
addAllText( ind, doc, classPublicNames, objectNames ); addAllText( ind, doc, classPublicNames, objectNames );

View file

@ -21,6 +21,7 @@ import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface; import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface; import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
public class SolrIndexer implements IndexerIface { public class SolrIndexer implements IndexerIface {
private final static Log log = LogFactory.getLog(SolrIndexer.class); private final static Log log = LogFactory.getLog(SolrIndexer.class);
@ -29,6 +30,25 @@ public class SolrIndexer implements IndexerIface {
protected HashSet<String> urisIndexed; protected HashSet<String> urisIndexed;
protected IndividualToSolrDocument individualToSolrDoc; protected IndividualToSolrDocument individualToSolrDoc;
/**
 * System is shutting down if true.
 *
 * Declared volatile because it is set by abortIndexingAndCleanUp(),
 * which does not hold this object's monitor, and polled by the wait
 * loop in startIndexing(); without volatile the waiting thread is not
 * guaranteed to observe the change.
 */
protected volatile boolean shutdownRequested = false;

/**
 * This records when a full re-index starts (milliseconds since the
 * epoch, taken from System.currentTimeMillis() in prepareForRebuild())
 * so that once it is done all the documents on the Solr service that
 * are earlier than the reindexStart can be removed.
 */
protected long reindexStart = 0L;

/**
 * If true, then a full index rebuild was requested and reindexStart
 * will be used to determine what documents to remove from the index
 * once the re-index is complete.
 */
protected boolean doingFullIndexRebuild = false;
public SolrIndexer( SolrServer server, IndividualToSolrDocument indToDoc){ public SolrIndexer( SolrServer server, IndividualToSolrDocument indToDoc){
this.server = server; this.server = server;
this.individualToSolrDoc = indToDoc; this.individualToSolrDoc = indToDoc;
@ -56,50 +76,11 @@ public class SolrIndexer implements IndexerIface {
solrDoc = individualToSolrDoc.translate(ind); solrDoc = individualToSolrDoc.translate(ind);
if( solrDoc != null){ if( solrDoc != null){
//sending each doc individually is inefficient
// Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
// docs.add( solrDoc );
UpdateResponse res = server.add( solrDoc ); UpdateResponse res = server.add( solrDoc );
log.debug("response after adding docs to server: "+ res); log.debug("response after adding docs to server: "+ res);
}else{ }else{
log.debug("removing from index " + ind.getURI()); log.debug("removing from index " + ind.getURI());
//TODO: how do we delete document? removeFromIndex(ind.getURI());
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
}
}
} catch (IOException ex) {
throw new IndexingException(ex.getMessage());
} catch (SolrServerException ex) {
throw new IndexingException(ex.getMessage());
}
if( ! indexing )
throw new IndexingException("SolrIndexer: must call " +
"startIndexing() before index().");
if( ind == null )
log.debug("Individual to index was null, ignoring.");
try{
if( urisIndexed.contains(ind.getURI()) ){
log.debug("already indexed " + ind.getURI() );
return;
}else{
SolrInputDocument solrDoc = null;
synchronized(this){
urisIndexed.add(ind.getURI());
}
log.debug("indexing " + ind.getURI());
solrDoc = individualToSolrDoc.translate(ind);
if( solrDoc != null){
UpdateResponse res = server.add( solrDoc );
log.debug("response after adding docs to server: "+ res);
}else{
log.debug("removing from index " + ind.getURI());
//TODO: how do we delete document?
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
} }
} }
} catch (IOException ex) { } catch (IOException ex) {
@ -116,7 +97,8 @@ public class SolrIndexer implements IndexerIface {
@Override @Override
public void prepareForRebuild() throws IndexingException { public void prepareForRebuild() throws IndexingException {
// TODO Auto-generated method stub reindexStart = System.currentTimeMillis();
doingFullIndexRebuild = true;
} }
@Override @Override
@ -135,9 +117,11 @@ public class SolrIndexer implements IndexerIface {
@Override @Override
public synchronized void startIndexing() throws IndexingException { public synchronized void startIndexing() throws IndexingException {
while( indexing ){ //wait for indexing to end. if( indexing)
log.debug("SolrIndexer.startIndexing() waiting..."); log.debug("SolrIndexer.startIndexing() Indexing in progress, waiting for completion...");
try{ wait(); } catch(InterruptedException ex){} while( indexing && ! shutdownRequested ){ //wait for indexing to end.
try{ wait( 250 ); }
catch(InterruptedException ex){}
} }
log.debug("Starting to index"); log.debug("Starting to index");
@ -146,18 +130,9 @@ public class SolrIndexer implements IndexerIface {
notifyAll(); notifyAll();
} }
/**
 * Legacy registration hook for an Obj2DocIface. This implementation
 * ignores the argument and does nothing.
 *
 * @param o2d ignored
 */
public synchronized void addObj2Doc(Obj2DocIface o2d) {
//no longer used
}
/**
 * Legacy accessor for registered Obj2DocIface instances. This
 * implementation keeps no such list.
 *
 * @return always null; callers must null-check before using the result
 */
public synchronized List<Obj2DocIface> getObj2DocList() {
//no longer used
return null;
}
@Override @Override
public void abortIndexingAndCleanUp() { public void abortIndexingAndCleanUp() {
shutdownRequested = true;
try{ try{
server.commit(); server.commit();
}catch(SolrServerException e){ }catch(SolrServerException e){
@ -187,15 +162,25 @@ public class SolrIndexer implements IndexerIface {
} catch(IOException e){ } catch(IOException e){
log.error("Could not commit to solr server", e); log.error("Could not commit to solr server", e);
} }
// try { if( doingFullIndexRebuild ){
// server.optimize(); removeDocumentsFromBeforeRebuild( );
// } catch (Exception e) { }
// log.error("Could not optimize solr server", e);
// }
indexing = false; indexing = false;
notifyAll(); notifyAll();
} }
/**
 * Deletes every document whose indexedTime is strictly earlier than
 * reindexStart and then commits, so that documents which were not
 * re-written during the full rebuild disappear from the index.
 *
 * Failures are logged and not rethrown; a failed cleanup leaves stale
 * documents in place but does not abort the caller.
 */
protected void removeDocumentsFromBeforeRebuild(){
    if( reindexStart <= 0L ){
        // No rebuild start was recorded, so there is no cutoff to
        // delete against.
        log.debug("no reindex start time recorded, not removing any documents.");
        return;
    }

    // Solr's [a TO b] range includes both end points. A document
    // re-indexed in the same millisecond that reindexStart was taken
    // must be kept, so the newest time treated as stale is one
    // millisecond earlier.
    // NOTE(review): assumes the "indexedTime" field holds the
    // epoch-millisecond value written at index time -- confirm
    // against the schema.
    long newestStaleTime = reindexStart - 1L;

    try {
        server.deleteByQuery("indexedTime:[ * TO " + newestStaleTime + " ]");
        server.commit();
    } catch (SolrServerException e) {
        log.error("could not delete documents from before rebuild.",e);
    } catch (IOException e) {
        log.error("could not delete documents from before rebuild.",e);
    }
}
@Override @Override
public long getModified() { public long getModified() {
long modified = 0; long modified = 0;
@ -211,7 +196,6 @@ public class SolrIndexer implements IndexerIface {
modified = (Long)docs.get(0).getFieldValue("indexedTime"); modified = (Long)docs.get(0).getFieldValue("indexedTime");
} }
} catch (SolrServerException e) { } catch (SolrServerException e) {
// TODO Auto-generated catch block
log.error(e,e); log.error(e,e);
} }
@ -228,14 +212,9 @@ public class SolrIndexer implements IndexerIface {
return true; return true;
} }
} catch (SolrServerException e) { } catch (SolrServerException e) {
// TODO Auto-generated catch block
log.error(e,e); log.error(e,e);
} }
return false; return false;
} }
} }