Full reindex should remove non-existent individuals. NIHVIVO-2699

This commit is contained in:
briancaruso 2011-07-13 20:32:25 +00:00
parent 3fa716dbd1
commit a325e885f1
4 changed files with 58 additions and 85 deletions

View file

@ -243,18 +243,19 @@ public class IndexBuilder extends Thread {
indexer.startIndexing();
reindexRequested = false;
if( ! forceNewIndex ){
if( ! forceNewIndex ){
//if this is not a full reindex, deleted individuals
//need to be removed from the index
for(String deleteMe : deletes ){
try{
indexer.removeFromIndex(deleteMe);
}catch(Exception ex){
log.debug(ex.getMessage());
}catch(Exception ex){
log.debug("could not remove individual " + deleteMe
+ " from index, usually this is harmless",ex);
}
}
}
}
indexUriList(updates, newDocs);
} catch (AbortIndexing abort){
if( log != null)
@ -313,10 +314,7 @@ public class IndexBuilder extends Thread {
}
IndexWorkerThread.resetCount();
}
}
/* maybe ObjectSourceIface should be replaced with just an iterator. */
protected class UriToIndividualIterator implements Iterator<Individual>{

View file

@ -21,12 +21,8 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
*/
public interface IndexerIface {
public void addObj2Doc(Obj2DocIface o2d);
public List<Obj2DocIface> getObj2DocList();
/**
* Check if indexing is currently running, possibly in a different thread.
* @return true if indexing is currently running
*/
public boolean isIndexing();

View file

@ -98,7 +98,7 @@ public class IndividualToSolrDocument {
addThumbnailExistance(ind, doc);
//time of index in millis past epoch
doc.addField(term.INDEXEDTIME,(new DateTime()).getMillis());
doc.addField(term.INDEXEDTIME, new Long( (new DateTime()).getMillis() ) );
if(!prohibited){
addAllText( ind, doc, classPublicNames, objectNames );

View file

@ -21,6 +21,7 @@ import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
public class SolrIndexer implements IndexerIface {
private final static Log log = LogFactory.getLog(SolrIndexer.class);
@ -29,6 +30,25 @@ public class SolrIndexer implements IndexerIface {
protected HashSet<String> urisIndexed;
protected IndividualToSolrDocument individualToSolrDoc;
/**
* System is shutting down if true.
*/
protected boolean shutdownRequested = false;
/**
* This records when a full re-index starts so that once it is done
* all the documents on the Solr service that are earlier than the
* reindexStart can be removed.
*/
protected long reindexStart = 0L;
/**
* If true, then a full index rebuild was requested and reindexStart
* will be used to determine what documents to remove from the index
* once the re-index is complete.
*/
protected boolean doingFullIndexRebuild = false;
public SolrIndexer( SolrServer server, IndividualToSolrDocument indToDoc){
this.server = server;
this.individualToSolrDoc = indToDoc;
@ -56,57 +76,18 @@ public class SolrIndexer implements IndexerIface {
solrDoc = individualToSolrDoc.translate(ind);
if( solrDoc != null){
//sending each doc individually is inefficient
// Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
// docs.add( solrDoc );
UpdateResponse res = server.add( solrDoc );
log.debug("response after adding docs to server: "+ res);
}else{
log.debug("removing from index " + ind.getURI());
//TODO: how do we delete document?
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
log.debug("removing from index " + ind.getURI());
removeFromIndex(ind.getURI());
}
}
} catch (IOException ex) {
throw new IndexingException(ex.getMessage());
} catch (SolrServerException ex) {
throw new IndexingException(ex.getMessage());
}
if( ! indexing )
throw new IndexingException("SolrIndexer: must call " +
"startIndexing() before index().");
if( ind == null )
log.debug("Individual to index was null, ignoring.");
try{
if( urisIndexed.contains(ind.getURI()) ){
log.debug("already indexed " + ind.getURI() );
return;
}else{
SolrInputDocument solrDoc = null;
synchronized(this){
urisIndexed.add(ind.getURI());
}
log.debug("indexing " + ind.getURI());
solrDoc = individualToSolrDoc.translate(ind);
if( solrDoc != null){
UpdateResponse res = server.add( solrDoc );
log.debug("response after adding docs to server: "+ res);
}else{
log.debug("removing from index " + ind.getURI());
//TODO: how do we delete document?
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
}
}
} catch (IOException ex) {
throw new IndexingException(ex.getMessage());
} catch (SolrServerException ex) {
throw new IndexingException(ex.getMessage());
}
}
}
@Override
@ -116,7 +97,8 @@ public class SolrIndexer implements IndexerIface {
/**
* Prepares for a full index rebuild: records the current time in
* reindexStart and sets doingFullIndexRebuild to true.
*
* @throws IndexingException declared in the signature; nothing in this
*         method body throws it
*/
@Override
public void prepareForRebuild() throws IndexingException {
// Marks the cutoff used by removeDocumentsFromBeforeRebuild().
reindexStart = System.currentTimeMillis();
doingFullIndexRebuild = true;
}
@Override
@ -134,30 +116,23 @@ public class SolrIndexer implements IndexerIface {
}
@Override
public synchronized void startIndexing() throws IndexingException {
while( indexing ){ //wait for indexing to end.
log.debug("SolrIndexer.startIndexing() waiting...");
try{ wait(); } catch(InterruptedException ex){}
public synchronized void startIndexing() throws IndexingException {
if( indexing)
log.debug("SolrIndexer.startIndexing() Indexing in progress, waiting for completion...");
while( indexing && ! shutdownRequested ){ //wait for indexing to end.
try{ wait( 250 ); }
catch(InterruptedException ex){}
}
log.debug("Starting to index");
log.debug("Starting to index");
indexing = true;
urisIndexed = new HashSet<String>();
notifyAll();
}
/**
* Does nothing; the supplied Obj2DocIface is ignored.
*
* @param o2d ignored
*/
public synchronized void addObj2Doc(Obj2DocIface o2d) {
//no longer used
}
/**
* Always returns null; no list of Obj2DocIface objects is kept.
*
* @return null
*/
public synchronized List<Obj2DocIface> getObj2DocList() {
//no longer used
return null;
}
}
@Override
public void abortIndexingAndCleanUp() {
shutdownRequested = true;
try{
server.commit();
}catch(SolrServerException e){
@ -187,15 +162,25 @@ public class SolrIndexer implements IndexerIface {
} catch(IOException e){
log.error("Could not commit to solr server", e);
}
// try {
// server.optimize();
// } catch (Exception e) {
// log.error("Could not optimize solr server", e);
// }
if( doingFullIndexRebuild ){
removeDocumentsFromBeforeRebuild( );
}
indexing = false;
notifyAll();
}
/**
 * Asks the Solr server to delete every document matched by the range
 * query indexedTime:[ * TO reindexStart ] and then commits the change.
 *
 * Failures from the server are logged at error level and not rethrown.
 */
protected void removeDocumentsFromBeforeRebuild(){
    final String staleDocsQuery = "indexedTime:[ * TO " + reindexStart + " ]";
    try {
        server.deleteByQuery(staleDocsQuery);
        server.commit();
    } catch (IOException ioFailure) {
        log.error("could not delete documents from before rebuild.",ioFailure);
    } catch (SolrServerException solrFailure) {
        log.error("could not delete documents from before rebuild.",solrFailure);
    }
}
@Override
public long getModified() {
long modified = 0;
@ -211,7 +196,6 @@ public class SolrIndexer implements IndexerIface {
modified = (Long)docs.get(0).getFieldValue("indexedTime");
}
} catch (SolrServerException e) {
// TODO Auto-generated catch block
log.error(e,e);
}
@ -228,14 +212,9 @@ public class SolrIndexer implements IndexerIface {
return true;
}
} catch (SolrServerException e) {
// TODO Auto-generated catch block
log.error(e,e);
}
return false;
}
}