Full reindex should remove non-existent individuals. NIHVIVO-2699
This commit is contained in:
parent
3fa716dbd1
commit
a325e885f1
4 changed files with 58 additions and 85 deletions
|
@ -243,18 +243,19 @@ public class IndexBuilder extends Thread {
|
||||||
indexer.startIndexing();
|
indexer.startIndexing();
|
||||||
reindexRequested = false;
|
reindexRequested = false;
|
||||||
|
|
||||||
if( ! forceNewIndex ){
|
if( ! forceNewIndex ){
|
||||||
|
//if this is not a full reindex, deleted individuals
|
||||||
|
//need to be removed from the index
|
||||||
for(String deleteMe : deletes ){
|
for(String deleteMe : deletes ){
|
||||||
try{
|
try{
|
||||||
indexer.removeFromIndex(deleteMe);
|
indexer.removeFromIndex(deleteMe);
|
||||||
}catch(Exception ex){
|
}catch(Exception ex){
|
||||||
log.debug(ex.getMessage());
|
|
||||||
log.debug("could not remove individual " + deleteMe
|
log.debug("could not remove individual " + deleteMe
|
||||||
+ " from index, usually this is harmless",ex);
|
+ " from index, usually this is harmless",ex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
indexUriList(updates, newDocs);
|
indexUriList(updates, newDocs);
|
||||||
} catch (AbortIndexing abort){
|
} catch (AbortIndexing abort){
|
||||||
if( log != null)
|
if( log != null)
|
||||||
|
@ -313,10 +314,7 @@ public class IndexBuilder extends Thread {
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexWorkerThread.resetCount();
|
IndexWorkerThread.resetCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* maybe ObjectSourceIface should be replaced with just an iterator. */
|
/* maybe ObjectSourceIface should be replaced with just an iterator. */
|
||||||
protected class UriToIndividualIterator implements Iterator<Individual>{
|
protected class UriToIndividualIterator implements Iterator<Individual>{
|
||||||
|
|
|
@ -21,12 +21,8 @@ import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
*/
|
*/
|
||||||
public interface IndexerIface {
|
public interface IndexerIface {
|
||||||
|
|
||||||
public void addObj2Doc(Obj2DocIface o2d);
|
|
||||||
public List<Obj2DocIface> getObj2DocList();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Check if indexing is currently running in a different thread.
|
* Check if indexing is currently running.
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
public boolean isIndexing();
|
public boolean isIndexing();
|
||||||
|
|
||||||
|
|
|
@ -98,7 +98,7 @@ public class IndividualToSolrDocument {
|
||||||
addThumbnailExistance(ind, doc);
|
addThumbnailExistance(ind, doc);
|
||||||
|
|
||||||
//time of index in millis past epoch
|
//time of index in millis past epoch
|
||||||
doc.addField(term.INDEXEDTIME,(new DateTime()).getMillis());
|
doc.addField(term.INDEXEDTIME, new Long( (new DateTime()).getMillis() ) );
|
||||||
|
|
||||||
if(!prohibited){
|
if(!prohibited){
|
||||||
addAllText( ind, doc, classPublicNames, objectNames );
|
addAllText( ind, doc, classPublicNames, objectNames );
|
||||||
|
|
|
@ -21,6 +21,7 @@ import edu.cornell.mannlib.vitro.webapp.search.IndexingException;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
import edu.cornell.mannlib.vitro.webapp.search.docbuilder.Obj2DocIface;
|
||||||
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
|
import edu.cornell.mannlib.vitro.webapp.search.indexing.IndexerIface;
|
||||||
|
|
||||||
|
|
||||||
public class SolrIndexer implements IndexerIface {
|
public class SolrIndexer implements IndexerIface {
|
||||||
private final static Log log = LogFactory.getLog(SolrIndexer.class);
|
private final static Log log = LogFactory.getLog(SolrIndexer.class);
|
||||||
|
|
||||||
|
@ -29,6 +30,25 @@ public class SolrIndexer implements IndexerIface {
|
||||||
protected HashSet<String> urisIndexed;
|
protected HashSet<String> urisIndexed;
|
||||||
protected IndividualToSolrDocument individualToSolrDoc;
|
protected IndividualToSolrDocument individualToSolrDoc;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* System is shutting down if true.
|
||||||
|
*/
|
||||||
|
protected boolean shutdownRequested = false;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This records when a full re-index starts so that once it is done
|
||||||
|
* all the documents on the Solr service that are earlier than the
|
||||||
|
* reindexStart can be removed.
|
||||||
|
*/
|
||||||
|
protected long reindexStart = 0L;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If true, then a full index rebuild was requested and reindexStart
|
||||||
|
* will be used to determine what documents to remove from the index
|
||||||
|
* once the re-index is complete.
|
||||||
|
*/
|
||||||
|
protected boolean doingFullIndexRebuild = false;
|
||||||
|
|
||||||
public SolrIndexer( SolrServer server, IndividualToSolrDocument indToDoc){
|
public SolrIndexer( SolrServer server, IndividualToSolrDocument indToDoc){
|
||||||
this.server = server;
|
this.server = server;
|
||||||
this.individualToSolrDoc = indToDoc;
|
this.individualToSolrDoc = indToDoc;
|
||||||
|
@ -56,57 +76,18 @@ public class SolrIndexer implements IndexerIface {
|
||||||
solrDoc = individualToSolrDoc.translate(ind);
|
solrDoc = individualToSolrDoc.translate(ind);
|
||||||
|
|
||||||
if( solrDoc != null){
|
if( solrDoc != null){
|
||||||
//sending each doc individually is inefficient
|
|
||||||
// Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument>();
|
|
||||||
// docs.add( solrDoc );
|
|
||||||
UpdateResponse res = server.add( solrDoc );
|
UpdateResponse res = server.add( solrDoc );
|
||||||
log.debug("response after adding docs to server: "+ res);
|
log.debug("response after adding docs to server: "+ res);
|
||||||
}else{
|
}else{
|
||||||
log.debug("removing from index " + ind.getURI());
|
log.debug("removing from index " + ind.getURI());
|
||||||
//TODO: how do we delete document?
|
removeFromIndex(ind.getURI());
|
||||||
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException ex) {
|
} catch (IOException ex) {
|
||||||
throw new IndexingException(ex.getMessage());
|
throw new IndexingException(ex.getMessage());
|
||||||
} catch (SolrServerException ex) {
|
} catch (SolrServerException ex) {
|
||||||
throw new IndexingException(ex.getMessage());
|
throw new IndexingException(ex.getMessage());
|
||||||
}
|
}
|
||||||
|
|
||||||
if( ! indexing )
|
|
||||||
throw new IndexingException("SolrIndexer: must call " +
|
|
||||||
"startIndexing() before index().");
|
|
||||||
|
|
||||||
if( ind == null )
|
|
||||||
log.debug("Individual to index was null, ignoring.");
|
|
||||||
|
|
||||||
try{
|
|
||||||
if( urisIndexed.contains(ind.getURI()) ){
|
|
||||||
log.debug("already indexed " + ind.getURI() );
|
|
||||||
return;
|
|
||||||
}else{
|
|
||||||
SolrInputDocument solrDoc = null;
|
|
||||||
synchronized(this){
|
|
||||||
urisIndexed.add(ind.getURI());
|
|
||||||
}
|
|
||||||
log.debug("indexing " + ind.getURI());
|
|
||||||
|
|
||||||
solrDoc = individualToSolrDoc.translate(ind);
|
|
||||||
|
|
||||||
if( solrDoc != null){
|
|
||||||
UpdateResponse res = server.add( solrDoc );
|
|
||||||
log.debug("response after adding docs to server: "+ res);
|
|
||||||
}else{
|
|
||||||
log.debug("removing from index " + ind.getURI());
|
|
||||||
//TODO: how do we delete document?
|
|
||||||
//writer.deleteDocuments((Term)obj2doc.getIndexId(ind));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException ex) {
|
|
||||||
throw new IndexingException(ex.getMessage());
|
|
||||||
} catch (SolrServerException ex) {
|
|
||||||
throw new IndexingException(ex.getMessage());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -116,7 +97,8 @@ public class SolrIndexer implements IndexerIface {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void prepareForRebuild() throws IndexingException {
|
public void prepareForRebuild() throws IndexingException {
|
||||||
// TODO Auto-generated method stub
|
reindexStart = System.currentTimeMillis();
|
||||||
|
doingFullIndexRebuild = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -134,30 +116,23 @@ public class SolrIndexer implements IndexerIface {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public synchronized void startIndexing() throws IndexingException {
|
public synchronized void startIndexing() throws IndexingException {
|
||||||
while( indexing ){ //wait for indexing to end.
|
if( indexing)
|
||||||
log.debug("SolrIndexer.startIndexing() waiting...");
|
log.debug("SolrIndexer.startIndexing() Indexing in progress, waiting for completion...");
|
||||||
try{ wait(); } catch(InterruptedException ex){}
|
while( indexing && ! shutdownRequested ){ //wait for indexing to end.
|
||||||
|
try{ wait( 250 ); }
|
||||||
|
catch(InterruptedException ex){}
|
||||||
}
|
}
|
||||||
|
|
||||||
log.debug("Starting to index");
|
log.debug("Starting to index");
|
||||||
indexing = true;
|
indexing = true;
|
||||||
urisIndexed = new HashSet<String>();
|
urisIndexed = new HashSet<String>();
|
||||||
notifyAll();
|
notifyAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public synchronized void addObj2Doc(Obj2DocIface o2d) {
|
|
||||||
//no longer used
|
|
||||||
}
|
|
||||||
|
|
||||||
public synchronized List<Obj2DocIface> getObj2DocList() {
|
|
||||||
//no longer used
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void abortIndexingAndCleanUp() {
|
public void abortIndexingAndCleanUp() {
|
||||||
|
shutdownRequested = true;
|
||||||
try{
|
try{
|
||||||
server.commit();
|
server.commit();
|
||||||
}catch(SolrServerException e){
|
}catch(SolrServerException e){
|
||||||
|
@ -187,15 +162,25 @@ public class SolrIndexer implements IndexerIface {
|
||||||
} catch(IOException e){
|
} catch(IOException e){
|
||||||
log.error("Could not commit to solr server", e);
|
log.error("Could not commit to solr server", e);
|
||||||
}
|
}
|
||||||
// try {
|
if( doingFullIndexRebuild ){
|
||||||
// server.optimize();
|
removeDocumentsFromBeforeRebuild( );
|
||||||
// } catch (Exception e) {
|
}
|
||||||
// log.error("Could not optimize solr server", e);
|
|
||||||
// }
|
|
||||||
indexing = false;
|
indexing = false;
|
||||||
notifyAll();
|
notifyAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void removeDocumentsFromBeforeRebuild(){
|
||||||
|
try {
|
||||||
|
server.deleteByQuery("indexedTime:[ * TO " + reindexStart + " ]");
|
||||||
|
server.commit();
|
||||||
|
} catch (SolrServerException e) {
|
||||||
|
log.error("could not delete documents from before rebuild.",e);
|
||||||
|
} catch (IOException e) {
|
||||||
|
log.error("could not delete documents from before rebuild.",e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public long getModified() {
|
public long getModified() {
|
||||||
long modified = 0;
|
long modified = 0;
|
||||||
|
@ -211,7 +196,6 @@ public class SolrIndexer implements IndexerIface {
|
||||||
modified = (Long)docs.get(0).getFieldValue("indexedTime");
|
modified = (Long)docs.get(0).getFieldValue("indexedTime");
|
||||||
}
|
}
|
||||||
} catch (SolrServerException e) {
|
} catch (SolrServerException e) {
|
||||||
// TODO Auto-generated catch block
|
|
||||||
log.error(e,e);
|
log.error(e,e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -228,14 +212,9 @@ public class SolrIndexer implements IndexerIface {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} catch (SolrServerException e) {
|
} catch (SolrServerException e) {
|
||||||
// TODO Auto-generated catch block
|
|
||||||
log.error(e,e);
|
log.error(e,e);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue