[VIVO-1397] Faster and more reliable Solr indexing

This commit is contained in:
Graham Triggs 2017-10-10 13:06:44 +01:00
parent 08d8b3f09f
commit a610e0ed25
4 changed files with 43 additions and 28 deletions

View file

@ -12,6 +12,7 @@ import javax.servlet.ServletContext;
import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.client.solrj.response.QueryResponse;
@ -31,7 +32,8 @@ import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
* The Solr-based implementation of SearchEngine. * The Solr-based implementation of SearchEngine.
*/ */
public class SolrSearchEngine implements SearchEngine { public class SolrSearchEngine implements SearchEngine {
private HttpSolrServer server; private HttpSolrServer queryEngine;
private ConcurrentUpdateSolrServer updateEngine;
/** /**
* Set up the http connection with the solr server * Set up the http connection with the solr server
@ -51,14 +53,18 @@ public class SolrSearchEngine implements SearchEngine {
} }
try { try {
server = new HttpSolrServer(solrServerUrlString); queryEngine = new HttpSolrServer(solrServerUrlString);
server.setSoTimeout(10000); // socket read timeout queryEngine.setSoTimeout(10000); // socket read timeout
server.setConnectionTimeout(10000); queryEngine.setConnectionTimeout(10000);
server.setDefaultMaxConnectionsPerHost(100); queryEngine.setDefaultMaxConnectionsPerHost(100);
server.setMaxTotalConnections(100); queryEngine.setMaxTotalConnections(100);
server.setMaxRetries(1); queryEngine.setMaxRetries(1);
css.info("Set up the Solr search engine; URL = '"
+ solrServerUrlString + "'."); updateEngine = new ConcurrentUpdateSolrServer(solrServerUrlString, 100, 1);
updateEngine.setConnectionTimeout(10000);
updateEngine.setPollQueueTime(25);
css.info("Set up the Solr search engine; URL = '" + solrServerUrlString + "'.");
} catch (Exception e) { } catch (Exception e) {
css.fatal("Could not set up the Solr search engine", e); css.fatal("Could not set up the Solr search engine", e);
} }
@ -66,13 +72,14 @@ public class SolrSearchEngine implements SearchEngine {
@Override @Override
public void shutdown(Application application) { public void shutdown(Application application) {
server.shutdown(); queryEngine.shutdown();
updateEngine.shutdown();
} }
@Override @Override
public void ping() throws SearchEngineException { public void ping() throws SearchEngineException {
try { try {
server.ping(); queryEngine.ping();
} catch (SolrServerException | IOException e) { } catch (SolrServerException | IOException e) {
throw appropriateException("Solr server did not respond to ping.", throw appropriateException("Solr server did not respond to ping.",
e); e);
@ -93,7 +100,7 @@ public class SolrSearchEngine implements SearchEngine {
public void add(Collection<SearchInputDocument> docs) public void add(Collection<SearchInputDocument> docs)
throws SearchEngineException { throws SearchEngineException {
try { try {
server.add(SolrConversionUtils.convertToSolrInputDocuments(docs)); updateEngine.add(SolrConversionUtils.convertToSolrInputDocuments(docs), 100);
} catch (SolrServerException | IOException e) { } catch (SolrServerException | IOException e) {
throw appropriateException("Solr server failed to add documents " throw appropriateException("Solr server failed to add documents "
+ docs, e); + docs, e);
@ -103,7 +110,8 @@ public class SolrSearchEngine implements SearchEngine {
@Override @Override
public void commit() throws SearchEngineException { public void commit() throws SearchEngineException {
try { try {
server.commit(); updateEngine.commit();
updateEngine.optimize();
} catch (SolrServerException | IOException e) { } catch (SolrServerException | IOException e) {
throw appropriateException("Failed to commit to Solr server.", e); throw appropriateException("Failed to commit to Solr server.", e);
} }
@ -112,7 +120,8 @@ public class SolrSearchEngine implements SearchEngine {
@Override @Override
public void commit(boolean wait) throws SearchEngineException { public void commit(boolean wait) throws SearchEngineException {
try { try {
server.commit(wait, wait); updateEngine.commit(wait, wait);
updateEngine.optimize(wait, wait);
} catch (SolrServerException | IOException e) { } catch (SolrServerException | IOException e) {
throw appropriateException("Failed to commit to Solr server.", e); throw appropriateException("Failed to commit to Solr server.", e);
} }
@ -126,7 +135,7 @@ public class SolrSearchEngine implements SearchEngine {
@Override @Override
public void deleteById(Collection<String> ids) throws SearchEngineException { public void deleteById(Collection<String> ids) throws SearchEngineException {
try { try {
server.deleteById(new ArrayList<>(ids)); updateEngine.deleteById(new ArrayList<>(ids), 100);
} catch (SolrServerException | IOException e) { } catch (SolrServerException | IOException e) {
throw appropriateException( throw appropriateException(
"Solr server failed to delete documents: " + ids, e); "Solr server failed to delete documents: " + ids, e);
@ -136,7 +145,7 @@ public class SolrSearchEngine implements SearchEngine {
@Override @Override
public void deleteByQuery(String query) throws SearchEngineException { public void deleteByQuery(String query) throws SearchEngineException {
try { try {
server.deleteByQuery(query); updateEngine.deleteByQuery(query, 100);
} catch (SolrServerException | IOException e) { } catch (SolrServerException | IOException e) {
throw appropriateException( throw appropriateException(
"Solr server failed to delete documents: " + query, e); "Solr server failed to delete documents: " + query, e);
@ -159,7 +168,7 @@ public class SolrSearchEngine implements SearchEngine {
public SearchResponse query(SearchQuery query) throws SearchEngineException { public SearchResponse query(SearchQuery query) throws SearchEngineException {
try { try {
SolrQuery solrQuery = SolrConversionUtils.convertToSolrQuery(query); SolrQuery solrQuery = SolrConversionUtils.convertToSolrQuery(query);
QueryResponse response = server.query(solrQuery); QueryResponse response = queryEngine.query(solrQuery);
return SolrConversionUtils.convertToSearchResponse(response); return SolrConversionUtils.convertToSearchResponse(response);
} catch (SolrServerException e) { } catch (SolrServerException e) {
throw appropriateException( throw appropriateException(

View file

@ -112,6 +112,7 @@ public class RebuildIndexTask implements Task {
if (!isInterrupted()) { if (!isInterrupted()) {
deleteOutdatedDocuments(); deleteOutdatedDocuments();
} }
finalizeIndexing();
} }
status = buildStatus(REBUILDING, getDocumentCount()); status = buildStatus(REBUILDING, getDocumentCount());
@ -135,11 +136,22 @@ public class RebuildIndexTask implements Task {
UpdateUrisTask.runNow(uris, excluders, modifiers, indDao, listeners, pool); UpdateUrisTask.runNow(uris, excluders, modifiers, indDao, listeners, pool);
} }
/**
 * Asks the search engine to commit, as the closing step of a rebuild.
 *
 * This is best-effort: a failure is logged at WARN level and is not
 * propagated to the caller.
 */
private void finalizeIndexing() {
    try {
        searchEngine.commit();
    } catch (SearchEngineNotRespondingException e) {
        // No stack trace here, matching how deleteOutdatedDocuments()
        // reports a non-responding search engine.
        log.warn("Failed to finalize search index: "
                + "the search engine is not responding.");
    } catch (SearchEngineException e) {
        // The previous text read "Failed to finalize from the search index",
        // a leftover from the delete-documents message it was copied from.
        log.warn("Failed to finalize the search index", e);
    }
}
private void deleteOutdatedDocuments() { private void deleteOutdatedDocuments() {
String query = "indexedTime:[ * TO " + requestedAt.getTime() + " ]"; String query = "indexedTime:[ * TO " + requestedAt.getTime() + " ]";
try { try {
searchEngine.deleteByQuery(query); searchEngine.deleteByQuery(query);
searchEngine.commit();
} catch (SearchEngineNotRespondingException e) { } catch (SearchEngineNotRespondingException e) {
log.warn("Failed to delete outdated documents from the search index: " log.warn("Failed to delete outdated documents from the search index: "
+ "the search engine is not responding."); + "the search engine is not responding.");

View file

@ -153,8 +153,6 @@ public class UpdateUrisTask implements Task {
} }
pool.waitUntilIdle(); pool.waitUntilIdle();
commitChanges();
excluders.stopIndexing(); excluders.stopIndexing();
modifiers.stopIndexing(); modifiers.stopIndexing();
listeners.fireEvent(new Event(STOP_URIS, status.getSearchIndexerStatus())); listeners.fireEvent(new Event(STOP_URIS, status.getSearchIndexerStatus()));
@ -221,9 +219,6 @@ public class UpdateUrisTask implements Task {
private void fireEvent(Event event) { private void fireEvent(Event event) {
listeners.fireEvent(event); listeners.fireEvent(event);
if (event.getType() == PROGRESS || event.getType() == STOP_URIS) {
commitChanges();
}
} }
private void commitChanges() { private void commitChanges() {

View file

@ -35,7 +35,7 @@
that you fully re-index after changing this setting as it can that you fully re-index after changing this setting as it can
affect both how text is indexed and queried. affect both how text is indexed and queried.
--> -->
<luceneMatchVersion>4.7</luceneMatchVersion> <luceneMatchVersion>4.10.4</luceneMatchVersion>
<!-- <lib/> directives can be used to instruct Solr to load any Jars <!-- <lib/> directives can be used to instruct Solr to load any Jars
identified and use them to resolve any "plugins" specified in identified and use them to resolve any "plugins" specified in
@ -387,12 +387,11 @@
If the updateLog is enabled, then it's highly recommended to If the updateLog is enabled, then it's highly recommended to
have some sort of hard autoCommit to limit the log size. have some sort of hard autoCommit to limit the log size.
--> -->
<!--
<autoCommit> <autoCommit>
<maxTime>${solr.autoCommit.maxTime:15000}</maxTime> <maxTime>60000</maxTime>
<openSearcher>false</openSearcher> <openSearcher>false</openSearcher>
</autoCommit> </autoCommit>
-->
<!-- softAutoCommit is like autoCommit except it causes a <!-- softAutoCommit is like autoCommit except it causes a
'soft' commit which only ensures that changes are visible 'soft' commit which only ensures that changes are visible
but does not ensure that data is synced to disk. This is but does not ensure that data is synced to disk. This is