Merge branch 'sprint-search' into dev-search

# Conflicts:
#	home/src/main/resources/solr/README.txt
#	home/src/main/resources/solr/conf/admin-extra.html
#	home/src/main/resources/solr/conf/clustering/carrot2/kmeans-attributes.xml
#	home/src/main/resources/solr/conf/clustering/carrot2/lingo-attributes.xml
#	home/src/main/resources/solr/conf/clustering/carrot2/stc-attributes.xml
#	home/src/main/resources/solr/conf/currency.xml
#	home/src/main/resources/solr/conf/elevate.xml
#	home/src/main/resources/solr/conf/lang/contractions_it.txt
#	home/src/main/resources/solr/conf/lang/stoptags_ja.txt
#	home/src/main/resources/solr/conf/lang/stopwords_ar.txt
#	home/src/main/resources/solr/conf/lang/stopwords_ca.txt
#	home/src/main/resources/solr/conf/lang/stopwords_el.txt
#	home/src/main/resources/solr/conf/lang/stopwords_fi.txt
#	home/src/main/resources/solr/conf/lang/stopwords_hi.txt
#	home/src/main/resources/solr/conf/lang/stopwords_hu.txt
#	home/src/main/resources/solr/conf/lang/stopwords_lv.txt
#	home/src/main/resources/solr/conf/lang/stopwords_ro.txt
#	home/src/main/resources/solr/conf/mapping-FoldToASCII.txt
#	home/src/main/resources/solr/conf/schema-old.xml
#	home/src/main/resources/solr/conf/schema.xml
#	home/src/main/resources/solr/conf/solrconfig-old.xml
#	home/src/main/resources/solr/conf/solrconfig.xml
#	home/src/main/resources/solr/conf/stopwords-name.txt
#	home/src/main/resources/solr/conf/velocity/VM_global_library.vm
#	home/src/main/resources/solr/conf/velocity/jquery.autocomplete.css
#	home/src/main/resources/solr/conf/velocity/jquery.autocomplete.js
#	home/src/main/resources/solr/conf/velocity/main.css
#	home/src/main/resources/solr/conf/velocity/query_group.vm
#	home/src/main/resources/solr/conf/velocity/query_spatial.vm
#	home/src/main/resources/solr/conf/velocity/richtext_doc.vm
#	home/src/main/resources/solr/conf/xslt/example.xsl
#	home/src/main/resources/solr/conf/xslt/example_atom.xsl
#	home/src/main/resources/solr/conf/xslt/example_rss.xsl
#	home/src/main/resources/solr/conf/xslt/luke.xsl
#	home/src/main/resources/solr/conf/xslt/updateXml.xsl
#	home/src/main/resources/solr/solr.xml
#	home/src/main/resources/solr/solr_newversion.xml
Ralph O'Flinn 2019-06-21 12:04:45 -05:00
commit d8649a60ad
128 changed files with 1516 additions and 63936 deletions

View file

@@ -58,6 +58,11 @@
 		<artifactId>argon2-jvm</artifactId>
 		<version>2.4</version>
 	</dependency>
+	<dependency>
+		<groupId>org.apache.httpcomponents</groupId>
+		<artifactId>fluent-hc</artifactId>
+		<version>4.5.6</version>
+	</dependency>
 	<dependency>
 		<groupId>org.vivoweb</groupId>
 		<artifactId>vitro-dependencies</artifactId>

View file

@@ -0,0 +1,92 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.entity.ContentType;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
/**
* The nuts and bolts of adding a document to the Elasticsearch index
*/
public class ESAdder {
private static final Log log = LogFactory.getLog(ESAdder.class);
private final String baseUrl;
public ESAdder(String baseUrl) {
this.baseUrl = baseUrl;
}
public void add(Collection<SearchInputDocument> docs)
throws SearchEngineException {
for (SearchInputDocument doc : docs) {
addDocument(doc);
}
}
private void addDocument(SearchInputDocument doc)
throws SearchEngineException {
try {
Map<String, List<Object>> map = convertDocToMap(doc);
String json = new ObjectMapper().writeValueAsString(map);
log.debug("Adding document for '" + doc.getField("DocId") + "': "
+ json);
putToElastic(json, (String) doc.getField("DocId").getFirstValue());
} catch (Exception e) {
throw new SearchEngineException("Failed to convert to JSON", e);
}
}
/**
* Some field values are collections. Add the members of the collection
* instead.
*/
private Map<String, List<Object>> convertDocToMap(SearchInputDocument doc) {
Map<String, List<Object>> map = new HashMap<>();
for (SearchInputField field : doc.getFieldMap().values()) {
ArrayList<Object> list = new ArrayList<>();
for (Object value : field.getValues()) {
if (value instanceof Collection) {
Collection<?> cValue = (Collection<?>) value;
list.addAll(cValue);
} else {
list.add(value);
}
}
map.put(field.getName(), list);
}
return map;
}
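    // PUT {baseUrl}/_doc/{id}: Elasticsearch creates the document with that ID,
    // or replaces the existing one.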
private void putToElastic(String json, String docId)
throws SearchEngineException {
try {
String url = baseUrl + "/_doc/"
+ URLEncoder.encode(docId, "UTF8");
Response response = Request.Put(url)
.bodyString(json, ContentType.APPLICATION_JSON).execute();
log.debug("Response from Elasticsearch: "
+ response.returnContent().asString());
} catch (Exception e) {
throw new SearchEngineException("Failed to put to Elasticsearch",
e);
}
}
}

View file

@@ -0,0 +1,42 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.util.HashMap;
import java.util.Map;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
/**
* The nuts and bolts of getting the number of documents in the Elasticsearch
* index.
*/
public class ESCounter {
private final String baseUrl;
public ESCounter(String baseUrl) {
this.baseUrl = baseUrl;
}
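    // GET {baseUrl}/_doc/_count returns JSON like {"count": 417, ...};
    // only the "count" value is read here.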
public int count() throws SearchEngineException {
try {
String url = baseUrl + "/_doc/_count";
Response response = Request.Get(url).execute();
String json = response.returnContent().asString();
@SuppressWarnings("unchecked")
Map<String, Object> map = new ObjectMapper().readValue(json,
HashMap.class);
return (Integer) map.get("count");
} catch (Exception e) {
throw new SearchEngineException("Failed to put to Elasticsearch",
e);
}
}
}

View file

@@ -0,0 +1,147 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.entity.ContentType;
import org.apache.http.util.EntityUtils;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
/**
* The nuts and bolts of deleting documents from the Elasticsearch index.
*/
public class ESDeleter {
private static final Log log = LogFactory.getLog(ESDeleter.class);
private final String baseUrl;
    /**
     * @param baseUrl the base URL of the Elasticsearch index
     */
public ESDeleter(String baseUrl) {
this.baseUrl = baseUrl;
}
public void deleteByIds(Collection<String> ids)
throws SearchEngineException {
for (String id : ids) {
deleteById(id);
}
}
private void deleteById(String id) throws SearchEngineException {
try {
String url = baseUrl + "/_doc/"
+ URLEncoder.encode(id, "UTF8");
Response response = Request.Delete(url).execute();
String json = response.returnContent().asString();
} catch (HttpResponseException e) {
if (e.getStatusCode() == 404) {
// Don't care if it has already been deleted.
} else {
throw new SearchEngineException(
"Failed to delete Elasticsearch document " + id, e);
}
} catch (Exception e) {
throw new SearchEngineException(
"Failed to delete Elasticsearch document " + id, e);
}
}
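    // POST {baseUrl}/_delete_by_query removes every document that matches the
    // query in the request body.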
public void deleteByQuery(String queryString) throws SearchEngineException {
String url = baseUrl + "/_delete_by_query";
SearchQuery query = new BaseSearchQuery().setQuery(queryString);
String queryJson = new QueryConverter(query).asString();
try {
Response response = Request.Post(url)
.bodyString(queryJson, ContentType.APPLICATION_JSON)
.execute();
BaseResponseHandler handler = new BaseResponseHandler();
response.handleResponse(handler);
if (handler.getStatusCode() >= 400) {
log.warn(String.format(
"Failed to delete Elasticsearch documents by query: %s, %d - %s\n%s",
queryString, handler.getStatusCode(),
handler.getReasonPhrase(), handler.getContentString()));
}
} catch (IOException e) {
throw new SearchEngineException("Failed to delete Elasticsearch "
+ "documents by query " + queryString, e);
}
}
// ----------------------------------------------------------------------
// Helper class for interpreting HttpResponse errors
// ----------------------------------------------------------------------
private class BaseResponseHandler implements ResponseHandler<Object> {
private int statusCode;
private String reasonPhrase;
private Map<String, List<String>> headers;
private String contentString;
@Override
public Object handleResponse(org.apache.http.HttpResponse innerResponse)
throws IOException {
StatusLine statusLine = innerResponse.getStatusLine();
statusCode = statusLine.getStatusCode();
reasonPhrase = statusLine.getReasonPhrase();
headers = new HashMap<>();
for (Header header : innerResponse.getAllHeaders()) {
String name = header.getName();
if (!headers.containsKey(name)) {
headers.put(name, new ArrayList<String>());
}
headers.get(name).add(header.getValue());
}
HttpEntity entity = innerResponse.getEntity();
if (entity == null) {
contentString = "";
} else {
contentString = EntityUtils.toString(entity);
}
return "";
}
public int getStatusCode() {
return statusCode;
}
public String getReasonPhrase() {
return reasonPhrase;
}
public Map<String, List<String>> getHeaders() {
return headers;
}
public String getContentString() {
return contentString;
}
}
}

View file

@@ -0,0 +1,41 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
/**
* Just does a "commit" or "flush" to the index.
*/
public class ESFlusher {
private static final Log log = LogFactory.getLog(ESFlusher.class);
private final String baseUrl;
public ESFlusher(String baseUrl) {
this.baseUrl = baseUrl;
}
public void flush() throws SearchEngineException {
flush(false);
}
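    // Calls the index's _flush endpoint; "?wait_for_ongoing" asks Elasticsearch
    // to block until any flush already in progress has completed.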
public void flush(boolean wait) throws SearchEngineException {
try {
String url = baseUrl + "/_flush"
+ (wait ? "?wait_for_ongoing" : "");
Response response = Request.Get(url).execute();
String json = response.returnContent().asString();
log.debug("flush response: " + json);
} catch (Exception e) {
throw new SearchEngineException("Failed to put to Elasticsearch",
e);
}
}
}

View file

@@ -0,0 +1,106 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpEntityEnclosingRequestBase;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
/**
* Convert a SearchQuery to JSON, send it to Elasticsearch, and convert the JSON
* response to a SearchResponse.
*/
public class ESQuery {
private static final Log log = LogFactory.getLog(ESQuery.class);
private final String baseUrl;
public ESQuery(String baseUrl) {
this.baseUrl = baseUrl;
}
public SearchResponse query(SearchQuery query)
throws SearchEngineException {
String queryString = new QueryConverter(query).asString();
String response = doTheQuery(queryString);
return new ResponseParser(response).parse();
}
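    // Sends the converted query DSL to {baseUrl}/_search, as a GET request
    // with a body (see ESFunkyGetRequest, below).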
private String doTheQuery(String queryString) {
log.debug("QUERY: " + queryString);
try {
String url = baseUrl + "/_search";
HttpResponse response = new ESFunkyGetRequest(url)
.bodyString(queryString, ContentType.APPLICATION_JSON)
.execute();
String responseString = IOUtils
.toString(response.getEntity().getContent());
log.debug("RESPONSE: " + responseString);
return responseString;
} catch (Exception e) {
log.error("Failed to put to Elasticsearch", e);
return "";
}
}
// ----------------------------------------------------------------------
// Helper class -- a GET request that accepts a body
// ----------------------------------------------------------------------
/**
* The HttpClient implementations, both regular and conversational, do not
* allow you to put a body on a GET request. In online discussion, some say
* that the HTTP spec is ambiguous on this point, so each implementation
 * makes its own choice. For example, curl allows it.
 *
 * More to the point, however, Elasticsearch requires it. So here's a
* simple class to make that possible.
*
* USE POST INSTEAD!!
*/
private static class ESFunkyGetRequest
extends HttpEntityEnclosingRequestBase {
public ESFunkyGetRequest(String url) throws SearchEngineException {
super();
try {
setURI(new URI(url));
} catch (URISyntaxException e) {
throw new SearchEngineException(e);
}
}
public ESFunkyGetRequest bodyString(String contents,
ContentType contentType) {
setEntity(new StringEntity(contents, contentType));
return this;
}
public HttpResponse execute() throws SearchEngineException {
try {
return HttpClientFactory.getHttpClient().execute(this);
} catch (IOException e) {
throw new SearchEngineException(e);
}
}
@Override
public String getMethod() {
return "GET";
}
}
}

View file

@@ -0,0 +1,142 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.util.Arrays;
import java.util.Collection;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.modules.Application;
import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
/**
* A first draft of an Elasticsearch implementation.
*/
public class ElasticSearchEngine implements SearchEngine {
private static final Log log = LogFactory.getLog(ElasticSearchEngine.class);
// ----------------------------------------------------------------------
// Configuration
// ----------------------------------------------------------------------
private String baseUrl;
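    // Populated from applicationSetup.n3 via :hasBaseUrl,
    // e.g. "http://localhost:9200/vivo" (see this package's README).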
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBaseUrl")
public void setBaseUrl(String url) {
if (baseUrl == null) {
if (url.endsWith("/")) {
url = url.substring(0, url.length() - 1);
}
baseUrl = url;
} else {
throw new IllegalStateException(
"Configuration includes multiple base URLs: " + url
+ ", and " + baseUrl);
}
}
@Validation
public void validate() throws Exception {
if (baseUrl == null) {
throw new IllegalStateException(
"Configuration did not include a base URL.");
}
}
// ----------------------------------------------------------------------
// The instance
// ----------------------------------------------------------------------
@Override
public void startup(Application application, ComponentStartupStatus ss) {
log.warn("ElasticSearchEngine.startup() not implemented."); // TODO
}
@Override
public void shutdown(Application application) {
// TODO Flush the buffers
log.warn("ElasticSearchEngine.shutdown not implemented.");
}
@Override
public void ping() throws SearchEngineException {
// TODO What's the simplest we can do? Another smoke test?
log.warn("ElasticSearchEngine.ping() not implemented."); // TODO
}
@Override
public SearchInputDocument createInputDocument() {
return new BaseSearchInputDocument();
}
@Override
public void add(SearchInputDocument... docs) throws SearchEngineException {
add(Arrays.asList(docs));
}
@Override
public void add(Collection<SearchInputDocument> docs)
throws SearchEngineException {
new ESAdder(baseUrl).add(docs);
}
@Override
public void commit() throws SearchEngineException {
new ESFlusher(baseUrl).flush();
}
@Override
public void commit(boolean wait) throws SearchEngineException {
new ESFlusher(baseUrl).flush(wait);
}
@Override
public void deleteById(String... ids) throws SearchEngineException {
deleteById(Arrays.asList(ids));
}
@Override
public void deleteById(Collection<String> ids)
throws SearchEngineException {
new ESDeleter(baseUrl).deleteByIds(ids);
}
@Override
public void deleteByQuery(String query) throws SearchEngineException {
new ESDeleter(baseUrl).deleteByQuery(query);
}
@Override
public SearchQuery createQuery() {
return new BaseSearchQuery();
}
@Override
public SearchQuery createQuery(String queryText) {
BaseSearchQuery query = new BaseSearchQuery();
query.setQuery(queryText);
return query;
}
@Override
public SearchResponse query(SearchQuery query)
throws SearchEngineException {
return new ESQuery(baseUrl).query(query);
}
@Override
public int documentCount() throws SearchEngineException {
return new ESCounter(baseUrl).count();
}
}

View file

@@ -0,0 +1,53 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocumentList;
/**
* A simple implementation. In fact, this is so simple that perhaps it should be
* named BaseSearchResultDocumentList.
*/
class ElasticSearchResultDocumentList implements SearchResultDocumentList {
private final List<SearchResultDocument> documents;
private final long numberFound;
public ElasticSearchResultDocumentList(List<SearchResultDocument> documents,
long numberFound) {
this.documents = documents;
this.numberFound = numberFound;
}
@Override
public Iterator<SearchResultDocument> iterator() {
return documents.iterator();
}
@Override
public long getNumFound() {
        return numberFound;
}
@Override
public int size() {
return documents.size();
}
@Override
public SearchResultDocument get(int i) {
return documents.get(i);
}
@Override
public String toString() {
return String.format(
"ElasticSearchResultDocumentList[numberFound=%s, documents=%s]",
numberFound, documents);
}
}

View file

@@ -0,0 +1,216 @@
# What is this package?
* The first draft of an Elasticsearch driver for VIVO
# What has been done?
* Implement the `SearchEngine` interface
* Classes in `edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch`
* No attempt to add new functions.
# How to experiment with it?
* Install Elasticsearch somewhere.
* Create a search index with the appropriate mapping (see below).
* Check out VIVO and this branch of Vitro (see below), and do the usual installation procedure.
* Modify `{vitro_home}/config/applicationSetup.n3` to use this driver (see below).
* Start Elasticsearch
* Start VIVO
# Not ready for production
* Documentation
* Instructions on how to install and configure the driver.
  * Instructions on how to set up Elasticsearch?
* Smoke test
  * Display a warning if the Elasticsearch server is not responding.
* Functional testing
* Are we getting the proper search results?
* Are search results in the order that we would like?
* Improved snippets
* Copy the technique used for Solr
* Code improvement
* Rigorous closing of HTTP connections.
  * IoC for the HTTP code, to help in unit testing
* Consistent use of exceptions and logging
* Unit tests
* Automatic initialization of the index
  * If VIVO detects an empty index, apply the mapping (a rough sketch of the check appears below).
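
A minimal sketch of that check (hypothetical; the `_count` endpoint is the same one the driver's `ESCounter` uses, and the index name `vivo` comes from the mapping script below):
```
curl -s "localhost:9200/vivo/_doc/_count"
# {"count":0,...}  -> the index is empty: (re)create it with the mapping script below
# {"error":{...}}  -> the index is missing: create it with the mapping script below
```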
# The next steps: adding functionality
## Stay within the framework
* Add fields that enhance the contents of the search index documents (see below).
* Add data distributors that run queries and format the output (see below).
## Go outside the framework
* Add functions to the Elasticsearch driver that the Solr driver will simply ignore.
* Or remove Solr entirely
  * Query Elasticsearch directly (see the example after this list)
* Or write a data distributor that will run the query
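
For example, a raw query against the index might look like this (illustrative only: `Cornell` is a placeholder search term, and the `query_string`/`ALLTEXT` form is the same one the driver's `QueryConverter` emits):
```
curl -X GET "localhost:9200/vivo/_search?pretty" -H 'Content-Type: application/json' -d'
{
  "query": {
    "query_string": {
      "default_field": "ALLTEXT",
      "query": "Cornell"
    }
  }
}
'
```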
# The details:
## Check out VIVO and Vitro
* For now, the Elasticsearch driver only lives in my fork of Vitro
* No changes to VIVO are required (yet).
```
git clone https://github.com/vivo-project/VIVO.git
git clone -b feature/elasticsearchExperiments https://github.com/j2blake/Vitro.git
```
## A mapping for the search index
* If the index uses the default mapping, it will not work correctly.
* Some fields must be declared as `keyword`, some as unstemmed, etc.
* Example mapping script:
```
curl -X PUT "localhost:9200/vivo?pretty" -H 'Content-Type: application/json' -d'
{
"mappings": {
"_doc": {
"properties": {
"ALLTEXT": {
"type": "text",
"analyzer": "english"
},
"ALLTEXTUNSTEMMED": {
"type": "text",
"analyzer": "standard"
},
"DocId": {
"type": "keyword"
},
"classgroup": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"mostSpecificTypeURIs": {
"type": "keyword"
},
"indexedTime": {
"type": "long"
},
"nameRaw": {
"type": "keyword"
},
"URI": {
"type": "keyword"
},
"THUMBNAIL": {
"type": "integer"
},
"THUMBNAIL_URL": {
"type": "keyword"
},
"nameLowercaseSingleValued": {
"type": "text",
"analyzer": "standard",
"fielddata": "true"
},
"BETA" : {
"type" : "float"
}
}
}
},
"query": {
"default_field": "ALLTEXT"
}
}
'
```
* __*Note:*__ The first line of the script specifies the name of the index as `vivo`.
Any name may be used, but it must match the "base URL" that is specified in `applicationSetup.n3` (see below).
* __*Note:*__ The same first line specifies the location and port number of the Elasticsearch server.
Again, any location and port may be used, but they must match the "base URL" in `applicationSetup.n3`.
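* __*Note:*__ To confirm that the index and its mapping were created as intended, Elasticsearch can echo the mapping back:
```
curl -X GET "localhost:9200/vivo/_mapping?pretty"
```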
## Modify `applicationSetup.n3`
* Change this:
```
# ----------------------------
#
# Search engine module:
# The Solr-based implementation is the only standard option, but it can be
# wrapped in an "instrumented" wrapper, which provides additional logging
# and more rigorous life-cycle checking.
#
:instrumentedSearchEngineWrapper
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
:wraps :solrSearchEngine .
```
* To this:
```
# ----------------------------
#
# Search engine module:
# The Solr-based implementation is the only standard option, but it can be
# wrapped in an "instrumented" wrapper, which provides additional logging
# and more rigorous life-cycle checking.
#
:instrumentedSearchEngineWrapper
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
:wraps :elasticSearchEngine .
:elasticSearchEngine
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.ElasticSearchEngine> ,
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
:hasBaseUrl "http://localhost:9200/vivo" .
```
## Enhance the contents of the search index
### An example: Publication URIs in the author's search document
* Add a keyword field to the search index
```
"publicationURI": {
"type": "keyword"
},
```
* Add a `DocumentModifier` to VIVO.
```
:documentModifier_publications
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.SelectQueryDocumentModifier> ,
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
rdfs:label "URIs of publications are added to publicationURI field." ;
:hasTargetField "publicationURI" ;
:hasSelectQuery """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX vivo: <http://vivoweb.org/ontology/core#>
PREFIX bibo: <http://purl.org/ontology/bibo/>
SELECT ?publication
WHERE {
?uri vivo:relatedBy ?authorship .
?authorship a vivo:Authorship .
?authorship vivo:relates ?publication .
?publication a bibo:Document .
}
""" .
```
## Use data distributors to query the search index
* Install the Data Distribution API
* Add a distributor:
```
:drill_by_URI
a <java:edu.cornell.library.scholars.webapp.controller.api.distribute.DataDistributor> ,
<java:edu.cornell.library.scholars.webapp.controller.api.distribute.search.DrillDownSearchByUriDataDistributor> ;
:actionName "searchAndDrill" .
```
* Run the query:
```
http://localhost:8080/vivo/api/dataRequest/searchAndDrill?uri=http://scholars.cornell.edu/individual/mj495
```

View file

@@ -0,0 +1,77 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Build a Map of Objects, suitable for marshalling by Jackson.
*
* Include conditional logic, so null values, empty maps, or empty lists will
* not be added, unless you use the special values.
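 *
 * For example, tree().put("size", 10).put("sort", EMPTY_JSON_MAP).asMap()
 * yields a Map that Jackson renders as {"size":10,"sort":{}}.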
*/
public class JsonTree {
/**
* Empty maps will not be added, except for this one.
*/
public static final Map<String, Object> EMPTY_JSON_MAP = Collections
.emptyMap();
/**
* Empty lists will not be added, except for this one.
*/
public static final List<Object> EMPTY_JSON_LIST = Collections.emptyList();
/**
* Create the tree
*/
public static JsonTree tree() {
return new JsonTree();
}
/**
 * This will cause zero or negative integers to be ignored.
*/
public static Integer ifPositive(int i) {
return (i > 0) ? i : null;
}
private Map<String, Object> map = new HashMap<>();
public JsonTree put(String key, Object value) {
if (isSignificant(value)) {
storeIt(key, value);
}
return this;
}
private boolean isSignificant(Object value) {
if (value == null) {
return false;
}
if (value instanceof Map && ((Map) value).isEmpty()
&& value != EMPTY_JSON_MAP) {
return false;
}
if (value instanceof List && ((List) value).isEmpty()
&& value != EMPTY_JSON_LIST) {
return false;
}
return true;
}
private void storeIt(String key, Object value) {
if (value instanceof JsonTree) {
map.put(key, ((JsonTree) value).asMap());
} else {
map.put(key, value);
}
}
public Map<String, Object> asMap() {
return new HashMap<>(map);
}
}

View file

@@ -0,0 +1,172 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.EMPTY_JSON_MAP;
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.ifPositive;
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.tree;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery.Order;
/**
* Accept a SearchQuery and make it available as a JSON string, suitable for
* Elasticsearch.
*/
public class QueryConverter {
private static final Log log = LogFactory.getLog(QueryConverter.class);
private final SearchQuery query;
private final Map<String, Object> queryAndFilters;
private final Map<String, Object> sortFields;
private final Map<String, Object> facets;
private final Map<String, Object> highlighter;
private final List<String> returnFields;
private final Map<String, Object> fullMap;
public QueryConverter(SearchQuery query) {
this.query = query;
this.queryAndFilters = filteredOrNot();
this.sortFields = figureSortFields();
this.facets = figureFacets();
this.highlighter = figureHighlighter();
this.returnFields = figureReturnFields();
this.fullMap = figureFullMap();
}
private Map<String, Object> filteredOrNot() {
if (query.getFilters().isEmpty()) {
return new QueryStringMap(query.getQuery()).map;
} else {
return buildFilterStructure();
}
}
private Map<String, Object> buildFilterStructure() {
return tree() //
.put("bool", tree() //
.put("must", new QueryStringMap(query.getQuery()).map) //
.put("filter", buildFiltersList())) //
.asMap();
}
private List<Map<String, Object>> buildFiltersList() {
List<Map<String, Object>> list = new ArrayList<>();
for (String filter : query.getFilters()) {
list.add(new QueryStringMap(filter).map);
}
return list;
}
private Map<String, Object> figureSortFields() {
Map<String, Order> fields = query.getSortFields();
Map<String, Object> map = new HashMap<>();
for (String name : fields.keySet()) {
String sortOrder = fields.get(name).toString().toLowerCase();
map.put(name, sortOrder);
}
return map;
}
private Map<String, Object> figureFacets() {
Map<String, Object> map = new HashMap<>();
for (String field : query.getFacetFields()) {
map.put("facet_" + field, figureFacet(field));
}
return map;
}
private Map<String, Object> figureHighlighter() {
return tree() //
.put("fields", tree() //
.put("ALLTEXT", EMPTY_JSON_MAP))
.asMap();
}
private Map<String, Object> figureFacet(String field) {
return tree() //
.put("terms", tree() //
.put("field", field) //
.put("size", ifPositive(query.getFacetLimit())) //
.put("min_doc_count",
ifPositive(query.getFacetMinCount()))) //
.asMap();
}
private List<String> figureReturnFields() {
return new ArrayList<>(query.getFieldsToReturn());
}
private Map<String, Object> figureFullMap() {
return tree() //
.put("query", queryAndFilters) //
.put("from", ifPositive(query.getStart())) //
.put("highlight", highlighter)
.put("size", ifPositive(query.getRows())) //
.put("sort", sortFields) //
.put("_source", returnFields) //
.put("aggregations", facets) //
.asMap();
}
public String asString() throws SearchEngineException {
try {
return new ObjectMapper().writeValueAsString(fullMap);
} catch (JsonProcessingException e) {
throw new SearchEngineException(e);
}
}
private static class QueryStringMap {
public final Map<String, Object> map;
public QueryStringMap(String queryString) {
map = new HashMap<>();
map.put("query_string", makeInnerMap(escape(queryString)));
}
/**
* This is a kluge, but perhaps it will work for now.
*
* Apparently Solr is willing to put up with query strings that contain
* special characters in odd places, but Elasticsearch is not.
*
* So, a query string of "classgroup:http://this/that" must be escaped
* as "classgroup:http\:\/\/this\/that". Notice that the first colon
* delimits the field name, and so must not be escaped.
*
* But what if no field is specified? Then all colons must be escaped.
* How would we distinguish that?
*
* And what if the query is more complex, and more than one field is
* specified? What if other special characters are included?
*
* This could be a real problem.
*/
private String escape(String queryString) {
return queryString.replace(":", "\\:").replace("/", "\\/")
.replaceFirst("\\\\:", ":");
}
private Map<String, String> makeInnerMap(String queryString) {
Map<String, String> inner = new HashMap<>();
inner.put("default_field", "ALLTEXT");
inner.put("query", queryString);
return inner;
}
}
}

View file

@@ -0,0 +1,182 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField.Count;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField.BaseCount;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResponse;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResultDocument;
/**
 * Elasticsearch sends a JSON response to a query. Parse it into a
* SearchResponse.
*/
class ResponseParser {
private static final Log log = LogFactory.getLog(ResponseParser.class);
private final Map<String, Object> responseMap;
private Map<String, Map<String, List<String>>> highlightingMap;
private Map<String, SearchFacetField> facetFieldsMap;
private long totalHits;
private List<SearchResultDocument> documentList;
@SuppressWarnings("unchecked")
public ResponseParser(String responseString) throws SearchEngineException {
try {
this.responseMap = new ObjectMapper().readValue(responseString,
HashMap.class);
} catch (IOException e) {
throw new SearchEngineException(e);
}
}
public SearchResponse parse() {
parseDocumentList();
parseFacetFields();
SearchResponse response = new BaseSearchResponse(highlightingMap,
facetFieldsMap,
new ElasticSearchResultDocumentList(documentList, totalHits));
log.debug("ESQuery.ResponseParser.parse: " + response);
return response;
}
private void parseFacetFields() {
facetFieldsMap = new HashMap<>();
@SuppressWarnings("unchecked")
Map<String, Map<String, Object>> aggregations = (Map<String, Map<String, Object>>) responseMap
.get("aggregations");
if (aggregations == null) {
return;
}
for (String key : aggregations.keySet()) {
if (key.startsWith("facet_")) {
String name = key.substring(6);
parseFacetField(name, aggregations.get(key));
}
}
}
private void parseFacetField(String name, Map<String, Object> facetMap) {
@SuppressWarnings("unchecked")
List<Map<String, Object>> bucketsList = (List<Map<String, Object>>) facetMap
.get("buckets");
if (bucketsList == null) {
return;
}
List<Count> counts = new ArrayList<>();
for (Map<String, Object> bucket : bucketsList) {
counts.add(new BaseCount((String) bucket.get("key"),
(Integer) bucket.get("doc_count")));
}
facetFieldsMap.put(name, new BaseSearchFacetField(name, counts));
}
private void parseDocumentList() {
documentList = new ArrayList<>();
highlightingMap = new HashMap<>();
@SuppressWarnings("unchecked")
Map<String, Object> uberHits = (Map<String, Object>) responseMap
.get("hits");
if (uberHits == null) {
log.warn("Didn't find a 'hits' field " + "in the query response: "
+ responseMap);
return;
}
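        // Note: in Elasticsearch 6.x, "hits.total" is a bare integer; 7.x wraps
        // it in an object ({"value": ..., "relation": ...}), which would break
        // the cast below.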
        Integer total = (Integer) uberHits.get("total");
        if (total == null) {
            log.warn("Didn't find a 'hits.total' field "
                    + "in the query response: " + responseMap);
            return;
        }
        totalHits = total;
@SuppressWarnings("unchecked")
List<Map<String, Object>> hits = (List<Map<String, Object>>) uberHits
.get("hits");
if (hits == null) {
log.warn("Didn't find a 'hits.hits' field "
+ "in the query response: " + responseMap);
return;
}
parseDocuments(hits);
}
private void parseDocuments(List<Map<String, Object>> hits) {
for (Map<String, Object> hit : hits) {
SearchResultDocument doc = parseDocument(hit);
if (doc != null) {
documentList.add(doc);
Map<String, List<String>> highlight = parseHighlight(hit);
if (highlight != null) {
highlightingMap.put(doc.getUniqueId(), highlight);
}
}
}
}
private SearchResultDocument parseDocument(Map<String, Object> hitMap) {
@SuppressWarnings("unchecked")
Map<String, Collection<Object>> sourceMap = (Map<String, Collection<Object>>) hitMap
.get("_source");
if (sourceMap == null) {
log.warn("Didn't find a '_source' field in the hit: " + hitMap);
return null;
}
String id = (String) hitMap.get("_id");
if (id == null) {
log.warn("Didn't find a '_id' field in the hit: " + hitMap);
return null;
}
return new BaseSearchResultDocument(id, sourceMap);
}
private Map<String, List<String>> parseHighlight(
Map<String, Object> hitMap) {
@SuppressWarnings("unchecked")
Map<String, List<String>> highlightMap = (Map<String, List<String>>) hitMap
.get("highlight");
if (highlightMap == null) {
log.debug("Didn't find a 'highlight' field in the hit: " + hitMap);
return null;
}
@SuppressWarnings("unchecked")
List<String> snippets = highlightMap.get("ALLTEXT");
if (snippets == null) {
log.warn("Didn't find a 'highlight.ALLTEXT' field in the hit: "
+ hitMap);
return null;
}
Map<String, List<String>> snippetMap = new HashMap<>();
snippetMap.put("ALLTEXT", snippets);
return snippetMap;
}
}

View file

@@ -51,7 +51,6 @@ public class SolrConversionUtils {
 			SearchInputDocument doc) {
 		SolrInputDocument solrDoc = new SolrInputDocument(
 				convertToSolrInputFieldMap(doc.getFieldMap()));
-		solrDoc.setDocumentBoost(doc.getDocumentBoost());
 		return solrDoc;
 	}
@@ -81,11 +80,10 @@
 			// No values, nothing to do.
 		} else if (values.size() == 1) {
 			// One value? Insure that it is accepted as such.
-			solrField.addValue(values.iterator().next(),
-					searchInputField.getBoost());
+			solrField.addValue(values.iterator().next());
 		} else {
 			// A collection of values? Add them.
-			solrField.addValue(values, searchInputField.getBoost());
+			solrField.addValue(values);
 		}

 		return solrField;
@@ -125,7 +123,6 @@
 	 * Convert from a SearchQuery to a SolrQuery, so the Solr server may execute
 	 * it.
 	 */
-	@SuppressWarnings("deprecation")
 	static SolrQuery convertToSolrQuery(SearchQuery query) {
 		SolrQuery solrQuery = new SolrQuery(query.getQuery());
 		solrQuery.setStart(query.getStart());
@@ -141,7 +138,7 @@

 		Map<String, Order> sortFields = query.getSortFields();
 		for (String sortField : sortFields.keySet()) {
-			solrQuery.addSortField(sortField,
+			solrQuery.addOrUpdateSort(sortField,
 					convertToSolrOrder(sortFields.get(sortField)));
 		}

View file

@@ -10,10 +10,13 @@ import java.util.Collection;
 import javax.servlet.ServletContext;

+import org.apache.http.impl.client.HttpClientBuilder;
+import org.apache.http.impl.client.StandardHttpRequestRetryHandler;
+import org.apache.solr.client.solrj.SolrClient;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer;
-import org.apache.solr.client.solrj.impl.HttpSolrServer;
+import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient;
+import org.apache.solr.client.solrj.impl.HttpSolrClient;
 import org.apache.solr.client.solrj.response.QueryResponse;

 import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
@@ -32,8 +35,8 @@ import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
  * The Solr-based implementation of SearchEngine.
  */
 public class SolrSearchEngine implements SearchEngine {
-	private HttpSolrServer queryEngine;
-	private ConcurrentUpdateSolrServer updateEngine;
+	private SolrClient queryEngine;
+	private ConcurrentUpdateSolrClient updateEngine;

 	/**
 	 * Set up the http connection with the solr server
@@ -53,16 +56,26 @@ public class SolrSearchEngine implements SearchEngine {
 		}

 		try {
-			queryEngine = new HttpSolrServer(solrServerUrlString);
-			queryEngine.setSoTimeout(10000); // socket read timeout
-			queryEngine.setConnectionTimeout(10000);
-			queryEngine.setDefaultMaxConnectionsPerHost(100);
-			queryEngine.setMaxTotalConnections(100);
-			queryEngine.setMaxRetries(1);
-			updateEngine = new ConcurrentUpdateSolrServer(solrServerUrlString, 100, 1);
-			updateEngine.setConnectionTimeout(10000);
-			updateEngine.setPollQueueTime(25);
+			HttpSolrClient.Builder builder = new HttpSolrClient.Builder(solrServerUrlString);
+			builder.withSocketTimeout(10000); // socket read timeout
+			builder.withConnectionTimeout(10000);
+			HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
+			httpClientBuilder.setMaxConnPerRoute(100);
+			httpClientBuilder.setMaxConnTotal(100);
+			httpClientBuilder.setRetryHandler(new StandardHttpRequestRetryHandler(1, false));
+			builder.withHttpClient(httpClientBuilder.build());
+			queryEngine = builder.build();
+
+			ConcurrentUpdateSolrClient.Builder updateBuilder =
+					new ConcurrentUpdateSolrClient.Builder(solrServerUrlString);
+			updateBuilder.withConnectionTimeout(10000);
+			// no apparent 7.4.0 analogy to `setPollQueueTime(25)`
+			updateEngine = updateBuilder.build();

 			css.info("Set up the Solr search engine; URL = '" + solrServerUrlString + "'.");
 		} catch (Exception e) {
@@ -72,8 +85,12 @@
 	@Override
 	public void shutdown(Application application) {
-		queryEngine.shutdown();
-		updateEngine.shutdown();
+		try {
+			queryEngine.close();
+		} catch (IOException e) {
+			throw new RuntimeException("Error shutting down 'queryEngine'", e);
+		}
+		updateEngine.shutdownNow();
 	}

 	@Override
@@ -170,7 +187,7 @@
 			SolrQuery solrQuery = SolrConversionUtils.convertToSolrQuery(query);
 			QueryResponse response = queryEngine.query(solrQuery);
 			return SolrConversionUtils.convertToSearchResponse(response);
-		} catch (SolrServerException e) {
+		} catch (SolrServerException | IOException e) {
 			throw appropriateException(
 					"Solr server failed to execute the query" + query, e);
 		}

View file

@@ -0,0 +1,128 @@
/* $This file is distributed under the terms of the license in LICENSE$ */
package edu.cornell.mannlib.vitro.webapp.servlet.setup;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus;
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * If we can't connect to Elasticsearch, add a Warning item to the StartupStatus.
*/
public class ElasticSmokeTest {
private static final Log log = LogFactory.getLog(ElasticSmokeTest.class);
private final ServletContextListener listener;
public ElasticSmokeTest(ServletContextListener listener) {
this.listener = listener;
}
public void doTest(ServletContextEvent sce) {
final StartupStatus ss = StartupStatus.getBean(sce.getServletContext());
String elasticUrlString = ConfigurationProperties.getBean(sce).getProperty("vitro.local.elastic.url", "");
if (elasticUrlString.isEmpty()) {
ss.fatal(listener, "Can't connect to ElasticSearch engine. "
+ "runtime.properties must contain a value for "
+ "vitro.local.elastic.url");
return;
}
URL elasticUrl = null;
try {
elasticUrl = new URL(elasticUrlString);
        } catch (MalformedURLException e) {
            ss.fatal(listener, "Can't connect to ElasticSearch engine. "
                    + "The value for vitro.local.elastic.url "
                    + "in runtime.properties is not a valid URL: '"
                    + elasticUrlString + "'", e);
            return; // don't try to ping a null URL below
        }
ss.info(listener, "Starting ElasticSearch test.");
checkConnection(elasticUrl, ss);
}
private void checkConnection(URL elasticUrl, StartupStatus ss) {
try {
new ElasticPinger(elasticUrl).ping();
reportGoodPing(ss);
} catch (ElasticProblemException e) {
reportPingProblem(ss, e);
}
}
private void reportGoodPing(StartupStatus ss) {
ss.info(listener, "The ElasticSearch server responded to a 'ping'.");
}
private void reportPingProblem(StartupStatus ss, ElasticProblemException e) {
ss.warning(listener, "The ElasticSearch engine did not respond to a 'ping' request", e);
}
    /**
     * Issue a "ping" to Elasticsearch. Any problem is reported to the
     * StartupStatus as a warning by the caller.
     */
private static class ElasticPinger {
private final URL elasticUrl;
private final HttpClient httpClient = HttpClientFactory.getHttpClient();
public ElasticPinger(URL elasticUrl) {
this.elasticUrl = elasticUrl;
}
public void ping() throws ElasticProblemException {
try {
HttpGet method = new HttpGet(elasticUrl.toExternalForm());
log.debug("Trying to ping ElasticSearch");
HttpResponse response = httpClient.execute(method);
try {
log.debug("Finished pinging ElasticSearch");
int statusCode = response.getStatusLine().getStatusCode();
if (statusCode != HttpStatus.SC_OK) {
throw new ElasticProblemException(statusCode);
}
} finally {
EntityUtils.consume(response.getEntity());
}
} catch (IOException e) {
throw new ElasticProblemException(e);
}
}
}
private static class ElasticProblemException extends Exception {
private final int statusCode;
ElasticProblemException(int statusCode) {
super("HTTP status code = " + statusCode);
this.statusCode = statusCode;
}
ElasticProblemException(Throwable cause) {
super(cause);
this.statusCode = 0;
}
int getStatusCode() {
return this.statusCode;
}
}
}

View file

@@ -0,0 +1,48 @@
/* $This file is distributed under the terms of the license in LICENSE$ */
package edu.cornell.mannlib.vitro.webapp.servlet.setup;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import javax.servlet.ServletContextEvent;
import javax.servlet.ServletContextListener;
/**
* Start up the appropriate search engine smoke test based on the configured URL property.
*/
public class SearchEngineSmokeTest implements ServletContextListener {
private static final Log log = LogFactory.getLog(SearchEngineSmokeTest.class);
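    // Exactly one of these two properties should be set in runtime.properties,
    // e.g. vitro.local.elastic.url = http://localhost:9200/vivo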
@Override
public void contextInitialized(ServletContextEvent sce) {
final StartupStatus ss = StartupStatus.getBean(sce.getServletContext());
String solrUrlString = ConfigurationProperties.getBean(sce).getProperty("vitro.local.solr.url", "");
String elasticUrlString = ConfigurationProperties.getBean(sce).getProperty("vitro.local.elastic.url", "");
if (!solrUrlString.isEmpty() && !elasticUrlString.isEmpty()) {
ss.fatal(this, "More than one search engine is configured: " + solrUrlString + ", and " + elasticUrlString);
} else if (solrUrlString.isEmpty() && elasticUrlString.isEmpty()) {
ss.fatal(this, "No search engine is configured");
} else if (!solrUrlString.isEmpty()) {
log.debug("Initializing Solr: " + solrUrlString);
new SolrSmokeTest(this).doTest(sce);
} else {
log.debug("Initializing ElasticSearch: " + elasticUrlString);
new ElasticSmokeTest(this).doTest(sce);
}
}
@Override
public void contextDestroyed(ServletContextEvent sce) {
// nothing to tear down.
}
}

View file

@@ -5,7 +5,6 @@ package edu.cornell.mannlib.vitro.webapp.servlet.setup;
 import java.io.IOException;
 import java.net.ConnectException;
 import java.net.MalformedURLException;
-import java.net.SocketTimeoutException;
 import java.net.URL;
 import java.net.UnknownHostException;
@@ -35,23 +34,28 @@ import org.apache.http.util.EntityUtils;
  *
  * If we can't connect to Solr, add a Warning item to the StartupStatus.
  */
-public class SolrSmokeTest implements ServletContextListener {
+public class SolrSmokeTest {
 	private static final Log log = LogFactory.getLog(SolrSmokeTest.class);

+	private final ServletContextListener listener;
+
 	/*
 	 * We don't want to treat socket timeout as a non-recoverable error like the
 	 * other exceptions. So pretend there's a status code for it instead.
 	 */
 	private static final int SOCKET_TIMEOUT_STATUS = -500;

-	@Override
-	public void contextInitialized(ServletContextEvent sce) {
+	public SolrSmokeTest(ServletContextListener listener) {
+		this.listener = listener;
+	}
+
+	public void doTest(ServletContextEvent sce) {
 		final StartupStatus ss = StartupStatus.getBean(sce.getServletContext());
 		String solrUrlString = ConfigurationProperties.getBean(sce)
 				.getProperty("vitro.local.solr.url", "");

 		if (solrUrlString.isEmpty()) {
-			ss.fatal(this, "Can't connect to Solr search engine. "
+			ss.fatal(listener, "Can't connect to Solr search engine. "
 					+ "runtime.properties must contain a value for "
 					+ "vitro.local.solr.url");
 			return;
@@ -62,27 +66,22 @@ public class SolrSmokeTest implements ServletContextListener {
 		try {
 			solrUrl = new URL(solrUrlString);
 		} catch (MalformedURLException e) {
-			ss.fatal(this, "Can't connect to Solr search engine. "
+			ss.fatal(listener, "Can't connect to Solr search engine. "
 					+ "The value for vitro.local.solr.url "
 					+ "in runtime.properties is not a valid URL: '"
 					+ solrUrlString + "'", e);
 		}

-		ss.info(this, "Starting thread for Solr test.");
-		new SolrSmokeTestThread(this, solrUrl, ss).start();
-	}
-
-	@Override
-	public void contextDestroyed(ServletContextEvent sce) {
-		// nothing to tear down.
+		ss.info(listener, "Starting thread for Solr test.");
+		new SolrSmokeTestThread(listener, solrUrl, ss).start();
 	}

 	private static class SolrSmokeTestThread extends VitroBackgroundThread {
-		private final SolrSmokeTest listener;
+		private final ServletContextListener listener;
 		private final URL solrUrl;
 		private final StartupStatus ss;

-		public SolrSmokeTestThread(SolrSmokeTest listener, URL solrUrl,
+		public SolrSmokeTestThread(ServletContextListener listener, URL solrUrl,
 				StartupStatus ss) {
 			super("SolrSmokeTest");
 			this.listener = listener;
@@ -205,6 +204,8 @@
 	 */
 	private static class SolrHomePager {
 		private static final long SLEEP_INTERVAL = 20000; // 20 seconds
+		private static final long SLEEP_MAX = 300000; // maximum sleep time: 5 minutes
+		private static long SLEEP_DURATION = 0; // how long have we been sleeping?

 		private final URL solrUrl;
 		private final HttpClient httpClient = HttpClientFactory.getHttpClient();
@@ -218,12 +219,7 @@
 		public void connect() throws SolrProblemException {
 			tryToConnect();

-			if (!isDone()) {
-				sleep();
-				tryToConnect();
-			}
-
-			if (!isDone()) {
+			while (!isDone() && SLEEP_DURATION < SLEEP_MAX) {
 				sleep();
 				tryToConnect();
 			}
@@ -234,17 +230,19 @@
 		}

 		private void tryToConnect() throws SolrProblemException {
+			SolrSmokeTest.log.debug("Trying to connect to Solr, wait up to " + SLEEP_MAX / 60000 + " minutes - " +
+					(int)(SLEEP_DURATION * 100.0 / SLEEP_MAX) + "%");
 			try {
-				HttpGet method = new HttpGet(solrUrl.toExternalForm());
-				SolrSmokeTest.log.debug("Trying to connect to Solr");
-				HttpResponse response = httpClient.execute(method);
+				HttpGet method = new HttpGet(solrUrl.toExternalForm() + "/select");
+				HttpResponse response = httpClient.execute(method);
 				try {
 					statusCode = response.getStatusLine().getStatusCode();
 					SolrSmokeTest.log.debug("HTTP status was " + statusCode);
 				} finally {
 					EntityUtils.consume(response.getEntity());
 				}
-			} catch (SocketTimeoutException e) {
+			} catch (IOException e) {
 				// Catch the exception so we can retry this.
 				// Save the status so we know why we failed.
 				statusCode = SolrSmokeTest.SOCKET_TIMEOUT_STATUS;
@@ -264,6 +262,7 @@
 		private void sleep() {
 			try {
+				SLEEP_DURATION += SLEEP_INTERVAL;
 				Thread.sleep(SLEEP_INTERVAL);
 			} catch (InterruptedException e) {
 				e.printStackTrace(); // Should never happen