commit
cc2b98dac0
128 changed files with 1516 additions and 63936 deletions
|
@ -0,0 +1,92 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
import org.apache.http.entity.ContentType;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
|
||||
|
||||
/**
|
||||
* The nuts and bolts of adding a document to the Elasticsearch index
|
||||
*/
|
||||
public class ESAdder {
|
||||
private static final Log log = LogFactory.getLog(ESAdder.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
public ESAdder(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public void add(Collection<SearchInputDocument> docs)
|
||||
throws SearchEngineException {
|
||||
for (SearchInputDocument doc : docs) {
|
||||
addDocument(doc);
|
||||
}
|
||||
}
|
||||
|
||||
private void addDocument(SearchInputDocument doc)
|
||||
throws SearchEngineException {
|
||||
try {
|
||||
Map<String, List<Object>> map = convertDocToMap(doc);
|
||||
String json = new ObjectMapper().writeValueAsString(map);
|
||||
log.debug("Adding document for '" + doc.getField("DocId") + "': "
|
||||
+ json);
|
||||
|
||||
putToElastic(json, (String) doc.getField("DocId").getFirstValue());
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to convert to JSON", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Some field values are collections. Add the members of the collection
|
||||
* instead.
|
||||
*/
|
||||
private Map<String, List<Object>> convertDocToMap(SearchInputDocument doc) {
|
||||
Map<String, List<Object>> map = new HashMap<>();
|
||||
for (SearchInputField field : doc.getFieldMap().values()) {
|
||||
ArrayList<Object> list = new ArrayList<>();
|
||||
for (Object value : field.getValues()) {
|
||||
if (value instanceof Collection) {
|
||||
Collection<?> cValue = (Collection<?>) value;
|
||||
list.addAll(cValue);
|
||||
} else {
|
||||
list.add(value);
|
||||
}
|
||||
}
|
||||
map.put(field.getName(), list);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private void putToElastic(String json, String docId)
|
||||
throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_doc/"
|
||||
+ URLEncoder.encode(docId, "UTF8");
|
||||
Response response = Request.Put(url)
|
||||
.bodyString(json, ContentType.APPLICATION_JSON).execute();
|
||||
log.debug("Response from Elasticsearch: "
|
||||
+ response.returnContent().asString());
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||
e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
|
||||
/**
|
||||
* The nuts and bolts of getting the number of documents in the Elasticsearch
|
||||
* index.
|
||||
*/
|
||||
public class ESCounter {
|
||||
private final String baseUrl;
|
||||
|
||||
public ESCounter(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public int count() throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_doc/_count";
|
||||
Response response = Request.Get(url).execute();
|
||||
String json = response.returnContent().asString();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> map = new ObjectMapper().readValue(json,
|
||||
HashMap.class);
|
||||
return (Integer) map.get("count");
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||
e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,147 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.Header;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.StatusLine;
|
||||
import org.apache.http.client.HttpResponseException;
|
||||
import org.apache.http.client.ResponseHandler;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
import org.apache.http.entity.ContentType;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
|
||||
|
||||
/**
|
||||
* The nuts and bolts of deleting documents from the Elasticsearch index.
|
||||
*/
|
||||
public class ESDeleter {
|
||||
private static final Log log = LogFactory.getLog(ESDeleter.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
/**
|
||||
* @param baseUrl
|
||||
*/
|
||||
public ESDeleter(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public void deleteByIds(Collection<String> ids)
|
||||
throws SearchEngineException {
|
||||
for (String id : ids) {
|
||||
deleteById(id);
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteById(String id) throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_doc/"
|
||||
+ URLEncoder.encode(id, "UTF8");
|
||||
Response response = Request.Delete(url).execute();
|
||||
String json = response.returnContent().asString();
|
||||
} catch (HttpResponseException e) {
|
||||
if (e.getStatusCode() == 404) {
|
||||
// Don't care if it has already been deleted.
|
||||
} else {
|
||||
throw new SearchEngineException(
|
||||
"Failed to delete Elasticsearch document " + id, e);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException(
|
||||
"Failed to delete Elasticsearch document " + id, e);
|
||||
}
|
||||
}
|
||||
|
||||
public void deleteByQuery(String queryString) throws SearchEngineException {
|
||||
String url = baseUrl + "/_delete_by_query";
|
||||
SearchQuery query = new BaseSearchQuery().setQuery(queryString);
|
||||
String queryJson = new QueryConverter(query).asString();
|
||||
|
||||
try {
|
||||
Response response = Request.Post(url)
|
||||
.bodyString(queryJson, ContentType.APPLICATION_JSON)
|
||||
.execute();
|
||||
|
||||
BaseResponseHandler handler = new BaseResponseHandler();
|
||||
response.handleResponse(handler);
|
||||
if (handler.getStatusCode() >= 400) {
|
||||
log.warn(String.format(
|
||||
"Failed to delete Elasticsearch documents by query: %s, %d - %s\n%s",
|
||||
queryString, handler.getStatusCode(),
|
||||
handler.getReasonPhrase(), handler.getContentString()));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new SearchEngineException("Failed to delete Elasticsearch "
|
||||
+ "documents by query " + queryString, e);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper class for interpreting HttpResponse errors
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private class BaseResponseHandler implements ResponseHandler<Object> {
|
||||
private int statusCode;
|
||||
private String reasonPhrase;
|
||||
private Map<String, List<String>> headers;
|
||||
private String contentString;
|
||||
|
||||
@Override
|
||||
public Object handleResponse(org.apache.http.HttpResponse innerResponse)
|
||||
throws IOException {
|
||||
StatusLine statusLine = innerResponse.getStatusLine();
|
||||
statusCode = statusLine.getStatusCode();
|
||||
reasonPhrase = statusLine.getReasonPhrase();
|
||||
|
||||
headers = new HashMap<>();
|
||||
for (Header header : innerResponse.getAllHeaders()) {
|
||||
String name = header.getName();
|
||||
if (!headers.containsKey(name)) {
|
||||
headers.put(name, new ArrayList<String>());
|
||||
}
|
||||
headers.get(name).add(header.getValue());
|
||||
}
|
||||
|
||||
HttpEntity entity = innerResponse.getEntity();
|
||||
if (entity == null) {
|
||||
contentString = "";
|
||||
} else {
|
||||
contentString = EntityUtils.toString(entity);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
public int getStatusCode() {
|
||||
return statusCode;
|
||||
}
|
||||
|
||||
public String getReasonPhrase() {
|
||||
return reasonPhrase;
|
||||
}
|
||||
|
||||
public Map<String, List<String>> getHeaders() {
|
||||
return headers;
|
||||
}
|
||||
|
||||
public String getContentString() {
|
||||
return contentString;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
|
||||
/**
|
||||
* Just does a "commit" or "flush" to the index.
|
||||
*/
|
||||
public class ESFlusher {
|
||||
private static final Log log = LogFactory.getLog(ESFlusher.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
public ESFlusher(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public void flush() throws SearchEngineException {
|
||||
flush(false);
|
||||
}
|
||||
|
||||
public void flush(boolean wait) throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_flush"
|
||||
+ (wait ? "?wait_for_ongoing" : "");
|
||||
Response response = Request.Get(url).execute();
|
||||
String json = response.returnContent().asString();
|
||||
log.debug("flush response: " + json);
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||
e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.client.methods.HttpEntityEnclosingRequestBase;
|
||||
import org.apache.http.entity.ContentType;
|
||||
import org.apache.http.entity.StringEntity;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
|
||||
|
||||
/**
|
||||
* Convert a SearchQuery to JSON, send it to Elasticsearch, and convert the JSON
|
||||
* response to a SearchResponse.
|
||||
*/
|
||||
public class ESQuery {
|
||||
private static final Log log = LogFactory.getLog(ESQuery.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
public ESQuery(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public SearchResponse query(SearchQuery query)
|
||||
throws SearchEngineException {
|
||||
String queryString = new QueryConverter(query).asString();
|
||||
String response = doTheQuery(queryString);
|
||||
return new ResponseParser(response).parse();
|
||||
}
|
||||
|
||||
private String doTheQuery(String queryString) {
|
||||
log.debug("QUERY: " + queryString);
|
||||
try {
|
||||
String url = baseUrl + "/_search";
|
||||
HttpResponse response = new ESFunkyGetRequest(url)
|
||||
.bodyString(queryString, ContentType.APPLICATION_JSON)
|
||||
.execute();
|
||||
String responseString = IOUtils
|
||||
.toString(response.getEntity().getContent());
|
||||
log.debug("RESPONSE: " + responseString);
|
||||
return responseString;
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to put to Elasticsearch", e);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper class -- a GET request that accepts a body
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* The HttpClient implementations, both regular and conversational, do not
|
||||
* allow you to put a body on a GET request. In online discussion, some say
|
||||
* that the HTTP spec is ambiguous on this point, so each implementation
|
||||
* makes its own choice. For example, CURL allows it.
|
||||
*
|
||||
* More to the point however, is that ElasticSearch requires it. So here's a
|
||||
* simple class to make that possible.
|
||||
*
|
||||
* USE POST INSTEAD!!
|
||||
*/
|
||||
private static class ESFunkyGetRequest
|
||||
extends HttpEntityEnclosingRequestBase {
|
||||
public ESFunkyGetRequest(String url) throws SearchEngineException {
|
||||
super();
|
||||
try {
|
||||
setURI(new URI(url));
|
||||
} catch (URISyntaxException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public ESFunkyGetRequest bodyString(String contents,
|
||||
ContentType contentType) {
|
||||
setEntity(new StringEntity(contents, contentType));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HttpResponse execute() throws SearchEngineException {
|
||||
try {
|
||||
return HttpClientFactory.getHttpClient().execute(this);
|
||||
} catch (IOException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMethod() {
|
||||
return "GET";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,142 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.Application;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
|
||||
|
||||
/**
|
||||
* A first draft of an Elasticsearch implementation.
|
||||
*/
|
||||
public class ElasticSearchEngine implements SearchEngine {
|
||||
private static final Log log = LogFactory.getLog(ElasticSearchEngine.class);
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Configuration
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private String baseUrl;
|
||||
|
||||
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBaseUrl")
|
||||
public void setBaseUrl(String url) {
|
||||
if (baseUrl == null) {
|
||||
if (url.endsWith("/")) {
|
||||
url = url.substring(0, url.length() - 1);
|
||||
}
|
||||
baseUrl = url;
|
||||
} else {
|
||||
throw new IllegalStateException(
|
||||
"Configuration includes multiple base URLs: " + url
|
||||
+ ", and " + baseUrl);
|
||||
}
|
||||
}
|
||||
|
||||
@Validation
|
||||
public void validate() throws Exception {
|
||||
if (baseUrl == null) {
|
||||
throw new IllegalStateException(
|
||||
"Configuration did not include a base URL.");
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// The instance
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void startup(Application application, ComponentStartupStatus ss) {
|
||||
log.warn("ElasticSearchEngine.startup() not implemented."); // TODO
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown(Application application) {
|
||||
// TODO Flush the buffers
|
||||
log.warn("ElasticSearchEngine.shutdown not implemented.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void ping() throws SearchEngineException {
|
||||
// TODO What's the simplest we can do? Another smoke test?
|
||||
log.warn("ElasticSearchEngine.ping() not implemented."); // TODO
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchInputDocument createInputDocument() {
|
||||
return new BaseSearchInputDocument();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(SearchInputDocument... docs) throws SearchEngineException {
|
||||
add(Arrays.asList(docs));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(Collection<SearchInputDocument> docs)
|
||||
throws SearchEngineException {
|
||||
new ESAdder(baseUrl).add(docs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit() throws SearchEngineException {
|
||||
new ESFlusher(baseUrl).flush();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit(boolean wait) throws SearchEngineException {
|
||||
new ESFlusher(baseUrl).flush(wait);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteById(String... ids) throws SearchEngineException {
|
||||
deleteById(Arrays.asList(ids));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteById(Collection<String> ids)
|
||||
throws SearchEngineException {
|
||||
new ESDeleter(baseUrl).deleteByIds(ids);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteByQuery(String query) throws SearchEngineException {
|
||||
new ESDeleter(baseUrl).deleteByQuery(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchQuery createQuery() {
|
||||
return new BaseSearchQuery();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchQuery createQuery(String queryText) {
|
||||
BaseSearchQuery query = new BaseSearchQuery();
|
||||
query.setQuery(queryText);
|
||||
return query;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchResponse query(SearchQuery query)
|
||||
throws SearchEngineException {
|
||||
return new ESQuery(baseUrl).query(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int documentCount() throws SearchEngineException {
|
||||
return new ESCounter(baseUrl).count();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocumentList;
|
||||
|
||||
/**
|
||||
* A simple implementation. In fact, this is so simple that perhaps it should be
|
||||
* named BaseSearchResultDocumentList.
|
||||
*/
|
||||
class ElasticSearchResultDocumentList implements SearchResultDocumentList {
|
||||
private final List<SearchResultDocument> documents;
|
||||
private final long numberFound;
|
||||
|
||||
public ElasticSearchResultDocumentList(List<SearchResultDocument> documents,
|
||||
long numberFound) {
|
||||
this.documents = documents;
|
||||
this.numberFound = numberFound;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<SearchResultDocument> iterator() {
|
||||
return documents.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getNumFound() {
|
||||
return documents.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return documents.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchResultDocument get(int i) {
|
||||
return documents.get(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(
|
||||
"ElasticSearchResultDocumentList[numberFound=%s, documents=%s]",
|
||||
numberFound, documents);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,216 @@
|
|||
# What is this package?
|
||||
* The first draft of an Elasticsearch driver for VIVO
|
||||
|
||||
# What has been done?
|
||||
* Implement the `SearchEngine` interface
|
||||
* Classes in `edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch`
|
||||
* No attempt to add new functions.
|
||||
|
||||
# How to experiment with it?
|
||||
* Install elasticsearch somewhere.
|
||||
* Create a search index with the appropriate mapping (see below).
|
||||
* Check out VIVO and this branch of Vitro (see below), and do the usual installation procedure.
|
||||
* Modify `{vitro_home}/config/applicationSetup.n3` to use this driver (see below).
|
||||
* Start elasticsearch
|
||||
* Start VIVO
|
||||
|
||||
# Not ready for production
|
||||
* Documentation
|
||||
* Instructions on how to install and configure the driver.
|
||||
    * Instructions on how to set up Elasticsearch?
|
||||
* Smoke test
|
||||
* Display a warning if the elasticsearch server is not responding.
|
||||
* Functional testing
|
||||
* Are we getting the proper search results?
|
||||
* Are search results in the order that we would like?
|
||||
* Improved snippets
|
||||
* Copy the technique used for Solr
|
||||
* Code improvement
|
||||
* Rigorous closing of HTTP connections.
|
||||
* IOC for HTTP code, to help in unit testing
|
||||
* Consistent use of exceptions and logging
|
||||
* Unit tests
|
||||
* Automatic initialization of the index
|
||||
* If VIVO detects an empty index, apply the mapping.
|
||||
|
||||
# The next steps: adding functionality
|
||||
|
||||
## Stay within the framework
|
||||
* Add fields that enhance the contents of the search index documents (see below).
|
||||
* Add data distributors that run queries and format the output (see below).
|
||||
|
||||
## Go outside the framework
|
||||
* Add functions to the Elasticsearch driver that the Solr driver will simply ignore.
|
||||
* Or remove Solr entirely
|
||||
* Query Elasticsearch directly
|
||||
* Or write a data distributor that will run the query
|
||||
|
||||
# The details:
|
||||
|
||||
## Check out VIVO and Vitro
|
||||
* For now, the Elasticsearch driver only lives in my fork of Vitro
|
||||
* No changes to VIVO are required (yet).
|
||||
|
||||
```
|
||||
git clone https://github.com/vivo-project/VIVO.git
|
||||
git clone -b feature/elasticsearchExperiments https://github.com/j2blake/Vitro.git
|
||||
```
|
||||
|
||||
## A mapping for the search index
|
||||
* If the index uses the default mapping, it will not work correctly.
|
||||
* Some fields must be declared as `keyword`, some as unstemmed, etc.
|
||||
|
||||
* Example mapping script:
|
||||
|
||||
```
|
||||
curl -X PUT "localhost:9200/vivo?pretty" -H 'Content-Type: application/json' -d'
|
||||
{
|
||||
"mappings": {
|
||||
"_doc": {
|
||||
"properties": {
|
||||
"ALLTEXT": {
|
||||
"type": "text",
|
||||
"analyzer": "english"
|
||||
},
|
||||
"ALLTEXTUNSTEMMED": {
|
||||
"type": "text",
|
||||
"analyzer": "standard"
|
||||
},
|
||||
"DocId": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"classgroup": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"type": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"mostSpecificTypeURIs": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"indexedTime": {
|
||||
"type": "long"
|
||||
},
|
||||
"nameRaw": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"URI": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"THUMBNAIL": {
|
||||
"type": "integer"
|
||||
},
|
||||
"THUMBNAIL_URL": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"nameLowercaseSingleValued": {
|
||||
"type": "text",
|
||||
"analyzer": "standard",
|
||||
"fielddata": "true"
|
||||
},
|
||||
"BETA" : {
|
||||
"type" : "float"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"query": {
|
||||
"default_field": "ALLTEXT"
|
||||
}
|
||||
}
|
||||
'
|
||||
```
|
||||
* __*Note:*__ The first line of the script specifies the name of the index as `vivo`.
|
||||
Any name may be used, but it must match the "base URL" that is specified in `applicationSetup.n3` (see below).
|
||||
* __*Note:*__ The same first line specifies the location and port number of the elasticsearch server.
|
||||
Again, any location and port may be used, but they must match the "base URL" in `applicationSetup.n3`.
|
||||
|
||||
## Modify `applicationSetup.n3`
|
||||
* Change this:
|
||||
|
||||
```
|
||||
# ----------------------------
|
||||
#
|
||||
# Search engine module:
|
||||
# The Solr-based implementation is the only standard option, but it can be
|
||||
# wrapped in an "instrumented" wrapper, which provides additional logging
|
||||
# and more rigorous life-cycle checking.
|
||||
#
|
||||
|
||||
:instrumentedSearchEngineWrapper
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||
:wraps :solrSearchEngine .
|
||||
|
||||
```
|
||||
|
||||
* To this:
|
||||
|
||||
```
|
||||
# ----------------------------
|
||||
#
|
||||
# Search engine module:
|
||||
# The Solr-based implementation is the only standard option, but it can be
|
||||
# wrapped in an "instrumented" wrapper, which provides additional logging
|
||||
# and more rigorous life-cycle checking.
|
||||
#
|
||||
|
||||
:instrumentedSearchEngineWrapper
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||
:wraps :elasticSearchEngine .
|
||||
|
||||
:elasticSearchEngine
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.ElasticSearchEngine> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||
:hasBaseUrl "http://localhost:9200/vivo" .
|
||||
```
|
||||
|
||||
## Enhance the contents of the search index
|
||||
### An example: Publication URIs in the author's search document
|
||||
* Add a keyword field to the search index
|
||||
|
||||
```
|
||||
"publicationURI": {
|
||||
"type": "keyword"
|
||||
},
|
||||
```
|
||||
|
||||
* Add a `DocumentModifier` to VIVO.
|
||||
|
||||
```
|
||||
:documentModifier_publications
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.SelectQueryDocumentModifier> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
|
||||
rdfs:label "URIs of publications are added to publicationURI field." ;
|
||||
:hasTargetField "publicationURI" ;
|
||||
:hasSelectQuery """
|
||||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
||||
PREFIX vivo: <http://vivoweb.org/ontology/core#>
|
||||
PREFIX bibo: <http://purl.org/ontology/bibo/>
|
||||
SELECT ?publication
|
||||
WHERE {
|
||||
?uri vivo:relatedBy ?authorship .
|
||||
?authorship a vivo:Authorship .
|
||||
?authorship vivo:relates ?publication .
|
||||
?publication a bibo:Document .
|
||||
}
|
||||
""" .
|
||||
```
|
||||
|
||||
## Use data distributors to query the search index
|
||||
* Install the Data Distribution API
|
||||
* Add a distributor:
|
||||
|
||||
```
|
||||
:drill_by_URI
|
||||
a <java:edu.cornell.library.scholars.webapp.controller.api.distribute.DataDistributor> ,
|
||||
<java:edu.cornell.library.scholars.webapp.controller.api.distribute.search.DrillDownSearchByUriDataDistributor> ;
|
||||
:actionName "searchAndDrill" .
|
||||
```
|
||||
|
||||
* Run the query:
|
||||
|
||||
```
|
||||
http://localhost:8080/vivo/api/dataRequest/searchAndDrill?uri=http://scholars.cornell.edu/individual/mj495
|
||||
```
|
|
@ -0,0 +1,77 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Build a Map of Objects, suitable for marshalling by Jackson.
|
||||
*
|
||||
* Include conditional logic, so null values, empty maps, or empty lists will
|
||||
* not be added, unless you use the special values.
|
||||
*/
|
||||
public class JsonTree {
|
||||
/**
|
||||
* Empty maps will not be added, except for this one.
|
||||
*/
|
||||
public static final Map<String, Object> EMPTY_JSON_MAP = Collections
|
||||
.emptyMap();
|
||||
|
||||
/**
|
||||
* Empty lists will not be added, except for this one.
|
||||
*/
|
||||
public static final List<Object> EMPTY_JSON_LIST = Collections.emptyList();
|
||||
|
||||
/**
|
||||
* Create the tree
|
||||
*/
|
||||
public static JsonTree tree() {
|
||||
return new JsonTree();
|
||||
}
|
||||
|
||||
/**
|
||||
* This will cause negative integers to be ignored.
|
||||
*/
|
||||
public static Integer ifPositive(int i) {
|
||||
return (i > 0) ? i : null;
|
||||
}
|
||||
|
||||
private Map<String, Object> map = new HashMap<>();
|
||||
|
||||
public JsonTree put(String key, Object value) {
|
||||
if (isSignificant(value)) {
|
||||
storeIt(key, value);
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
private boolean isSignificant(Object value) {
|
||||
if (value == null) {
|
||||
return false;
|
||||
}
|
||||
if (value instanceof Map && ((Map) value).isEmpty()
|
||||
&& value != EMPTY_JSON_MAP) {
|
||||
return false;
|
||||
}
|
||||
if (value instanceof List && ((List) value).isEmpty()
|
||||
&& value != EMPTY_JSON_LIST) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private void storeIt(String key, Object value) {
|
||||
if (value instanceof JsonTree) {
|
||||
map.put(key, ((JsonTree) value).asMap());
|
||||
} else {
|
||||
map.put(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
public Map<String, Object> asMap() {
|
||||
return new HashMap<>(map);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,172 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.EMPTY_JSON_MAP;
|
||||
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.ifPositive;
|
||||
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.tree;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery.Order;
|
||||
|
||||
/**
|
||||
* Accept a SearchQuery and make it available as a JSON string, suitable for
|
||||
* Elasticsearch.
|
||||
*/
|
||||
public class QueryConverter {
|
||||
private static final Log log = LogFactory.getLog(QueryConverter.class);
|
||||
|
||||
private final SearchQuery query;
|
||||
private final Map<String, Object> queryAndFilters;
|
||||
private final Map<String, Object> sortFields;
|
||||
private final Map<String, Object> facets;
|
||||
private final Map<String, Object> highlighter;
|
||||
private final List<String> returnFields;
|
||||
private final Map<String, Object> fullMap;
|
||||
|
||||
public QueryConverter(SearchQuery query) {
|
||||
this.query = query;
|
||||
this.queryAndFilters = filteredOrNot();
|
||||
this.sortFields = figureSortFields();
|
||||
this.facets = figureFacets();
|
||||
this.highlighter = figureHighlighter();
|
||||
this.returnFields = figureReturnFields();
|
||||
|
||||
this.fullMap = figureFullMap();
|
||||
}
|
||||
|
||||
private Map<String, Object> filteredOrNot() {
|
||||
if (query.getFilters().isEmpty()) {
|
||||
return new QueryStringMap(query.getQuery()).map;
|
||||
} else {
|
||||
return buildFilterStructure();
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Object> buildFilterStructure() {
|
||||
return tree() //
|
||||
.put("bool", tree() //
|
||||
.put("must", new QueryStringMap(query.getQuery()).map) //
|
||||
.put("filter", buildFiltersList())) //
|
||||
.asMap();
|
||||
}
|
||||
|
||||
private List<Map<String, Object>> buildFiltersList() {
|
||||
List<Map<String, Object>> list = new ArrayList<>();
|
||||
for (String filter : query.getFilters()) {
|
||||
list.add(new QueryStringMap(filter).map);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
private Map<String, Object> figureSortFields() {
|
||||
Map<String, Order> fields = query.getSortFields();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
for (String name : fields.keySet()) {
|
||||
String sortOrder = fields.get(name).toString().toLowerCase();
|
||||
map.put(name, sortOrder);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> figureFacets() {
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
for (String field : query.getFacetFields()) {
|
||||
map.put("facet_" + field, figureFacet(field));
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> figureHighlighter() {
|
||||
return tree() //
|
||||
.put("fields", tree() //
|
||||
.put("ALLTEXT", EMPTY_JSON_MAP))
|
||||
.asMap();
|
||||
}
|
||||
|
||||
private Map<String, Object> figureFacet(String field) {
|
||||
return tree() //
|
||||
.put("terms", tree() //
|
||||
.put("field", field) //
|
||||
.put("size", ifPositive(query.getFacetLimit())) //
|
||||
.put("min_doc_count",
|
||||
ifPositive(query.getFacetMinCount()))) //
|
||||
.asMap();
|
||||
}
|
||||
|
||||
private List<String> figureReturnFields() {
|
||||
return new ArrayList<>(query.getFieldsToReturn());
|
||||
}
|
||||
|
||||
private Map<String, Object> figureFullMap() {
|
||||
return tree() //
|
||||
.put("query", queryAndFilters) //
|
||||
.put("from", ifPositive(query.getStart())) //
|
||||
.put("highlight", highlighter)
|
||||
.put("size", ifPositive(query.getRows())) //
|
||||
.put("sort", sortFields) //
|
||||
.put("_source", returnFields) //
|
||||
.put("aggregations", facets) //
|
||||
.asMap();
|
||||
}
|
||||
|
||||
public String asString() throws SearchEngineException {
|
||||
try {
|
||||
return new ObjectMapper().writeValueAsString(fullMap);
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static class QueryStringMap {
|
||||
public final Map<String, Object> map;
|
||||
|
||||
public QueryStringMap(String queryString) {
|
||||
map = new HashMap<>();
|
||||
map.put("query_string", makeInnerMap(escape(queryString)));
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a kluge, but perhaps it will work for now.
|
||||
*
|
||||
* Apparently Solr is willing to put up with query strings that contain
|
||||
* special characters in odd places, but Elasticsearch is not.
|
||||
*
|
||||
* So, a query string of "classgroup:http://this/that" must be escaped
|
||||
* as "classgroup:http\:\/\/this\/that". Notice that the first colon
|
||||
* delimits the field name, and so must not be escaped.
|
||||
*
|
||||
* But what if no field is specified? Then all colons must be escaped.
|
||||
* How would we distinguish that?
|
||||
*
|
||||
* And what if the query is more complex, and more than one field is
|
||||
* specified? What if other special characters are included?
|
||||
*
|
||||
* This could be a real problem.
|
||||
*/
|
||||
private String escape(String queryString) {
|
||||
return queryString.replace(":", "\\:").replace("/", "\\/")
|
||||
.replaceFirst("\\\\:", ":");
|
||||
}
|
||||
|
||||
private Map<String, String> makeInnerMap(String queryString) {
|
||||
Map<String, String> inner = new HashMap<>();
|
||||
inner.put("default_field", "ALLTEXT");
|
||||
inner.put("query", queryString);
|
||||
return inner;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,182 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField.Count;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField.BaseCount;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResultDocument;
|
||||
|
||||
/**
|
||||
* Elastic search sends a JSON response to a query. parse it to a
|
||||
* SearchResponse.
|
||||
*/
|
||||
class ResponseParser {
|
||||
private static final Log log = LogFactory.getLog(ResponseParser.class);
|
||||
|
||||
private final Map<String, Object> responseMap;
|
||||
|
||||
private Map<String, Map<String, List<String>>> highlightingMap;
|
||||
private Map<String, SearchFacetField> facetFieldsMap;
|
||||
private long totalHits;
|
||||
private List<SearchResultDocument> documentList;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public ResponseParser(String responseString) throws SearchEngineException {
|
||||
try {
|
||||
this.responseMap = new ObjectMapper().readValue(responseString,
|
||||
HashMap.class);
|
||||
} catch (IOException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public SearchResponse parse() {
|
||||
parseDocumentList();
|
||||
parseFacetFields();
|
||||
SearchResponse response = new BaseSearchResponse(highlightingMap,
|
||||
facetFieldsMap,
|
||||
new ElasticSearchResultDocumentList(documentList, totalHits));
|
||||
log.debug("ESQuery.ResponseParser.parse: " + response);
|
||||
return response;
|
||||
}
|
||||
|
||||
private void parseFacetFields() {
|
||||
facetFieldsMap = new HashMap<>();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Map<String, Object>> aggregations = (Map<String, Map<String, Object>>) responseMap
|
||||
.get("aggregations");
|
||||
if (aggregations == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (String key : aggregations.keySet()) {
|
||||
if (key.startsWith("facet_")) {
|
||||
String name = key.substring(6);
|
||||
parseFacetField(name, aggregations.get(key));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void parseFacetField(String name, Map<String, Object> facetMap) {
|
||||
@SuppressWarnings("unchecked")
|
||||
List<Map<String, Object>> bucketsList = (List<Map<String, Object>>) facetMap
|
||||
.get("buckets");
|
||||
if (bucketsList == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
List<Count> counts = new ArrayList<>();
|
||||
for (Map<String, Object> bucket : bucketsList) {
|
||||
counts.add(new BaseCount((String) bucket.get("key"),
|
||||
(Integer) bucket.get("doc_count")));
|
||||
}
|
||||
|
||||
facetFieldsMap.put(name, new BaseSearchFacetField(name, counts));
|
||||
}
|
||||
|
||||
private void parseDocumentList() {
|
||||
documentList = new ArrayList<>();
|
||||
highlightingMap = new HashMap<>();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> uberHits = (Map<String, Object>) responseMap
|
||||
.get("hits");
|
||||
if (uberHits == null) {
|
||||
log.warn("Didn't find a 'hits' field " + "in the query response: "
|
||||
+ responseMap);
|
||||
return;
|
||||
}
|
||||
|
||||
Integer total = (Integer) uberHits.get("total");
|
||||
if (total == null) {
|
||||
log.warn("Didn't find a 'hits.total' field "
|
||||
+ "in the query response: " + responseMap);
|
||||
return;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
List<Map<String, Object>> hits = (List<Map<String, Object>>) uberHits
|
||||
.get("hits");
|
||||
if (hits == null) {
|
||||
log.warn("Didn't find a 'hits.hits' field "
|
||||
+ "in the query response: " + responseMap);
|
||||
return;
|
||||
}
|
||||
|
||||
parseDocuments(hits);
|
||||
}
|
||||
|
||||
private void parseDocuments(List<Map<String, Object>> hits) {
|
||||
for (Map<String, Object> hit : hits) {
|
||||
SearchResultDocument doc = parseDocument(hit);
|
||||
if (doc != null) {
|
||||
documentList.add(doc);
|
||||
|
||||
Map<String, List<String>> highlight = parseHighlight(hit);
|
||||
if (highlight != null) {
|
||||
highlightingMap.put(doc.getUniqueId(), highlight);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private SearchResultDocument parseDocument(Map<String, Object> hitMap) {
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Collection<Object>> sourceMap = (Map<String, Collection<Object>>) hitMap
|
||||
.get("_source");
|
||||
if (sourceMap == null) {
|
||||
log.warn("Didn't find a '_source' field in the hit: " + hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
String id = (String) hitMap.get("_id");
|
||||
if (id == null) {
|
||||
log.warn("Didn't find a '_id' field in the hit: " + hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
return new BaseSearchResultDocument(id, sourceMap);
|
||||
}
|
||||
|
||||
private Map<String, List<String>> parseHighlight(
|
||||
Map<String, Object> hitMap) {
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, List<String>> highlightMap = (Map<String, List<String>>) hitMap
|
||||
.get("highlight");
|
||||
if (highlightMap == null) {
|
||||
log.debug("Didn't find a 'highlight' field in the hit: " + hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
List<String> snippets = highlightMap.get("ALLTEXT");
|
||||
if (snippets == null) {
|
||||
log.warn("Didn't find a 'highlight.ALLTEXT' field in the hit: "
|
||||
+ hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
Map<String, List<String>> snippetMap = new HashMap<>();
|
||||
snippetMap.put("ALLTEXT", snippets);
|
||||
return snippetMap;
|
||||
}
|
||||
}
|
|
@ -51,7 +51,6 @@ public class SolrConversionUtils {
|
|||
SearchInputDocument doc) {
|
||||
SolrInputDocument solrDoc = new SolrInputDocument(
|
||||
convertToSolrInputFieldMap(doc.getFieldMap()));
|
||||
solrDoc.setDocumentBoost(doc.getDocumentBoost());
|
||||
return solrDoc;
|
||||
}
|
||||
|
||||
|
@ -81,11 +80,10 @@ public class SolrConversionUtils {
|
|||
// No values, nothing to do.
|
||||
} else if (values.size() == 1) {
|
||||
// One value? Insure that it is accepted as such.
|
||||
solrField.addValue(values.iterator().next(),
|
||||
searchInputField.getBoost());
|
||||
solrField.addValue(values.iterator().next());
|
||||
} else {
|
||||
// A collection of values? Add them.
|
||||
solrField.addValue(values, searchInputField.getBoost());
|
||||
solrField.addValue(values);
|
||||
}
|
||||
|
||||
return solrField;
|
||||
|
@ -125,7 +123,6 @@ public class SolrConversionUtils {
|
|||
* Convert from a SearchQuery to a SolrQuery, so the Solr server may execute
|
||||
* it.
|
||||
*/
|
||||
@SuppressWarnings("deprecation")
|
||||
static SolrQuery convertToSolrQuery(SearchQuery query) {
|
||||
SolrQuery solrQuery = new SolrQuery(query.getQuery());
|
||||
solrQuery.setStart(query.getStart());
|
||||
|
@ -141,7 +138,7 @@ public class SolrConversionUtils {
|
|||
|
||||
Map<String, Order> sortFields = query.getSortFields();
|
||||
for (String sortField : sortFields.keySet()) {
|
||||
solrQuery.addSortField(sortField,
|
||||
solrQuery.addOrUpdateSort(sortField,
|
||||
convertToSolrOrder(sortFields.get(sortField)));
|
||||
}
|
||||
|
||||
|
|
|
@ -10,10 +10,13 @@ import java.util.Collection;
|
|||
|
||||
import javax.servlet.ServletContext;
|
||||
|
||||
import org.apache.http.impl.client.HttpClientBuilder;
|
||||
import org.apache.http.impl.client.StandardHttpRequestRetryHandler;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrQuery;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrServer;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrServer;
|
||||
import org.apache.solr.client.solrj.impl.ConcurrentUpdateSolrClient;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.client.solrj.response.QueryResponse;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
|
||||
|
@ -32,8 +35,8 @@ import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
|
|||
* The Solr-based implementation of SearchEngine.
|
||||
*/
|
||||
public class SolrSearchEngine implements SearchEngine {
|
||||
private HttpSolrServer queryEngine;
|
||||
private ConcurrentUpdateSolrServer updateEngine;
|
||||
private SolrClient queryEngine;
|
||||
private ConcurrentUpdateSolrClient updateEngine;
|
||||
|
||||
/**
|
||||
* Set up the http connection with the solr server
|
||||
|
@ -53,16 +56,26 @@ public class SolrSearchEngine implements SearchEngine {
|
|||
}
|
||||
|
||||
try {
|
||||
queryEngine = new HttpSolrServer(solrServerUrlString);
|
||||
queryEngine.setSoTimeout(10000); // socket read timeout
|
||||
queryEngine.setConnectionTimeout(10000);
|
||||
queryEngine.setDefaultMaxConnectionsPerHost(100);
|
||||
queryEngine.setMaxTotalConnections(100);
|
||||
queryEngine.setMaxRetries(1);
|
||||
HttpSolrClient.Builder builder = new HttpSolrClient.Builder(solrServerUrlString);
|
||||
|
||||
updateEngine = new ConcurrentUpdateSolrServer(solrServerUrlString, 100, 1);
|
||||
updateEngine.setConnectionTimeout(10000);
|
||||
updateEngine.setPollQueueTime(25);
|
||||
builder.withSocketTimeout(10000); // socket read timeout
|
||||
builder.withConnectionTimeout(10000);
|
||||
|
||||
HttpClientBuilder httpClientBuilder = HttpClientBuilder.create();
|
||||
httpClientBuilder.setMaxConnPerRoute(100);
|
||||
httpClientBuilder.setMaxConnTotal(100);
|
||||
httpClientBuilder.setRetryHandler(new StandardHttpRequestRetryHandler(1, false));
|
||||
|
||||
builder.withHttpClient(httpClientBuilder.build());
|
||||
|
||||
queryEngine = builder.build();
|
||||
|
||||
ConcurrentUpdateSolrClient.Builder updateBuilder =
|
||||
new ConcurrentUpdateSolrClient.Builder(solrServerUrlString);
|
||||
updateBuilder.withConnectionTimeout(10000);
|
||||
// no apparent 7.4.0 analogy to `setPollQueueTime(25)`
|
||||
|
||||
updateEngine = updateBuilder.build();
|
||||
|
||||
css.info("Set up the Solr search engine; URL = '" + solrServerUrlString + "'.");
|
||||
} catch (Exception e) {
|
||||
|
@ -72,8 +85,12 @@ public class SolrSearchEngine implements SearchEngine {
|
|||
|
||||
@Override
|
||||
public void shutdown(Application application) {
|
||||
queryEngine.shutdown();
|
||||
updateEngine.shutdown();
|
||||
try {
|
||||
queryEngine.close();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Error shutting down 'queryEngine'", e);
|
||||
}
|
||||
updateEngine.shutdownNow();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -170,7 +187,7 @@ public class SolrSearchEngine implements SearchEngine {
|
|||
SolrQuery solrQuery = SolrConversionUtils.convertToSolrQuery(query);
|
||||
QueryResponse response = queryEngine.query(solrQuery);
|
||||
return SolrConversionUtils.convertToSearchResponse(response);
|
||||
} catch (SolrServerException e) {
|
||||
} catch (SolrServerException | IOException e) {
|
||||
throw appropriateException(
|
||||
"Solr server failed to execute the query" + query, e);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,128 @@
|
|||
/* $This file is distributed under the terms of the license in LICENSE$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.servlet.setup;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
|
||||
import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.apache.http.client.HttpClient;
|
||||
import org.apache.http.client.methods.HttpGet;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
|
||||
import javax.servlet.ServletContextEvent;
|
||||
import javax.servlet.ServletContextListener;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.URL;
|
||||
|
||||
/**
|
||||
* If we can't connect to ElasticSearch, add a Warning item to the StartupStatus.
|
||||
*/
|
||||
public class ElasticSmokeTest {
|
||||
|
||||
private static final Log log = LogFactory.getLog(ElasticSmokeTest.class);
|
||||
|
||||
private final ServletContextListener listener;
|
||||
|
||||
public ElasticSmokeTest(ServletContextListener listener) {
|
||||
this.listener = listener;
|
||||
}
|
||||
|
||||
public void doTest(ServletContextEvent sce) {
|
||||
final StartupStatus ss = StartupStatus.getBean(sce.getServletContext());
|
||||
|
||||
String elasticUrlString = ConfigurationProperties.getBean(sce).getProperty("vitro.local.elastic.url", "");
|
||||
if (elasticUrlString.isEmpty()) {
|
||||
ss.fatal(listener, "Can't connect to ElasticSearch engine. "
|
||||
+ "runtime.properties must contain a value for "
|
||||
+ "vitro.local.elastic.url");
|
||||
return;
|
||||
}
|
||||
|
||||
URL elasticUrl = null;
|
||||
|
||||
try {
|
||||
elasticUrl = new URL(elasticUrlString);
|
||||
} catch (MalformedURLException e) {
|
||||
ss.fatal(listener, "Can't connect to ElasticSearch engine. "
|
||||
+ "The value for vitro.local.elastic.url "
|
||||
+ "in runtime.properties is not a valid URL: '"
|
||||
+ elasticUrlString + "'", e);
|
||||
}
|
||||
|
||||
ss.info(listener, "Starting ElasticSearch test.");
|
||||
|
||||
checkConnection(elasticUrl, ss);
|
||||
}
|
||||
|
||||
private void checkConnection(URL elasticUrl, StartupStatus ss) {
|
||||
try {
|
||||
new ElasticPinger(elasticUrl).ping();
|
||||
reportGoodPing(ss);
|
||||
} catch (ElasticProblemException e) {
|
||||
reportPingProblem(ss, e);
|
||||
}
|
||||
}
|
||||
|
||||
private void reportGoodPing(StartupStatus ss) {
|
||||
ss.info(listener, "The ElasticSearch server responded to a 'ping'.");
|
||||
}
|
||||
|
||||
private void reportPingProblem(StartupStatus ss, ElasticProblemException e) {
|
||||
ss.warning(listener, "The ElasticSearch engine did not respond to a 'ping' request", e);
|
||||
}
|
||||
|
||||
/**
|
||||
* Issue a "ping" to ElasticSearch. If we get here, we've already established
|
||||
* contact, so any error is a fatal one.
|
||||
*/
|
||||
private static class ElasticPinger {
|
||||
private final URL elasticUrl;
|
||||
private final HttpClient httpClient = HttpClientFactory.getHttpClient();
|
||||
|
||||
public ElasticPinger(URL elasticUrl) {
|
||||
this.elasticUrl = elasticUrl;
|
||||
}
|
||||
|
||||
public void ping() throws ElasticProblemException {
|
||||
try {
|
||||
HttpGet method = new HttpGet(elasticUrl.toExternalForm());
|
||||
log.debug("Trying to ping ElasticSearch");
|
||||
HttpResponse response = httpClient.execute(method);
|
||||
try {
|
||||
log.debug("Finished pinging ElasticSearch");
|
||||
int statusCode = response.getStatusLine().getStatusCode();
|
||||
if (statusCode != HttpStatus.SC_OK) {
|
||||
throw new ElasticProblemException(statusCode);
|
||||
}
|
||||
} finally {
|
||||
EntityUtils.consume(response.getEntity());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new ElasticProblemException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static class ElasticProblemException extends Exception {
|
||||
private final int statusCode;
|
||||
|
||||
ElasticProblemException(int statusCode) {
|
||||
super("HTTP status code = " + statusCode);
|
||||
this.statusCode = statusCode;
|
||||
}
|
||||
|
||||
ElasticProblemException(Throwable cause) {
|
||||
super(cause);
|
||||
this.statusCode = 0;
|
||||
}
|
||||
|
||||
int getStatusCode() {
|
||||
return this.statusCode;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,48 @@
|
|||
/* $This file is distributed under the terms of the license in LICENSE$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.servlet.setup;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
|
||||
import edu.cornell.mannlib.vitro.webapp.startup.StartupStatus;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import javax.servlet.ServletContextEvent;
|
||||
import javax.servlet.ServletContextListener;
|
||||
|
||||
/**
|
||||
* Start up the appropriate search engine smoke test based on the configured URL property.
|
||||
*/
|
||||
public class SearchEngineSmokeTest implements ServletContextListener {
|
||||
|
||||
private static final Log log = LogFactory.getLog(SearchEngineSmokeTest.class);
|
||||
|
||||
@Override
|
||||
public void contextInitialized(ServletContextEvent sce) {
|
||||
final StartupStatus ss = StartupStatus.getBean(sce.getServletContext());
|
||||
|
||||
String solrUrlString = ConfigurationProperties.getBean(sce).getProperty("vitro.local.solr.url", "");
|
||||
String elasticUrlString = ConfigurationProperties.getBean(sce).getProperty("vitro.local.elastic.url", "");
|
||||
|
||||
if (!solrUrlString.isEmpty() && !elasticUrlString.isEmpty()) {
|
||||
ss.fatal(this, "More than one search engine is configured: " + solrUrlString + ", and " + elasticUrlString);
|
||||
|
||||
} else if (solrUrlString.isEmpty() && elasticUrlString.isEmpty()) {
|
||||
ss.fatal(this, "No search engine is configured");
|
||||
|
||||
} else if (!solrUrlString.isEmpty()) {
|
||||
log.debug("Initializing Solr: " + solrUrlString);
|
||||
new SolrSmokeTest(this).doTest(sce);
|
||||
|
||||
} else {
|
||||
log.debug("Initializing ElasticSearch: " + elasticUrlString);
|
||||
new ElasticSmokeTest(this).doTest(sce);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void contextDestroyed(ServletContextEvent sce) {
|
||||
// nothing to tear down.
|
||||
}
|
||||
|
||||
}
|
|
@ -5,7 +5,6 @@ package edu.cornell.mannlib.vitro.webapp.servlet.setup;
|
|||
import java.io.IOException;
|
||||
import java.net.ConnectException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.net.SocketTimeoutException;
|
||||
import java.net.URL;
|
||||
import java.net.UnknownHostException;
|
||||
|
||||
|
@ -35,23 +34,28 @@ import org.apache.http.util.EntityUtils;
|
|||
*
|
||||
* If we can't connect to Solr, add a Warning item to the StartupStatus.
|
||||
*/
|
||||
public class SolrSmokeTest implements ServletContextListener {
|
||||
public class SolrSmokeTest {
|
||||
private static final Log log = LogFactory.getLog(SolrSmokeTest.class);
|
||||
|
||||
private final ServletContextListener listener;
|
||||
|
||||
/*
|
||||
* We don't want to treat socket timeout as a non-recoverable error like the
|
||||
* other exceptions. So pretend there's a status code for it instead.
|
||||
*/
|
||||
private static final int SOCKET_TIMEOUT_STATUS = -500;
|
||||
|
||||
@Override
|
||||
public void contextInitialized(ServletContextEvent sce) {
|
||||
public SolrSmokeTest(ServletContextListener listener) {
|
||||
this.listener = listener;
|
||||
}
|
||||
|
||||
public void doTest(ServletContextEvent sce) {
|
||||
final StartupStatus ss = StartupStatus.getBean(sce.getServletContext());
|
||||
|
||||
String solrUrlString = ConfigurationProperties.getBean(sce)
|
||||
.getProperty("vitro.local.solr.url", "");
|
||||
if (solrUrlString.isEmpty()) {
|
||||
ss.fatal(this, "Can't connect to Solr search engine. "
|
||||
ss.fatal(listener, "Can't connect to Solr search engine. "
|
||||
+ "runtime.properties must contain a value for "
|
||||
+ "vitro.local.solr.url");
|
||||
return;
|
||||
|
@ -62,27 +66,22 @@ public class SolrSmokeTest implements ServletContextListener {
|
|||
try {
|
||||
solrUrl = new URL(solrUrlString);
|
||||
} catch (MalformedURLException e) {
|
||||
ss.fatal(this, "Can't connect to Solr search engine. "
|
||||
ss.fatal(listener, "Can't connect to Solr search engine. "
|
||||
+ "The value for vitro.local.solr.url "
|
||||
+ "in runtime.properties is not a valid URL: '"
|
||||
+ solrUrlString + "'", e);
|
||||
}
|
||||
|
||||
ss.info(this, "Starting thread for Solr test.");
|
||||
new SolrSmokeTestThread(this, solrUrl, ss).start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void contextDestroyed(ServletContextEvent sce) {
|
||||
// nothing to tear down.
|
||||
ss.info(listener, "Starting thread for Solr test.");
|
||||
new SolrSmokeTestThread(listener, solrUrl, ss).start();
|
||||
}
|
||||
|
||||
private static class SolrSmokeTestThread extends VitroBackgroundThread {
|
||||
private final SolrSmokeTest listener;
|
||||
private final ServletContextListener listener;
|
||||
private final URL solrUrl;
|
||||
private final StartupStatus ss;
|
||||
|
||||
public SolrSmokeTestThread(SolrSmokeTest listener, URL solrUrl,
|
||||
public SolrSmokeTestThread(ServletContextListener listener, URL solrUrl,
|
||||
StartupStatus ss) {
|
||||
super("SolrSmokeTest");
|
||||
this.listener = listener;
|
||||
|
@ -205,6 +204,8 @@ public class SolrSmokeTest implements ServletContextListener {
|
|||
*/
|
||||
private static class SolrHomePager {
|
||||
private static final long SLEEP_INTERVAL = 20000; // 20 seconds
|
||||
private static final long SLEEP_MAX = 300000; // maximum sleep time: 5 minutes
|
||||
private static long SLEEP_DURATION = 0; // how long have we been sleeping?
|
||||
|
||||
private final URL solrUrl;
|
||||
private final HttpClient httpClient = HttpClientFactory.getHttpClient();
|
||||
|
@ -218,12 +219,7 @@ public class SolrSmokeTest implements ServletContextListener {
|
|||
public void connect() throws SolrProblemException {
|
||||
tryToConnect();
|
||||
|
||||
if (!isDone()) {
|
||||
sleep();
|
||||
tryToConnect();
|
||||
}
|
||||
|
||||
if (!isDone()) {
|
||||
while (!isDone() && SLEEP_DURATION < SLEEP_MAX) {
|
||||
sleep();
|
||||
tryToConnect();
|
||||
}
|
||||
|
@ -234,17 +230,19 @@ public class SolrSmokeTest implements ServletContextListener {
|
|||
}
|
||||
|
||||
private void tryToConnect() throws SolrProblemException {
|
||||
SolrSmokeTest.log.debug("Trying to connect to Solr, wait up to " + SLEEP_MAX / 60000 + " minutes - " +
|
||||
(int)(SLEEP_DURATION * 100.0 / SLEEP_MAX) + "%");
|
||||
|
||||
try {
|
||||
HttpGet method = new HttpGet(solrUrl.toExternalForm());
|
||||
SolrSmokeTest.log.debug("Trying to connect to Solr");
|
||||
HttpResponse response = httpClient.execute(method);
|
||||
HttpGet method = new HttpGet(solrUrl.toExternalForm() + "/select");
|
||||
HttpResponse response = httpClient.execute(method);
|
||||
try {
|
||||
statusCode = response.getStatusLine().getStatusCode();
|
||||
SolrSmokeTest.log.debug("HTTP status was " + statusCode);
|
||||
} finally {
|
||||
EntityUtils.consume(response.getEntity());
|
||||
}
|
||||
} catch (SocketTimeoutException e) {
|
||||
} catch (IOException e) {
|
||||
// Catch the exception so we can retry this.
|
||||
// Save the status so we know why we failed.
|
||||
statusCode = SolrSmokeTest.SOCKET_TIMEOUT_STATUS;
|
||||
|
@ -264,6 +262,7 @@ public class SolrSmokeTest implements ServletContextListener {
|
|||
|
||||
private void sleep() {
|
||||
try {
|
||||
SLEEP_DURATION += SLEEP_INTERVAL;
|
||||
Thread.sleep(SLEEP_INTERVAL);
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace(); // Should never happen
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue