initial files for ElasticSearch: ticket vivo-1587 (#85)
* initial files from ticket vivo-1587 * Add https://mvnrepository.com/artifact/org.apache.httpcomponents/fluent-hc/4.5.6 dependency * Resolves: https://jira.duraspace.org/browse/VIVO-1587
This commit is contained in:
parent
734b9ccf68
commit
40f78e58a8
12 changed files with 1275 additions and 0 deletions
|
@ -58,6 +58,11 @@
|
||||||
<artifactId>argon2-jvm</artifactId>
|
<artifactId>argon2-jvm</artifactId>
|
||||||
<version>2.4</version>
|
<version>2.4</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.apache.httpcomponents</groupId>
|
||||||
|
<artifactId>fluent-hc</artifactId>
|
||||||
|
<version>4.5.6</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.vivoweb</groupId>
|
<groupId>org.vivoweb</groupId>
|
||||||
<artifactId>vitro-dependencies</artifactId>
|
<artifactId>vitro-dependencies</artifactId>
|
||||||
|
|
|
@ -0,0 +1,92 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.http.client.fluent.Request;
|
||||||
|
import org.apache.http.client.fluent.Response;
|
||||||
|
import org.apache.http.entity.ContentType;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The nuts and bolts of adding a document to the Elasticsearch index
|
||||||
|
*/
|
||||||
|
public class ESAdder {
|
||||||
|
private static final Log log = LogFactory.getLog(ESAdder.class);
|
||||||
|
|
||||||
|
private final String baseUrl;
|
||||||
|
|
||||||
|
public ESAdder(String baseUrl) {
|
||||||
|
this.baseUrl = baseUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void add(Collection<SearchInputDocument> docs)
|
||||||
|
throws SearchEngineException {
|
||||||
|
for (SearchInputDocument doc : docs) {
|
||||||
|
addDocument(doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addDocument(SearchInputDocument doc)
|
||||||
|
throws SearchEngineException {
|
||||||
|
try {
|
||||||
|
Map<String, List<Object>> map = convertDocToMap(doc);
|
||||||
|
String json = new ObjectMapper().writeValueAsString(map);
|
||||||
|
log.debug("Adding document for '" + doc.getField("DocId") + "': "
|
||||||
|
+ json);
|
||||||
|
|
||||||
|
putToElastic(json, (String) doc.getField("DocId").getFirstValue());
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new SearchEngineException("Failed to convert to JSON", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Some field values are collections. Add the members of the collection
|
||||||
|
* instead.
|
||||||
|
*/
|
||||||
|
private Map<String, List<Object>> convertDocToMap(SearchInputDocument doc) {
|
||||||
|
Map<String, List<Object>> map = new HashMap<>();
|
||||||
|
for (SearchInputField field : doc.getFieldMap().values()) {
|
||||||
|
ArrayList<Object> list = new ArrayList<>();
|
||||||
|
for (Object value : field.getValues()) {
|
||||||
|
if (value instanceof Collection) {
|
||||||
|
Collection<?> cValue = (Collection<?>) value;
|
||||||
|
list.addAll(cValue);
|
||||||
|
} else {
|
||||||
|
list.add(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
map.put(field.getName(), list);
|
||||||
|
}
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void putToElastic(String json, String docId)
|
||||||
|
throws SearchEngineException {
|
||||||
|
try {
|
||||||
|
String url = baseUrl + "/_doc/"
|
||||||
|
+ URLEncoder.encode(docId, "UTF8");
|
||||||
|
Response response = Request.Put(url)
|
||||||
|
.bodyString(json, ContentType.APPLICATION_JSON).execute();
|
||||||
|
log.debug("Response from Elasticsearch: "
|
||||||
|
+ response.returnContent().asString());
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,42 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.http.client.fluent.Request;
|
||||||
|
import org.apache.http.client.fluent.Response;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The nuts and bolts of getting the number of documents in the Elasticsearch
|
||||||
|
* index.
|
||||||
|
*/
|
||||||
|
public class ESCounter {
|
||||||
|
private final String baseUrl;
|
||||||
|
|
||||||
|
public ESCounter(String baseUrl) {
|
||||||
|
this.baseUrl = baseUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int count() throws SearchEngineException {
|
||||||
|
try {
|
||||||
|
String url = baseUrl + "/_doc/_count";
|
||||||
|
Response response = Request.Get(url).execute();
|
||||||
|
String json = response.returnContent().asString();
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Map<String, Object> map = new ObjectMapper().readValue(json,
|
||||||
|
HashMap.class);
|
||||||
|
return (Integer) map.get("count");
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,147 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URLEncoder;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.http.Header;
|
||||||
|
import org.apache.http.HttpEntity;
|
||||||
|
import org.apache.http.StatusLine;
|
||||||
|
import org.apache.http.client.HttpResponseException;
|
||||||
|
import org.apache.http.client.ResponseHandler;
|
||||||
|
import org.apache.http.client.fluent.Request;
|
||||||
|
import org.apache.http.client.fluent.Response;
|
||||||
|
import org.apache.http.entity.ContentType;
|
||||||
|
import org.apache.http.util.EntityUtils;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The nuts and bolts of deleting documents from the Elasticsearch index.
|
||||||
|
*/
|
||||||
|
public class ESDeleter {
|
||||||
|
private static final Log log = LogFactory.getLog(ESDeleter.class);
|
||||||
|
|
||||||
|
private final String baseUrl;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param baseUrl
|
||||||
|
*/
|
||||||
|
public ESDeleter(String baseUrl) {
|
||||||
|
this.baseUrl = baseUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void deleteByIds(Collection<String> ids)
|
||||||
|
throws SearchEngineException {
|
||||||
|
for (String id : ids) {
|
||||||
|
deleteById(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void deleteById(String id) throws SearchEngineException {
|
||||||
|
try {
|
||||||
|
String url = baseUrl + "/_doc/"
|
||||||
|
+ URLEncoder.encode(id, "UTF8");
|
||||||
|
Response response = Request.Delete(url).execute();
|
||||||
|
String json = response.returnContent().asString();
|
||||||
|
} catch (HttpResponseException e) {
|
||||||
|
if (e.getStatusCode() == 404) {
|
||||||
|
// Don't care if it has already been deleted.
|
||||||
|
} else {
|
||||||
|
throw new SearchEngineException(
|
||||||
|
"Failed to delete Elasticsearch document " + id, e);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new SearchEngineException(
|
||||||
|
"Failed to delete Elasticsearch document " + id, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void deleteByQuery(String queryString) throws SearchEngineException {
|
||||||
|
String url = baseUrl + "/_delete_by_query";
|
||||||
|
SearchQuery query = new BaseSearchQuery().setQuery(queryString);
|
||||||
|
String queryJson = new QueryConverter(query).asString();
|
||||||
|
|
||||||
|
try {
|
||||||
|
Response response = Request.Post(url)
|
||||||
|
.bodyString(queryJson, ContentType.APPLICATION_JSON)
|
||||||
|
.execute();
|
||||||
|
|
||||||
|
BaseResponseHandler handler = new BaseResponseHandler();
|
||||||
|
response.handleResponse(handler);
|
||||||
|
if (handler.getStatusCode() >= 400) {
|
||||||
|
log.warn(String.format(
|
||||||
|
"Failed to delete Elasticsearch documents by query: %s, %d - %s\n%s",
|
||||||
|
queryString, handler.getStatusCode(),
|
||||||
|
handler.getReasonPhrase(), handler.getContentString()));
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new SearchEngineException("Failed to delete Elasticsearch "
|
||||||
|
+ "documents by query " + queryString, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// Helper class for interpreting HttpResponse errors
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
private class BaseResponseHandler implements ResponseHandler<Object> {
|
||||||
|
private int statusCode;
|
||||||
|
private String reasonPhrase;
|
||||||
|
private Map<String, List<String>> headers;
|
||||||
|
private String contentString;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object handleResponse(org.apache.http.HttpResponse innerResponse)
|
||||||
|
throws IOException {
|
||||||
|
StatusLine statusLine = innerResponse.getStatusLine();
|
||||||
|
statusCode = statusLine.getStatusCode();
|
||||||
|
reasonPhrase = statusLine.getReasonPhrase();
|
||||||
|
|
||||||
|
headers = new HashMap<>();
|
||||||
|
for (Header header : innerResponse.getAllHeaders()) {
|
||||||
|
String name = header.getName();
|
||||||
|
if (!headers.containsKey(name)) {
|
||||||
|
headers.put(name, new ArrayList<String>());
|
||||||
|
}
|
||||||
|
headers.get(name).add(header.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
HttpEntity entity = innerResponse.getEntity();
|
||||||
|
if (entity == null) {
|
||||||
|
contentString = "";
|
||||||
|
} else {
|
||||||
|
contentString = EntityUtils.toString(entity);
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getStatusCode() {
|
||||||
|
return statusCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getReasonPhrase() {
|
||||||
|
return reasonPhrase;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, List<String>> getHeaders() {
|
||||||
|
return headers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getContentString() {
|
||||||
|
return contentString;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,41 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.http.client.fluent.Request;
|
||||||
|
import org.apache.http.client.fluent.Response;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Just does a "commit" or "flush" to the index.
|
||||||
|
*/
|
||||||
|
public class ESFlusher {
|
||||||
|
private static final Log log = LogFactory.getLog(ESFlusher.class);
|
||||||
|
|
||||||
|
private final String baseUrl;
|
||||||
|
|
||||||
|
public ESFlusher(String baseUrl) {
|
||||||
|
this.baseUrl = baseUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void flush() throws SearchEngineException {
|
||||||
|
flush(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void flush(boolean wait) throws SearchEngineException {
|
||||||
|
try {
|
||||||
|
String url = baseUrl + "/_flush"
|
||||||
|
+ (wait ? "?wait_for_ongoing" : "");
|
||||||
|
Response response = Request.Get(url).execute();
|
||||||
|
String json = response.returnContent().asString();
|
||||||
|
log.debug("flush response: " + json);
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,106 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
|
||||||
|
import org.apache.commons.io.IOUtils;
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
import org.apache.http.HttpResponse;
|
||||||
|
import org.apache.http.client.methods.HttpEntityEnclosingRequestBase;
|
||||||
|
import org.apache.http.entity.ContentType;
|
||||||
|
import org.apache.http.entity.StringEntity;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a SearchQuery to JSON, send it to Elasticsearch, and convert the JSON
|
||||||
|
* response to a SearchResponse.
|
||||||
|
*/
|
||||||
|
public class ESQuery {
|
||||||
|
private static final Log log = LogFactory.getLog(ESQuery.class);
|
||||||
|
|
||||||
|
private final String baseUrl;
|
||||||
|
|
||||||
|
public ESQuery(String baseUrl) {
|
||||||
|
this.baseUrl = baseUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchResponse query(SearchQuery query)
|
||||||
|
throws SearchEngineException {
|
||||||
|
String queryString = new QueryConverter(query).asString();
|
||||||
|
String response = doTheQuery(queryString);
|
||||||
|
return new ResponseParser(response).parse();
|
||||||
|
}
|
||||||
|
|
||||||
|
private String doTheQuery(String queryString) {
|
||||||
|
log.debug("QUERY: " + queryString);
|
||||||
|
try {
|
||||||
|
String url = baseUrl + "/_search";
|
||||||
|
HttpResponse response = new ESFunkyGetRequest(url)
|
||||||
|
.bodyString(queryString, ContentType.APPLICATION_JSON)
|
||||||
|
.execute();
|
||||||
|
String responseString = IOUtils
|
||||||
|
.toString(response.getEntity().getContent());
|
||||||
|
log.debug("RESPONSE: " + responseString);
|
||||||
|
return responseString;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("Failed to put to Elasticsearch", e);
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// Helper class -- a GET request that accepts a body
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The HttpClient implementations, both regular and conversational, do not
|
||||||
|
* allow you to put a body on a GET request. In online discussion, some say
|
||||||
|
* that the HTTP spec is ambiguous on this point, so each implementation
|
||||||
|
* makes its own choice. For example, CURL allows it.
|
||||||
|
*
|
||||||
|
* More to the point however, is that ElasticSearch requires it. So here's a
|
||||||
|
* simple class to make that possible.
|
||||||
|
*
|
||||||
|
* USE POST INSTEAD!!
|
||||||
|
*/
|
||||||
|
private static class ESFunkyGetRequest
|
||||||
|
extends HttpEntityEnclosingRequestBase {
|
||||||
|
public ESFunkyGetRequest(String url) throws SearchEngineException {
|
||||||
|
super();
|
||||||
|
try {
|
||||||
|
setURI(new URI(url));
|
||||||
|
} catch (URISyntaxException e) {
|
||||||
|
throw new SearchEngineException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public ESFunkyGetRequest bodyString(String contents,
|
||||||
|
ContentType contentType) {
|
||||||
|
setEntity(new StringEntity(contents, contentType));
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
public HttpResponse execute() throws SearchEngineException {
|
||||||
|
try {
|
||||||
|
return HttpClientFactory.getHttpClient().execute(this);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new SearchEngineException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getMethod() {
|
||||||
|
return "GET";
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,142 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.Application;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A first draft of an Elasticsearch implementation.
|
||||||
|
*/
|
||||||
|
public class ElasticSearchEngine implements SearchEngine {
|
||||||
|
private static final Log log = LogFactory.getLog(ElasticSearchEngine.class);
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// Configuration
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
private String baseUrl;
|
||||||
|
|
||||||
|
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBaseUrl")
|
||||||
|
public void setBaseUrl(String url) {
|
||||||
|
if (baseUrl == null) {
|
||||||
|
if (url.endsWith("/")) {
|
||||||
|
url = url.substring(0, url.length() - 1);
|
||||||
|
}
|
||||||
|
baseUrl = url;
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Configuration includes multiple base URLs: " + url
|
||||||
|
+ ", and " + baseUrl);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Validation
|
||||||
|
public void validate() throws Exception {
|
||||||
|
if (baseUrl == null) {
|
||||||
|
throw new IllegalStateException(
|
||||||
|
"Configuration did not include a base URL.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
// The instance
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void startup(Application application, ComponentStartupStatus ss) {
|
||||||
|
log.warn("ElasticSearchEngine.startup() not implemented."); // TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void shutdown(Application application) {
|
||||||
|
// TODO Flush the buffers
|
||||||
|
log.warn("ElasticSearchEngine.shutdown not implemented.");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void ping() throws SearchEngineException {
|
||||||
|
// TODO What's the simplest we can do? Another smoke test?
|
||||||
|
log.warn("ElasticSearchEngine.ping() not implemented."); // TODO
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchInputDocument createInputDocument() {
|
||||||
|
return new BaseSearchInputDocument();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void add(SearchInputDocument... docs) throws SearchEngineException {
|
||||||
|
add(Arrays.asList(docs));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void add(Collection<SearchInputDocument> docs)
|
||||||
|
throws SearchEngineException {
|
||||||
|
new ESAdder(baseUrl).add(docs);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void commit() throws SearchEngineException {
|
||||||
|
new ESFlusher(baseUrl).flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void commit(boolean wait) throws SearchEngineException {
|
||||||
|
new ESFlusher(baseUrl).flush(wait);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void deleteById(String... ids) throws SearchEngineException {
|
||||||
|
deleteById(Arrays.asList(ids));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void deleteById(Collection<String> ids)
|
||||||
|
throws SearchEngineException {
|
||||||
|
new ESDeleter(baseUrl).deleteByIds(ids);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void deleteByQuery(String query) throws SearchEngineException {
|
||||||
|
new ESDeleter(baseUrl).deleteByQuery(query);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchQuery createQuery() {
|
||||||
|
return new BaseSearchQuery();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchQuery createQuery(String queryText) {
|
||||||
|
BaseSearchQuery query = new BaseSearchQuery();
|
||||||
|
query.setQuery(queryText);
|
||||||
|
return query;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchResponse query(SearchQuery query)
|
||||||
|
throws SearchEngineException {
|
||||||
|
return new ESQuery(baseUrl).query(query);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int documentCount() throws SearchEngineException {
|
||||||
|
return new ESCounter(baseUrl).count();
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,53 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocumentList;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A simple implementation. In fact, this is so simple that perhaps it should be
|
||||||
|
* named BaseSearchResultDocumentList.
|
||||||
|
*/
|
||||||
|
class ElasticSearchResultDocumentList implements SearchResultDocumentList {
|
||||||
|
private final List<SearchResultDocument> documents;
|
||||||
|
private final long numberFound;
|
||||||
|
|
||||||
|
public ElasticSearchResultDocumentList(List<SearchResultDocument> documents,
|
||||||
|
long numberFound) {
|
||||||
|
this.documents = documents;
|
||||||
|
this.numberFound = numberFound;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<SearchResultDocument> iterator() {
|
||||||
|
return documents.iterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getNumFound() {
|
||||||
|
return documents.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int size() {
|
||||||
|
return documents.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public SearchResultDocument get(int i) {
|
||||||
|
return documents.get(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format(
|
||||||
|
"ElasticSearchResultDocumentList[numberFound=%s, documents=%s]",
|
||||||
|
numberFound, documents);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,216 @@
|
||||||
|
# What is this package?
|
||||||
|
* The first draft of an Elasticsearch driver for VIVO
|
||||||
|
|
||||||
|
# What has been done?
|
||||||
|
* Implement the `SearchEngine` interface
|
||||||
|
* Classes in `edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch`
|
||||||
|
* No attempt to add new functions.
|
||||||
|
|
||||||
|
# How to experiment with it?
|
||||||
|
* Install Elasticsearch somewhere.
|
||||||
|
* Create a search index with the appropriate mapping (see below).
|
||||||
|
* Check out VIVO and this branch of Vitro (see below), and do the usual installation procedure.
|
||||||
|
* Modify `{vitro_home}/config/applicationSetup.n3` to use this driver (see below).
|
||||||
|
* Start elasticsearch
|
||||||
|
* Start VIVO
|
||||||
|
|
||||||
|
# Not ready for production
|
||||||
|
* Documentation
|
||||||
|
* Instructions on how to install and configure the driver.
|
||||||
|
* Instructions on how to set up Elasticsearch?
|
||||||
|
* Smoke test
|
||||||
|
* Display a warning if the elasticsearch server is not responding.
|
||||||
|
* Functional testing
|
||||||
|
* Are we getting the proper search results?
|
||||||
|
* Are search results in the order that we would like?
|
||||||
|
* Improved snippets
|
||||||
|
* Copy the technique used for Solr
|
||||||
|
* Code improvement
|
||||||
|
* Rigorous closing of HTTP connections.
|
||||||
|
* IOC for HTTP code, to help in unit testing
|
||||||
|
* Consistent use of exceptions and logging
|
||||||
|
* Unit tests
|
||||||
|
* Automatic initialization of the index
|
||||||
|
* If VIVO detects an empty index, apply the mapping.
|
||||||
|
|
||||||
|
# The next steps: adding functionality
|
||||||
|
|
||||||
|
## Stay within the framework
|
||||||
|
* Add fields that enhance the contents of the search index documents (see below).
|
||||||
|
* Add data distributors that run queries and format the output (see below).
|
||||||
|
|
||||||
|
## Go outside the framework
|
||||||
|
* Add functions to the Elasticsearch driver that the Solr driver will simply ignore.
|
||||||
|
* Or remove Solr entirely
|
||||||
|
* Query Elasticsearch directly
|
||||||
|
* Or write a data distributor that will run the query
|
||||||
|
|
||||||
|
# The details:
|
||||||
|
|
||||||
|
## Check out VIVO and Vitro
|
||||||
|
* For now, the Elasticsearch driver only lives in my fork of Vitro
|
||||||
|
* No changes to VIVO are required (yet).
|
||||||
|
|
||||||
|
```
|
||||||
|
git clone https://github.com/vivo-project/VIVO.git
|
||||||
|
git clone -b feature/elasticsearchExperiments https://github.com/j2blake/Vitro.git
|
||||||
|
```
|
||||||
|
|
||||||
|
## A mapping for the search index
|
||||||
|
* If the index uses the default mapping, it will not work correctly.
|
||||||
|
* Some fields must be declared as `keyword`, some as unstemmed, etc.
|
||||||
|
|
||||||
|
* Example mapping script:
|
||||||
|
|
||||||
|
```
|
||||||
|
curl -X PUT "localhost:9200/vivo?pretty" -H 'Content-Type: application/json' -d'
|
||||||
|
{
|
||||||
|
"mappings": {
|
||||||
|
"_doc": {
|
||||||
|
"properties": {
|
||||||
|
"ALLTEXT": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "english"
|
||||||
|
},
|
||||||
|
"ALLTEXTUNSTEMMED": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "standard"
|
||||||
|
},
|
||||||
|
"DocId": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"classgroup": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"type": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"mostSpecificTypeURIs": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"indexedTime": {
|
||||||
|
"type": "long"
|
||||||
|
},
|
||||||
|
"nameRaw": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"URI": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"THUMBNAIL": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"THUMBNAIL_URL": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
"nameLowercaseSingleValued": {
|
||||||
|
"type": "text",
|
||||||
|
"analyzer": "standard",
|
||||||
|
"fielddata": "true"
|
||||||
|
},
|
||||||
|
"BETA" : {
|
||||||
|
"type" : "float"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"query": {
|
||||||
|
"default_field": "ALLTEXT"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
'
|
||||||
|
```
|
||||||
|
* __*Note:*__ The first line of the script specifies the name of the index as `vivo`.
|
||||||
|
Any name may be used, but it must match the "base URL" that is specified in `applicationSetup.n3` (see below).
|
||||||
|
* __*Note:*__ The same first line specifies the location and port number of the elasticsearch server.
|
||||||
|
Again, any location and port may be used, but they must match the "base URL" in `applicationSetup.n3`.
|
||||||
|
|
||||||
|
## Modify `applicationSetup.n3`
|
||||||
|
* Change this:
|
||||||
|
|
||||||
|
```
|
||||||
|
# ----------------------------
|
||||||
|
#
|
||||||
|
# Search engine module:
|
||||||
|
# The Solr-based implementation is the only standard option, but it can be
|
||||||
|
# wrapped in an "instrumented" wrapper, which provides additional logging
|
||||||
|
# and more rigorous life-cycle checking.
|
||||||
|
#
|
||||||
|
|
||||||
|
:instrumentedSearchEngineWrapper
|
||||||
|
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
|
||||||
|
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||||
|
:wraps :solrSearchEngine .
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
* To this:
|
||||||
|
|
||||||
|
```
|
||||||
|
# ----------------------------
|
||||||
|
#
|
||||||
|
# Search engine module:
|
||||||
|
# The Solr-based implementation is the only standard option, but it can be
|
||||||
|
# wrapped in an "instrumented" wrapper, which provides additional logging
|
||||||
|
# and more rigorous life-cycle checking.
|
||||||
|
#
|
||||||
|
|
||||||
|
:instrumentedSearchEngineWrapper
|
||||||
|
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
|
||||||
|
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||||
|
:wraps :elasticSearchEngine .
|
||||||
|
|
||||||
|
:elasticSearchEngine
|
||||||
|
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.ElasticSearchEngine> ,
|
||||||
|
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||||
|
:hasBaseUrl "http://localhost:9200/vivo" .
|
||||||
|
```
|
||||||
|
|
||||||
|
## Enhance the contents of the search index
|
||||||
|
### An example: Publication URIs in the author's search document
|
||||||
|
* Add a keyword field to the search index
|
||||||
|
|
||||||
|
```
|
||||||
|
"publicationURI": {
|
||||||
|
"type": "keyword"
|
||||||
|
},
|
||||||
|
```
|
||||||
|
|
||||||
|
* Add a `DocumentModifier` to VIVO.
|
||||||
|
|
||||||
|
```
|
||||||
|
:documentModifier_publications
|
||||||
|
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.SelectQueryDocumentModifier> ,
|
||||||
|
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
|
||||||
|
rdfs:label "URIs of publications are added to publicationURI field." ;
|
||||||
|
:hasTargetField "publicationURI" ;
|
||||||
|
:hasSelectQuery """
|
||||||
|
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
||||||
|
PREFIX vivo: <http://vivoweb.org/ontology/core#>
|
||||||
|
PREFIX bibo: <http://purl.org/ontology/bibo/>
|
||||||
|
SELECT ?publication
|
||||||
|
WHERE {
|
||||||
|
?uri vivo:relatedBy ?authorship .
|
||||||
|
?authorship a vivo:Authorship .
|
||||||
|
?authorship vivo:relates ?publication .
|
||||||
|
?publication a bibo:Document .
|
||||||
|
}
|
||||||
|
""" .
|
||||||
|
```
|
||||||
|
|
||||||
|
## Use data distributors to query the search index
|
||||||
|
* Install the Data Distribution API
|
||||||
|
* Add a distributor:
|
||||||
|
|
||||||
|
```
|
||||||
|
:drill_by_URI
|
||||||
|
a <java:edu.cornell.library.scholars.webapp.controller.api.distribute.DataDistributor> ,
|
||||||
|
<java:edu.cornell.library.scholars.webapp.controller.api.distribute.search.DrillDownSearchByUriDataDistributor> ;
|
||||||
|
:actionName "searchAndDrill" .
|
||||||
|
```
|
||||||
|
|
||||||
|
* Run the query:
|
||||||
|
|
||||||
|
```
|
||||||
|
http://localhost:8080/vivo/api/dataRequest/searchAndDrill?uri=http://scholars.cornell.edu/individual/mj495
|
||||||
|
```
|
|
@ -0,0 +1,77 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build a Map of Objects, suitable for marshalling by Jackson.
|
||||||
|
*
|
||||||
|
* Include conditional logic, so null values, empty maps, or empty lists will
|
||||||
|
* not be added, unless you use the special values.
|
||||||
|
*/
|
||||||
|
/**
 * Build a Map of Objects, suitable for marshalling by Jackson.
 *
 * Include conditional logic, so null values, empty maps, or empty lists will
 * not be added by put(), unless you use the special sentinel values
 * EMPTY_JSON_MAP / EMPTY_JSON_LIST, which are always stored.
 */
public class JsonTree {
    /**
     * Empty maps will not be added, except for this exact instance.
     */
    public static final Map<String, Object> EMPTY_JSON_MAP = Collections
            .emptyMap();

    /**
     * Empty lists will not be added, except for this exact instance.
     */
    public static final List<Object> EMPTY_JSON_LIST = Collections.emptyList();

    /**
     * Static factory: create an empty tree.
     */
    public static JsonTree tree() {
        return new JsonTree();
    }

    /**
     * Wrap an int so put() will ignore non-positive values: returns null
     * (which put() skips) unless i is strictly positive.
     */
    public static Integer ifPositive(int i) {
        return (i > 0) ? i : null;
    }

    private final Map<String, Object> map = new HashMap<>();

    /**
     * Add a key/value pair, unless the value is insignificant (null, or an
     * empty Map/List other than the sentinels above).
     *
     * A JsonTree value is stored as a snapshot of its current contents.
     *
     * @return this, for chaining
     */
    public JsonTree put(String key, Object value) {
        if (isSignificant(value)) {
            storeIt(key, value);
        }
        return this;
    }

    private boolean isSignificant(Object value) {
        if (value == null) {
            return false;
        }
        // Identity comparison is deliberate: only the sentinel instances are
        // allowed through when empty.
        if (value instanceof Map && ((Map<?, ?>) value).isEmpty()
                && value != EMPTY_JSON_MAP) {
            return false;
        }
        if (value instanceof List && ((List<?>) value).isEmpty()
                && value != EMPTY_JSON_LIST) {
            return false;
        }
        // NOTE(review): an empty JsonTree is NOT filtered here, so it would be
        // stored as an empty map -- confirm whether that is intended.
        return true;
    }

    private void storeIt(String key, Object value) {
        if (value instanceof JsonTree) {
            // Flatten nested trees so the result is plain Maps and Lists.
            map.put(key, ((JsonTree) value).asMap());
        } else {
            map.put(key, value);
        }
    }

    /**
     * @return a defensive copy of the accumulated map
     */
    public Map<String, Object> asMap() {
        return new HashMap<>(map);
    }
}
|
|
@ -0,0 +1,172 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.EMPTY_JSON_MAP;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.ifPositive;
|
||||||
|
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.tree;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery.Order;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Accept a SearchQuery and make it available as a JSON string, suitable for
|
||||||
|
* Elasticsearch.
|
||||||
|
*/
|
||||||
|
public class QueryConverter {
|
||||||
|
private static final Log log = LogFactory.getLog(QueryConverter.class);
|
||||||
|
|
||||||
|
private final SearchQuery query;
|
||||||
|
private final Map<String, Object> queryAndFilters;
|
||||||
|
private final Map<String, Object> sortFields;
|
||||||
|
private final Map<String, Object> facets;
|
||||||
|
private final Map<String, Object> highlighter;
|
||||||
|
private final List<String> returnFields;
|
||||||
|
private final Map<String, Object> fullMap;
|
||||||
|
|
||||||
|
public QueryConverter(SearchQuery query) {
|
||||||
|
this.query = query;
|
||||||
|
this.queryAndFilters = filteredOrNot();
|
||||||
|
this.sortFields = figureSortFields();
|
||||||
|
this.facets = figureFacets();
|
||||||
|
this.highlighter = figureHighlighter();
|
||||||
|
this.returnFields = figureReturnFields();
|
||||||
|
|
||||||
|
this.fullMap = figureFullMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> filteredOrNot() {
|
||||||
|
if (query.getFilters().isEmpty()) {
|
||||||
|
return new QueryStringMap(query.getQuery()).map;
|
||||||
|
} else {
|
||||||
|
return buildFilterStructure();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> buildFilterStructure() {
|
||||||
|
return tree() //
|
||||||
|
.put("bool", tree() //
|
||||||
|
.put("must", new QueryStringMap(query.getQuery()).map) //
|
||||||
|
.put("filter", buildFiltersList())) //
|
||||||
|
.asMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Map<String, Object>> buildFiltersList() {
|
||||||
|
List<Map<String, Object>> list = new ArrayList<>();
|
||||||
|
for (String filter : query.getFilters()) {
|
||||||
|
list.add(new QueryStringMap(filter).map);
|
||||||
|
}
|
||||||
|
return list;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> figureSortFields() {
|
||||||
|
Map<String, Order> fields = query.getSortFields();
|
||||||
|
Map<String, Object> map = new HashMap<>();
|
||||||
|
for (String name : fields.keySet()) {
|
||||||
|
String sortOrder = fields.get(name).toString().toLowerCase();
|
||||||
|
map.put(name, sortOrder);
|
||||||
|
}
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> figureFacets() {
|
||||||
|
Map<String, Object> map = new HashMap<>();
|
||||||
|
for (String field : query.getFacetFields()) {
|
||||||
|
map.put("facet_" + field, figureFacet(field));
|
||||||
|
}
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> figureHighlighter() {
|
||||||
|
return tree() //
|
||||||
|
.put("fields", tree() //
|
||||||
|
.put("ALLTEXT", EMPTY_JSON_MAP))
|
||||||
|
.asMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> figureFacet(String field) {
|
||||||
|
return tree() //
|
||||||
|
.put("terms", tree() //
|
||||||
|
.put("field", field) //
|
||||||
|
.put("size", ifPositive(query.getFacetLimit())) //
|
||||||
|
.put("min_doc_count",
|
||||||
|
ifPositive(query.getFacetMinCount()))) //
|
||||||
|
.asMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<String> figureReturnFields() {
|
||||||
|
return new ArrayList<>(query.getFieldsToReturn());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> figureFullMap() {
|
||||||
|
return tree() //
|
||||||
|
.put("query", queryAndFilters) //
|
||||||
|
.put("from", ifPositive(query.getStart())) //
|
||||||
|
.put("highlight", highlighter)
|
||||||
|
.put("size", ifPositive(query.getRows())) //
|
||||||
|
.put("sort", sortFields) //
|
||||||
|
.put("_source", returnFields) //
|
||||||
|
.put("aggregations", facets) //
|
||||||
|
.asMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
public String asString() throws SearchEngineException {
|
||||||
|
try {
|
||||||
|
return new ObjectMapper().writeValueAsString(fullMap);
|
||||||
|
} catch (JsonProcessingException e) {
|
||||||
|
throw new SearchEngineException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class QueryStringMap {
|
||||||
|
public final Map<String, Object> map;
|
||||||
|
|
||||||
|
public QueryStringMap(String queryString) {
|
||||||
|
map = new HashMap<>();
|
||||||
|
map.put("query_string", makeInnerMap(escape(queryString)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a kluge, but perhaps it will work for now.
|
||||||
|
*
|
||||||
|
* Apparently Solr is willing to put up with query strings that contain
|
||||||
|
* special characters in odd places, but Elasticsearch is not.
|
||||||
|
*
|
||||||
|
* So, a query string of "classgroup:http://this/that" must be escaped
|
||||||
|
* as "classgroup:http\:\/\/this\/that". Notice that the first colon
|
||||||
|
* delimits the field name, and so must not be escaped.
|
||||||
|
*
|
||||||
|
* But what if no field is specified? Then all colons must be escaped.
|
||||||
|
* How would we distinguish that?
|
||||||
|
*
|
||||||
|
* And what if the query is more complex, and more than one field is
|
||||||
|
* specified? What if other special characters are included?
|
||||||
|
*
|
||||||
|
* This could be a real problem.
|
||||||
|
*/
|
||||||
|
private String escape(String queryString) {
|
||||||
|
return queryString.replace(":", "\\:").replace("/", "\\/")
|
||||||
|
.replaceFirst("\\\\:", ":");
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, String> makeInnerMap(String queryString) {
|
||||||
|
Map<String, String> inner = new HashMap<>();
|
||||||
|
inner.put("default_field", "ALLTEXT");
|
||||||
|
inner.put("query", queryString);
|
||||||
|
return inner;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,182 @@
|
||||||
|
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||||
|
|
||||||
|
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField.Count;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField.BaseCount;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResponse;
|
||||||
|
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResultDocument;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Elastic search sends a JSON response to a query. parse it to a
|
||||||
|
* SearchResponse.
|
||||||
|
*/
|
||||||
|
class ResponseParser {
|
||||||
|
private static final Log log = LogFactory.getLog(ResponseParser.class);
|
||||||
|
|
||||||
|
private final Map<String, Object> responseMap;
|
||||||
|
|
||||||
|
private Map<String, Map<String, List<String>>> highlightingMap;
|
||||||
|
private Map<String, SearchFacetField> facetFieldsMap;
|
||||||
|
private long totalHits;
|
||||||
|
private List<SearchResultDocument> documentList;
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public ResponseParser(String responseString) throws SearchEngineException {
|
||||||
|
try {
|
||||||
|
this.responseMap = new ObjectMapper().readValue(responseString,
|
||||||
|
HashMap.class);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new SearchEngineException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public SearchResponse parse() {
|
||||||
|
parseDocumentList();
|
||||||
|
parseFacetFields();
|
||||||
|
SearchResponse response = new BaseSearchResponse(highlightingMap,
|
||||||
|
facetFieldsMap,
|
||||||
|
new ElasticSearchResultDocumentList(documentList, totalHits));
|
||||||
|
log.debug("ESQuery.ResponseParser.parse: " + response);
|
||||||
|
return response;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseFacetFields() {
|
||||||
|
facetFieldsMap = new HashMap<>();
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Map<String, Map<String, Object>> aggregations = (Map<String, Map<String, Object>>) responseMap
|
||||||
|
.get("aggregations");
|
||||||
|
if (aggregations == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (String key : aggregations.keySet()) {
|
||||||
|
if (key.startsWith("facet_")) {
|
||||||
|
String name = key.substring(6);
|
||||||
|
parseFacetField(name, aggregations.get(key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseFacetField(String name, Map<String, Object> facetMap) {
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
List<Map<String, Object>> bucketsList = (List<Map<String, Object>>) facetMap
|
||||||
|
.get("buckets");
|
||||||
|
if (bucketsList == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
List<Count> counts = new ArrayList<>();
|
||||||
|
for (Map<String, Object> bucket : bucketsList) {
|
||||||
|
counts.add(new BaseCount((String) bucket.get("key"),
|
||||||
|
(Integer) bucket.get("doc_count")));
|
||||||
|
}
|
||||||
|
|
||||||
|
facetFieldsMap.put(name, new BaseSearchFacetField(name, counts));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseDocumentList() {
|
||||||
|
documentList = new ArrayList<>();
|
||||||
|
highlightingMap = new HashMap<>();
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Map<String, Object> uberHits = (Map<String, Object>) responseMap
|
||||||
|
.get("hits");
|
||||||
|
if (uberHits == null) {
|
||||||
|
log.warn("Didn't find a 'hits' field " + "in the query response: "
|
||||||
|
+ responseMap);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Integer total = (Integer) uberHits.get("total");
|
||||||
|
if (total == null) {
|
||||||
|
log.warn("Didn't find a 'hits.total' field "
|
||||||
|
+ "in the query response: " + responseMap);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
List<Map<String, Object>> hits = (List<Map<String, Object>>) uberHits
|
||||||
|
.get("hits");
|
||||||
|
if (hits == null) {
|
||||||
|
log.warn("Didn't find a 'hits.hits' field "
|
||||||
|
+ "in the query response: " + responseMap);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
parseDocuments(hits);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseDocuments(List<Map<String, Object>> hits) {
|
||||||
|
for (Map<String, Object> hit : hits) {
|
||||||
|
SearchResultDocument doc = parseDocument(hit);
|
||||||
|
if (doc != null) {
|
||||||
|
documentList.add(doc);
|
||||||
|
|
||||||
|
Map<String, List<String>> highlight = parseHighlight(hit);
|
||||||
|
if (highlight != null) {
|
||||||
|
highlightingMap.put(doc.getUniqueId(), highlight);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private SearchResultDocument parseDocument(Map<String, Object> hitMap) {
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Map<String, Collection<Object>> sourceMap = (Map<String, Collection<Object>>) hitMap
|
||||||
|
.get("_source");
|
||||||
|
if (sourceMap == null) {
|
||||||
|
log.warn("Didn't find a '_source' field in the hit: " + hitMap);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
String id = (String) hitMap.get("_id");
|
||||||
|
if (id == null) {
|
||||||
|
log.warn("Didn't find a '_id' field in the hit: " + hitMap);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
return new BaseSearchResultDocument(id, sourceMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, List<String>> parseHighlight(
|
||||||
|
Map<String, Object> hitMap) {
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
Map<String, List<String>> highlightMap = (Map<String, List<String>>) hitMap
|
||||||
|
.get("highlight");
|
||||||
|
if (highlightMap == null) {
|
||||||
|
log.debug("Didn't find a 'highlight' field in the hit: " + hitMap);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
List<String> snippets = highlightMap.get("ALLTEXT");
|
||||||
|
if (snippets == null) {
|
||||||
|
log.warn("Didn't find a 'highlight.ALLTEXT' field in the hit: "
|
||||||
|
+ hitMap);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<String, List<String>> snippetMap = new HashMap<>();
|
||||||
|
snippetMap.put("ALLTEXT", snippets);
|
||||||
|
return snippetMap;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Add table
Reference in a new issue