initial files for ElasticSearch: ticket vivo-1587 (#85)
* initial files from ticket vivo-1587 * Add https://mvnrepository.com/artifact/org.apache.httpcomponents/fluent-hc/4.5.6 dependency * Resolves: https://jira.duraspace.org/browse/VIVO-1587
This commit is contained in:
parent
b049aa6d32
commit
45ddb56294
12 changed files with 1275 additions and 0 deletions
|
@ -58,6 +58,11 @@
|
|||
<artifactId>argon2-jvm</artifactId>
|
||||
<version>2.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>fluent-hc</artifactId>
|
||||
<version>4.5.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.vivoweb</groupId>
|
||||
<artifactId>vitro-dependencies</artifactId>
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
import org.apache.http.entity.ContentType;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
|
||||
|
||||
/**
|
||||
* The nuts and bolts of adding a document to the Elasticsearch index
|
||||
*/
|
||||
public class ESAdder {
|
||||
private static final Log log = LogFactory.getLog(ESAdder.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
public ESAdder(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public void add(Collection<SearchInputDocument> docs)
|
||||
throws SearchEngineException {
|
||||
for (SearchInputDocument doc : docs) {
|
||||
addDocument(doc);
|
||||
}
|
||||
}
|
||||
|
||||
private void addDocument(SearchInputDocument doc)
|
||||
throws SearchEngineException {
|
||||
try {
|
||||
Map<String, List<Object>> map = convertDocToMap(doc);
|
||||
String json = new ObjectMapper().writeValueAsString(map);
|
||||
log.debug("Adding document for '" + doc.getField("DocId") + "': "
|
||||
+ json);
|
||||
|
||||
putToElastic(json, (String) doc.getField("DocId").getFirstValue());
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to convert to JSON", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Some field values are collections. Add the members of the collection
|
||||
* instead.
|
||||
*/
|
||||
private Map<String, List<Object>> convertDocToMap(SearchInputDocument doc) {
|
||||
Map<String, List<Object>> map = new HashMap<>();
|
||||
for (SearchInputField field : doc.getFieldMap().values()) {
|
||||
ArrayList<Object> list = new ArrayList<>();
|
||||
for (Object value : field.getValues()) {
|
||||
if (value instanceof Collection) {
|
||||
Collection<?> cValue = (Collection<?>) value;
|
||||
list.addAll(cValue);
|
||||
} else {
|
||||
list.add(value);
|
||||
}
|
||||
}
|
||||
map.put(field.getName(), list);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private void putToElastic(String json, String docId)
|
||||
throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_doc/"
|
||||
+ URLEncoder.encode(docId, "UTF8");
|
||||
Response response = Request.Put(url)
|
||||
.bodyString(json, ContentType.APPLICATION_JSON).execute();
|
||||
log.debug("Response from Elasticsearch: "
|
||||
+ response.returnContent().asString());
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||
e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
|
||||
/**
|
||||
* The nuts and bolts of getting the number of documents in the Elasticsearch
|
||||
* index.
|
||||
*/
|
||||
public class ESCounter {
|
||||
private final String baseUrl;
|
||||
|
||||
public ESCounter(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public int count() throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_doc/_count";
|
||||
Response response = Request.Get(url).execute();
|
||||
String json = response.returnContent().asString();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> map = new ObjectMapper().readValue(json,
|
||||
HashMap.class);
|
||||
return (Integer) map.get("count");
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||
e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,147 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.Header;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.StatusLine;
|
||||
import org.apache.http.client.HttpResponseException;
|
||||
import org.apache.http.client.ResponseHandler;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
import org.apache.http.entity.ContentType;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
|
||||
|
||||
/**
|
||||
* The nuts and bolts of deleting documents from the Elasticsearch index.
|
||||
*/
|
||||
public class ESDeleter {
|
||||
private static final Log log = LogFactory.getLog(ESDeleter.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
/**
|
||||
* @param baseUrl
|
||||
*/
|
||||
public ESDeleter(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public void deleteByIds(Collection<String> ids)
|
||||
throws SearchEngineException {
|
||||
for (String id : ids) {
|
||||
deleteById(id);
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteById(String id) throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_doc/"
|
||||
+ URLEncoder.encode(id, "UTF8");
|
||||
Response response = Request.Delete(url).execute();
|
||||
String json = response.returnContent().asString();
|
||||
} catch (HttpResponseException e) {
|
||||
if (e.getStatusCode() == 404) {
|
||||
// Don't care if it has already been deleted.
|
||||
} else {
|
||||
throw new SearchEngineException(
|
||||
"Failed to delete Elasticsearch document " + id, e);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException(
|
||||
"Failed to delete Elasticsearch document " + id, e);
|
||||
}
|
||||
}
|
||||
|
||||
public void deleteByQuery(String queryString) throws SearchEngineException {
|
||||
String url = baseUrl + "/_delete_by_query";
|
||||
SearchQuery query = new BaseSearchQuery().setQuery(queryString);
|
||||
String queryJson = new QueryConverter(query).asString();
|
||||
|
||||
try {
|
||||
Response response = Request.Post(url)
|
||||
.bodyString(queryJson, ContentType.APPLICATION_JSON)
|
||||
.execute();
|
||||
|
||||
BaseResponseHandler handler = new BaseResponseHandler();
|
||||
response.handleResponse(handler);
|
||||
if (handler.getStatusCode() >= 400) {
|
||||
log.warn(String.format(
|
||||
"Failed to delete Elasticsearch documents by query: %s, %d - %s\n%s",
|
||||
queryString, handler.getStatusCode(),
|
||||
handler.getReasonPhrase(), handler.getContentString()));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new SearchEngineException("Failed to delete Elasticsearch "
|
||||
+ "documents by query " + queryString, e);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper class for interpreting HttpResponse errors
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private class BaseResponseHandler implements ResponseHandler<Object> {
|
||||
private int statusCode;
|
||||
private String reasonPhrase;
|
||||
private Map<String, List<String>> headers;
|
||||
private String contentString;
|
||||
|
||||
@Override
|
||||
public Object handleResponse(org.apache.http.HttpResponse innerResponse)
|
||||
throws IOException {
|
||||
StatusLine statusLine = innerResponse.getStatusLine();
|
||||
statusCode = statusLine.getStatusCode();
|
||||
reasonPhrase = statusLine.getReasonPhrase();
|
||||
|
||||
headers = new HashMap<>();
|
||||
for (Header header : innerResponse.getAllHeaders()) {
|
||||
String name = header.getName();
|
||||
if (!headers.containsKey(name)) {
|
||||
headers.put(name, new ArrayList<String>());
|
||||
}
|
||||
headers.get(name).add(header.getValue());
|
||||
}
|
||||
|
||||
HttpEntity entity = innerResponse.getEntity();
|
||||
if (entity == null) {
|
||||
contentString = "";
|
||||
} else {
|
||||
contentString = EntityUtils.toString(entity);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
public int getStatusCode() {
|
||||
return statusCode;
|
||||
}
|
||||
|
||||
public String getReasonPhrase() {
|
||||
return reasonPhrase;
|
||||
}
|
||||
|
||||
public Map<String, List<String>> getHeaders() {
|
||||
return headers;
|
||||
}
|
||||
|
||||
public String getContentString() {
|
||||
return contentString;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
|
||||
/**
|
||||
* Just does a "commit" or "flush" to the index.
|
||||
*/
|
||||
public class ESFlusher {
|
||||
private static final Log log = LogFactory.getLog(ESFlusher.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
public ESFlusher(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public void flush() throws SearchEngineException {
|
||||
flush(false);
|
||||
}
|
||||
|
||||
public void flush(boolean wait) throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_flush"
|
||||
+ (wait ? "?wait_for_ongoing" : "");
|
||||
Response response = Request.Get(url).execute();
|
||||
String json = response.returnContent().asString();
|
||||
log.debug("flush response: " + json);
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||
e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.client.methods.HttpEntityEnclosingRequestBase;
|
||||
import org.apache.http.entity.ContentType;
|
||||
import org.apache.http.entity.StringEntity;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
|
||||
|
||||
/**
|
||||
* Convert a SearchQuery to JSON, send it to Elasticsearch, and convert the JSON
|
||||
* response to a SearchResponse.
|
||||
*/
|
||||
public class ESQuery {
|
||||
private static final Log log = LogFactory.getLog(ESQuery.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
public ESQuery(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public SearchResponse query(SearchQuery query)
|
||||
throws SearchEngineException {
|
||||
String queryString = new QueryConverter(query).asString();
|
||||
String response = doTheQuery(queryString);
|
||||
return new ResponseParser(response).parse();
|
||||
}
|
||||
|
||||
private String doTheQuery(String queryString) {
|
||||
log.debug("QUERY: " + queryString);
|
||||
try {
|
||||
String url = baseUrl + "/_search";
|
||||
HttpResponse response = new ESFunkyGetRequest(url)
|
||||
.bodyString(queryString, ContentType.APPLICATION_JSON)
|
||||
.execute();
|
||||
String responseString = IOUtils
|
||||
.toString(response.getEntity().getContent());
|
||||
log.debug("RESPONSE: " + responseString);
|
||||
return responseString;
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to put to Elasticsearch", e);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper class -- a GET request that accepts a body
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* The HttpClient implementations, both regular and conversational, do not
|
||||
* allow you to put a body on a GET request. In online discussion, some say
|
||||
* that the HTTP spec is ambiguous on this point, so each implementation
|
||||
* makes its own choice. For example, CURL allows it.
|
||||
*
|
||||
* More to the point however, is that ElasticSearch requires it. So here's a
|
||||
* simple class to make that possible.
|
||||
*
|
||||
* USE POST INSTEAD!!
|
||||
*/
|
||||
private static class ESFunkyGetRequest
|
||||
extends HttpEntityEnclosingRequestBase {
|
||||
public ESFunkyGetRequest(String url) throws SearchEngineException {
|
||||
super();
|
||||
try {
|
||||
setURI(new URI(url));
|
||||
} catch (URISyntaxException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public ESFunkyGetRequest bodyString(String contents,
|
||||
ContentType contentType) {
|
||||
setEntity(new StringEntity(contents, contentType));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HttpResponse execute() throws SearchEngineException {
|
||||
try {
|
||||
return HttpClientFactory.getHttpClient().execute(this);
|
||||
} catch (IOException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMethod() {
|
||||
return "GET";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,142 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.Application;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
|
||||
|
||||
/**
|
||||
* A first draft of an Elasticsearch implementation.
|
||||
*/
|
||||
public class ElasticSearchEngine implements SearchEngine {
|
||||
private static final Log log = LogFactory.getLog(ElasticSearchEngine.class);
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Configuration
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private String baseUrl;
|
||||
|
||||
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBaseUrl")
|
||||
public void setBaseUrl(String url) {
|
||||
if (baseUrl == null) {
|
||||
if (url.endsWith("/")) {
|
||||
url = url.substring(0, url.length() - 1);
|
||||
}
|
||||
baseUrl = url;
|
||||
} else {
|
||||
throw new IllegalStateException(
|
||||
"Configuration includes multiple base URLs: " + url
|
||||
+ ", and " + baseUrl);
|
||||
}
|
||||
}
|
||||
|
||||
@Validation
|
||||
public void validate() throws Exception {
|
||||
if (baseUrl == null) {
|
||||
throw new IllegalStateException(
|
||||
"Configuration did not include a base URL.");
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// The instance
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void startup(Application application, ComponentStartupStatus ss) {
|
||||
log.warn("ElasticSearchEngine.startup() not implemented."); // TODO
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown(Application application) {
|
||||
// TODO Flush the buffers
|
||||
log.warn("ElasticSearchEngine.shutdown not implemented.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void ping() throws SearchEngineException {
|
||||
// TODO What's the simplest we can do? Another smoke test?
|
||||
log.warn("ElasticSearchEngine.ping() not implemented."); // TODO
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchInputDocument createInputDocument() {
|
||||
return new BaseSearchInputDocument();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(SearchInputDocument... docs) throws SearchEngineException {
|
||||
add(Arrays.asList(docs));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(Collection<SearchInputDocument> docs)
|
||||
throws SearchEngineException {
|
||||
new ESAdder(baseUrl).add(docs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit() throws SearchEngineException {
|
||||
new ESFlusher(baseUrl).flush();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit(boolean wait) throws SearchEngineException {
|
||||
new ESFlusher(baseUrl).flush(wait);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteById(String... ids) throws SearchEngineException {
|
||||
deleteById(Arrays.asList(ids));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteById(Collection<String> ids)
|
||||
throws SearchEngineException {
|
||||
new ESDeleter(baseUrl).deleteByIds(ids);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteByQuery(String query) throws SearchEngineException {
|
||||
new ESDeleter(baseUrl).deleteByQuery(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchQuery createQuery() {
|
||||
return new BaseSearchQuery();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchQuery createQuery(String queryText) {
|
||||
BaseSearchQuery query = new BaseSearchQuery();
|
||||
query.setQuery(queryText);
|
||||
return query;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchResponse query(SearchQuery query)
|
||||
throws SearchEngineException {
|
||||
return new ESQuery(baseUrl).query(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int documentCount() throws SearchEngineException {
|
||||
return new ESCounter(baseUrl).count();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocumentList;
|
||||
|
||||
/**
|
||||
* A simple implementation. In fact, this is so simple that perhaps it should be
|
||||
* named BaseSearchResultDocumentList.
|
||||
*/
|
||||
class ElasticSearchResultDocumentList implements SearchResultDocumentList {
|
||||
private final List<SearchResultDocument> documents;
|
||||
private final long numberFound;
|
||||
|
||||
public ElasticSearchResultDocumentList(List<SearchResultDocument> documents,
|
||||
long numberFound) {
|
||||
this.documents = documents;
|
||||
this.numberFound = numberFound;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<SearchResultDocument> iterator() {
|
||||
return documents.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getNumFound() {
|
||||
return documents.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return documents.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchResultDocument get(int i) {
|
||||
return documents.get(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(
|
||||
"ElasticSearchResultDocumentList[numberFound=%s, documents=%s]",
|
||||
numberFound, documents);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,216 @@
|
|||
# What is this package?
|
||||
* The first draft of an Elasticsearch driver for VIVO
|
||||
|
||||
# What has been done?
|
||||
* Implement the `SearchEngine` interface
|
||||
* Classes in `edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch`
|
||||
* No attempt to add new functions.
|
||||
|
||||
# How to experiment with it?
|
||||
* Install Elasticsearch somewhere.
|
||||
* Create a search index with the appropriate mapping (see below).
|
||||
* Check out VIVO and this branch of Vitro (see below), and do the usual installation procedure.
|
||||
* Modify `{vitro_home}/config/applicationSetup.n3` to use this driver (see below).
|
||||
* Start Elasticsearch
|
||||
* Start VIVO
|
||||
|
||||
# Not ready for production
|
||||
* Documentation
|
||||
* Instructions on how to install and configure the driver.
|
||||
* Instructions on how to setup elasticsearch?
|
||||
* Smoke test
|
||||
* Display a warning if the elasticsearch server is not responding.
|
||||
* Functional testing
|
||||
* Are we getting the proper search results?
|
||||
* Are search results in the order that we would like?
|
||||
* Improved snippets
|
||||
* Copy the technique used for Solr
|
||||
* Code improvement
|
||||
* Rigorous closing of HTTP connections.
|
||||
* IOC for HTTP code, to help in unit testing
|
||||
* Consistent use of exceptions and logging
|
||||
* Unit tests
|
||||
* Automatic initialization of the index
|
||||
* If VIVO detects an empty index, apply the mapping.
|
||||
|
||||
# The next steps: adding functionality
|
||||
|
||||
## Stay within the framework
|
||||
* Add fields that enhance the contents of the search index documents (see below).
|
||||
* Add data distributors that run queries and format the output (see below).
|
||||
|
||||
## Go outside the framework
|
||||
* Add functions to the Elasticsearch driver that the Solr driver will simply ignore.
|
||||
* Or remove Solr entirely
|
||||
* Query Elasticsearch directly
|
||||
* Or write a data distributor that will run the query
|
||||
|
||||
# The details:
|
||||
|
||||
## Check out VIVO and Vitro
|
||||
* For now, the Elasticsearch driver only lives in my fork of Vitro
|
||||
* No changes to VIVO are required (yet).
|
||||
|
||||
```
|
||||
git clone https://github.com/vivo-project/VIVO.git
|
||||
git clone -b feature/elasticsearchExperiments https://github.com/j2blake/Vitro.git
|
||||
```
|
||||
|
||||
## A mapping for the search index
|
||||
* If the index uses the default mapping, it will not work correctly.
|
||||
* Some fields must be declared as `keyword`, some as unstemmed, etc.
|
||||
|
||||
* Example mapping script:
|
||||
|
||||
```
|
||||
curl -X PUT "localhost:9200/vivo?pretty" -H 'Content-Type: application/json' -d'
|
||||
{
|
||||
"mappings": {
|
||||
"_doc": {
|
||||
"properties": {
|
||||
"ALLTEXT": {
|
||||
"type": "text",
|
||||
"analyzer": "english"
|
||||
},
|
||||
"ALLTEXTUNSTEMMED": {
|
||||
"type": "text",
|
||||
"analyzer": "standard"
|
||||
},
|
||||
"DocId": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"classgroup": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"type": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"mostSpecificTypeURIs": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"indexedTime": {
|
||||
"type": "long"
|
||||
},
|
||||
"nameRaw": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"URI": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"THUMBNAIL": {
|
||||
"type": "integer"
|
||||
},
|
||||
"THUMBNAIL_URL": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"nameLowercaseSingleValued": {
|
||||
"type": "text",
|
||||
"analyzer": "standard",
|
||||
"fielddata": "true"
|
||||
},
|
||||
"BETA" : {
|
||||
"type" : "float"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"query": {
|
||||
"default_field": "ALLTEXT"
|
||||
}
|
||||
}
|
||||
'
|
||||
```
|
||||
* __*Note:*__ The first line of the script specifies the name of the index as `vivo`.
|
||||
Any name may be used, but it must match the "base URL" that is specified in `applicationSetup.n3` (see below).
|
||||
* __*Note:*__ The same first line specifies the location and port number of the elasticsearch server.
|
||||
Again, any location and port may be used, but they must match the "base URL" in `applicationSetup.n3`.
|
||||
|
||||
## Modify `applicationSetup.n3`
|
||||
* Change this:
|
||||
|
||||
```
|
||||
# ----------------------------
|
||||
#
|
||||
# Search engine module:
|
||||
# The Solr-based implementation is the only standard option, but it can be
|
||||
# wrapped in an "instrumented" wrapper, which provides additional logging
|
||||
# and more rigorous life-cycle checking.
|
||||
#
|
||||
|
||||
:instrumentedSearchEngineWrapper
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||
:wraps :solrSearchEngine .
|
||||
|
||||
```
|
||||
|
||||
* To this:
|
||||
|
||||
```
|
||||
# ----------------------------
|
||||
#
|
||||
# Search engine module:
|
||||
# The Solr-based implementation is the only standard option, but it can be
|
||||
# wrapped in an "instrumented" wrapper, which provides additional logging
|
||||
# and more rigorous life-cycle checking.
|
||||
#
|
||||
|
||||
:instrumentedSearchEngineWrapper
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||
:wraps :elasticSearchEngine .
|
||||
|
||||
:elasticSearchEngine
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.ElasticSearchEngine> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||
:hasBaseUrl "http://localhost:9200/vivo" .
|
||||
```
|
||||
|
||||
## Enhance the contents of the search index
|
||||
### An example: Publication URIs in the author's search document
|
||||
* Add a keyword field to the search index
|
||||
|
||||
```
|
||||
"publicationURI": {
|
||||
"type": "keyword"
|
||||
},
|
||||
```
|
||||
|
||||
* Add a `DocumentModifier` to VIVO.
|
||||
|
||||
```
|
||||
:documentModifier_publications
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.SelectQueryDocumentModifier> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
|
||||
rdfs:label "URIs of publications are added to publicationURI field." ;
|
||||
:hasTargetField "publicationURI" ;
|
||||
:hasSelectQuery """
|
||||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
||||
PREFIX vivo: <http://vivoweb.org/ontology/core#>
|
||||
PREFIX bibo: <http://purl.org/ontology/bibo/>
|
||||
SELECT ?publication
|
||||
WHERE {
|
||||
?uri vivo:relatedBy ?authorship .
|
||||
?authorship a vivo:Authorship .
|
||||
?authorship vivo:relates ?publication .
|
||||
?publication a bibo:Document .
|
||||
}
|
||||
""" .
|
||||
```
|
||||
|
||||
## Use data distributors to query the search index
|
||||
* Install the Data Distribution API
|
||||
* Add a distributor:
|
||||
|
||||
```
|
||||
:drill_by_URI
|
||||
a <java:edu.cornell.library.scholars.webapp.controller.api.distribute.DataDistributor> ,
|
||||
<java:edu.cornell.library.scholars.webapp.controller.api.distribute.search.DrillDownSearchByUriDataDistributor> ;
|
||||
:actionName "searchAndDrill" .
|
||||
```
|
||||
|
||||
* Run the query:
|
||||
|
||||
```
|
||||
http://localhost:8080/vivo/api/dataRequest/searchAndDrill?uri=http://scholars.cornell.edu/individual/mj495
|
||||
```
|
|
@ -0,0 +1,77 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Build a Map of Objects, suitable for marshalling by Jackson.
|
||||
*
|
||||
* Include conditional logic, so null values, empty maps, or empty lists will
|
||||
* not be added, unless you use the special values.
|
||||
*/
|
||||
/**
 * Build a Map of Objects, suitable for marshalling by Jackson.
 *
 * Include conditional logic, so null values, empty maps, or empty lists will
 * not be added, unless you use the special values.
 */
public class JsonTree {
    /**
     * Empty maps will not be added, except for this one (compared by
     * identity).
     */
    public static final Map<String, Object> EMPTY_JSON_MAP = Collections
            .emptyMap();

    /**
     * Empty lists will not be added, except for this one (compared by
     * identity).
     */
    public static final List<Object> EMPTY_JSON_LIST = Collections.emptyList();

    /**
     * Create the tree
     */
    public static JsonTree tree() {
        return new JsonTree();
    }

    /**
     * This will cause non-positive integers (zero as well as negatives) to be
     * ignored.
     */
    public static Integer ifPositive(int i) {
        return (i > 0) ? i : null;
    }

    // Accumulates the significant key/value pairs. Final: assigned once.
    private final Map<String, Object> map = new HashMap<>();

    /**
     * Add a key/value pair, unless the value is insignificant.
     *
     * @return this, for chaining
     */
    public JsonTree put(String key, Object value) {
        if (isSignificant(value)) {
            storeIt(key, value);
        }
        return this;
    }

    /**
     * A value is significant unless it is null, an empty Map other than
     * EMPTY_JSON_MAP, or an empty List other than EMPTY_JSON_LIST.
     */
    private boolean isSignificant(Object value) {
        if (value == null) {
            return false;
        }
        // Wildcard casts instead of raw types, to satisfy the compiler's
        // unchecked-usage warnings. Behavior is unchanged.
        if (value instanceof Map && ((Map<?, ?>) value).isEmpty()
                && value != EMPTY_JSON_MAP) {
            return false;
        }
        if (value instanceof List && ((List<?>) value).isEmpty()
                && value != EMPTY_JSON_LIST) {
            return false;
        }
        return true;
    }

    /**
     * Store the value; a nested JsonTree is flattened to its map form.
     */
    private void storeIt(String key, Object value) {
        if (value instanceof JsonTree) {
            map.put(key, ((JsonTree) value).asMap());
        } else {
            map.put(key, value);
        }
    }

    /**
     * @return a defensive (shallow) copy of the accumulated map
     */
    public Map<String, Object> asMap() {
        return new HashMap<>(map);
    }
}
|
|
@ -0,0 +1,172 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.EMPTY_JSON_MAP;
|
||||
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.ifPositive;
|
||||
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.tree;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery.Order;
|
||||
|
||||
/**
|
||||
* Accept a SearchQuery and make it available as a JSON string, suitable for
|
||||
* Elasticsearch.
|
||||
*/
|
||||
public class QueryConverter {
|
||||
private static final Log log = LogFactory.getLog(QueryConverter.class);
|
||||
|
||||
private final SearchQuery query;
|
||||
private final Map<String, Object> queryAndFilters;
|
||||
private final Map<String, Object> sortFields;
|
||||
private final Map<String, Object> facets;
|
||||
private final Map<String, Object> highlighter;
|
||||
private final List<String> returnFields;
|
||||
private final Map<String, Object> fullMap;
|
||||
|
||||
public QueryConverter(SearchQuery query) {
|
||||
this.query = query;
|
||||
this.queryAndFilters = filteredOrNot();
|
||||
this.sortFields = figureSortFields();
|
||||
this.facets = figureFacets();
|
||||
this.highlighter = figureHighlighter();
|
||||
this.returnFields = figureReturnFields();
|
||||
|
||||
this.fullMap = figureFullMap();
|
||||
}
|
||||
|
||||
private Map<String, Object> filteredOrNot() {
|
||||
if (query.getFilters().isEmpty()) {
|
||||
return new QueryStringMap(query.getQuery()).map;
|
||||
} else {
|
||||
return buildFilterStructure();
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Object> buildFilterStructure() {
|
||||
return tree() //
|
||||
.put("bool", tree() //
|
||||
.put("must", new QueryStringMap(query.getQuery()).map) //
|
||||
.put("filter", buildFiltersList())) //
|
||||
.asMap();
|
||||
}
|
||||
|
||||
private List<Map<String, Object>> buildFiltersList() {
|
||||
List<Map<String, Object>> list = new ArrayList<>();
|
||||
for (String filter : query.getFilters()) {
|
||||
list.add(new QueryStringMap(filter).map);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
private Map<String, Object> figureSortFields() {
|
||||
Map<String, Order> fields = query.getSortFields();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
for (String name : fields.keySet()) {
|
||||
String sortOrder = fields.get(name).toString().toLowerCase();
|
||||
map.put(name, sortOrder);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> figureFacets() {
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
for (String field : query.getFacetFields()) {
|
||||
map.put("facet_" + field, figureFacet(field));
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> figureHighlighter() {
|
||||
return tree() //
|
||||
.put("fields", tree() //
|
||||
.put("ALLTEXT", EMPTY_JSON_MAP))
|
||||
.asMap();
|
||||
}
|
||||
|
||||
private Map<String, Object> figureFacet(String field) {
|
||||
return tree() //
|
||||
.put("terms", tree() //
|
||||
.put("field", field) //
|
||||
.put("size", ifPositive(query.getFacetLimit())) //
|
||||
.put("min_doc_count",
|
||||
ifPositive(query.getFacetMinCount()))) //
|
||||
.asMap();
|
||||
}
|
||||
|
||||
private List<String> figureReturnFields() {
|
||||
return new ArrayList<>(query.getFieldsToReturn());
|
||||
}
|
||||
|
||||
private Map<String, Object> figureFullMap() {
|
||||
return tree() //
|
||||
.put("query", queryAndFilters) //
|
||||
.put("from", ifPositive(query.getStart())) //
|
||||
.put("highlight", highlighter)
|
||||
.put("size", ifPositive(query.getRows())) //
|
||||
.put("sort", sortFields) //
|
||||
.put("_source", returnFields) //
|
||||
.put("aggregations", facets) //
|
||||
.asMap();
|
||||
}
|
||||
|
||||
public String asString() throws SearchEngineException {
|
||||
try {
|
||||
return new ObjectMapper().writeValueAsString(fullMap);
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static class QueryStringMap {
|
||||
public final Map<String, Object> map;
|
||||
|
||||
public QueryStringMap(String queryString) {
|
||||
map = new HashMap<>();
|
||||
map.put("query_string", makeInnerMap(escape(queryString)));
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a kluge, but perhaps it will work for now.
|
||||
*
|
||||
* Apparently Solr is willing to put up with query strings that contain
|
||||
* special characters in odd places, but Elasticsearch is not.
|
||||
*
|
||||
* So, a query string of "classgroup:http://this/that" must be escaped
|
||||
* as "classgroup:http\:\/\/this\/that". Notice that the first colon
|
||||
* delimits the field name, and so must not be escaped.
|
||||
*
|
||||
* But what if no field is specified? Then all colons must be escaped.
|
||||
* How would we distinguish that?
|
||||
*
|
||||
* And what if the query is more complex, and more than one field is
|
||||
* specified? What if other special characters are included?
|
||||
*
|
||||
* This could be a real problem.
|
||||
*/
|
||||
private String escape(String queryString) {
|
||||
return queryString.replace(":", "\\:").replace("/", "\\/")
|
||||
.replaceFirst("\\\\:", ":");
|
||||
}
|
||||
|
||||
private Map<String, String> makeInnerMap(String queryString) {
|
||||
Map<String, String> inner = new HashMap<>();
|
||||
inner.put("default_field", "ALLTEXT");
|
||||
inner.put("query", queryString);
|
||||
return inner;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,182 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField.Count;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField.BaseCount;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResultDocument;
|
||||
|
||||
/**
|
||||
* Elastic search sends a JSON response to a query. parse it to a
|
||||
* SearchResponse.
|
||||
*/
|
||||
class ResponseParser {
|
||||
private static final Log log = LogFactory.getLog(ResponseParser.class);
|
||||
|
||||
private final Map<String, Object> responseMap;
|
||||
|
||||
private Map<String, Map<String, List<String>>> highlightingMap;
|
||||
private Map<String, SearchFacetField> facetFieldsMap;
|
||||
private long totalHits;
|
||||
private List<SearchResultDocument> documentList;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public ResponseParser(String responseString) throws SearchEngineException {
|
||||
try {
|
||||
this.responseMap = new ObjectMapper().readValue(responseString,
|
||||
HashMap.class);
|
||||
} catch (IOException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public SearchResponse parse() {
|
||||
parseDocumentList();
|
||||
parseFacetFields();
|
||||
SearchResponse response = new BaseSearchResponse(highlightingMap,
|
||||
facetFieldsMap,
|
||||
new ElasticSearchResultDocumentList(documentList, totalHits));
|
||||
log.debug("ESQuery.ResponseParser.parse: " + response);
|
||||
return response;
|
||||
}
|
||||
|
||||
private void parseFacetFields() {
|
||||
facetFieldsMap = new HashMap<>();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Map<String, Object>> aggregations = (Map<String, Map<String, Object>>) responseMap
|
||||
.get("aggregations");
|
||||
if (aggregations == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (String key : aggregations.keySet()) {
|
||||
if (key.startsWith("facet_")) {
|
||||
String name = key.substring(6);
|
||||
parseFacetField(name, aggregations.get(key));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void parseFacetField(String name, Map<String, Object> facetMap) {
|
||||
@SuppressWarnings("unchecked")
|
||||
List<Map<String, Object>> bucketsList = (List<Map<String, Object>>) facetMap
|
||||
.get("buckets");
|
||||
if (bucketsList == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
List<Count> counts = new ArrayList<>();
|
||||
for (Map<String, Object> bucket : bucketsList) {
|
||||
counts.add(new BaseCount((String) bucket.get("key"),
|
||||
(Integer) bucket.get("doc_count")));
|
||||
}
|
||||
|
||||
facetFieldsMap.put(name, new BaseSearchFacetField(name, counts));
|
||||
}
|
||||
|
||||
private void parseDocumentList() {
|
||||
documentList = new ArrayList<>();
|
||||
highlightingMap = new HashMap<>();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> uberHits = (Map<String, Object>) responseMap
|
||||
.get("hits");
|
||||
if (uberHits == null) {
|
||||
log.warn("Didn't find a 'hits' field " + "in the query response: "
|
||||
+ responseMap);
|
||||
return;
|
||||
}
|
||||
|
||||
Integer total = (Integer) uberHits.get("total");
|
||||
if (total == null) {
|
||||
log.warn("Didn't find a 'hits.total' field "
|
||||
+ "in the query response: " + responseMap);
|
||||
return;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
List<Map<String, Object>> hits = (List<Map<String, Object>>) uberHits
|
||||
.get("hits");
|
||||
if (hits == null) {
|
||||
log.warn("Didn't find a 'hits.hits' field "
|
||||
+ "in the query response: " + responseMap);
|
||||
return;
|
||||
}
|
||||
|
||||
parseDocuments(hits);
|
||||
}
|
||||
|
||||
private void parseDocuments(List<Map<String, Object>> hits) {
|
||||
for (Map<String, Object> hit : hits) {
|
||||
SearchResultDocument doc = parseDocument(hit);
|
||||
if (doc != null) {
|
||||
documentList.add(doc);
|
||||
|
||||
Map<String, List<String>> highlight = parseHighlight(hit);
|
||||
if (highlight != null) {
|
||||
highlightingMap.put(doc.getUniqueId(), highlight);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private SearchResultDocument parseDocument(Map<String, Object> hitMap) {
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Collection<Object>> sourceMap = (Map<String, Collection<Object>>) hitMap
|
||||
.get("_source");
|
||||
if (sourceMap == null) {
|
||||
log.warn("Didn't find a '_source' field in the hit: " + hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
String id = (String) hitMap.get("_id");
|
||||
if (id == null) {
|
||||
log.warn("Didn't find a '_id' field in the hit: " + hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
return new BaseSearchResultDocument(id, sourceMap);
|
||||
}
|
||||
|
||||
private Map<String, List<String>> parseHighlight(
|
||||
Map<String, Object> hitMap) {
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, List<String>> highlightMap = (Map<String, List<String>>) hitMap
|
||||
.get("highlight");
|
||||
if (highlightMap == null) {
|
||||
log.debug("Didn't find a 'highlight' field in the hit: " + hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
List<String> snippets = highlightMap.get("ALLTEXT");
|
||||
if (snippets == null) {
|
||||
log.warn("Didn't find a 'highlight.ALLTEXT' field in the hit: "
|
||||
+ hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
Map<String, List<String>> snippetMap = new HashMap<>();
|
||||
snippetMap.put("ALLTEXT", snippets);
|
||||
return snippetMap;
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue