initial files for ElasticSearch: ticket vivo-1587 (#85)
* initial files from ticket vivo-1587 * Add https://mvnrepository.com/artifact/org.apache.httpcomponents/fluent-hc/4.5.6 dependency * Resolves: https://jira.duraspace.org/browse/VIVO-1587
This commit is contained in:
parent
b049aa6d32
commit
45ddb56294
12 changed files with 1275 additions and 0 deletions
|
@ -58,6 +58,11 @@
|
|||
<artifactId>argon2-jvm</artifactId>
|
||||
<version>2.4</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.httpcomponents</groupId>
|
||||
<artifactId>fluent-hc</artifactId>
|
||||
<version>4.5.6</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.vivoweb</groupId>
|
||||
<artifactId>vitro-dependencies</artifactId>
|
||||
|
|
|
@ -0,0 +1,92 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
import org.apache.http.entity.ContentType;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
|
||||
|
||||
/**
|
||||
* The nuts and bolts of adding a document to the Elasticsearch index
|
||||
*/
|
||||
public class ESAdder {
|
||||
private static final Log log = LogFactory.getLog(ESAdder.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
public ESAdder(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public void add(Collection<SearchInputDocument> docs)
|
||||
throws SearchEngineException {
|
||||
for (SearchInputDocument doc : docs) {
|
||||
addDocument(doc);
|
||||
}
|
||||
}
|
||||
|
||||
private void addDocument(SearchInputDocument doc)
|
||||
throws SearchEngineException {
|
||||
try {
|
||||
Map<String, List<Object>> map = convertDocToMap(doc);
|
||||
String json = new ObjectMapper().writeValueAsString(map);
|
||||
log.debug("Adding document for '" + doc.getField("DocId") + "': "
|
||||
+ json);
|
||||
|
||||
putToElastic(json, (String) doc.getField("DocId").getFirstValue());
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to convert to JSON", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Some field values are collections. Add the members of the collection
|
||||
* instead.
|
||||
*/
|
||||
private Map<String, List<Object>> convertDocToMap(SearchInputDocument doc) {
|
||||
Map<String, List<Object>> map = new HashMap<>();
|
||||
for (SearchInputField field : doc.getFieldMap().values()) {
|
||||
ArrayList<Object> list = new ArrayList<>();
|
||||
for (Object value : field.getValues()) {
|
||||
if (value instanceof Collection) {
|
||||
Collection<?> cValue = (Collection<?>) value;
|
||||
list.addAll(cValue);
|
||||
} else {
|
||||
list.add(value);
|
||||
}
|
||||
}
|
||||
map.put(field.getName(), list);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private void putToElastic(String json, String docId)
|
||||
throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_doc/"
|
||||
+ URLEncoder.encode(docId, "UTF8");
|
||||
Response response = Request.Put(url)
|
||||
.bodyString(json, ContentType.APPLICATION_JSON).execute();
|
||||
log.debug("Response from Elasticsearch: "
|
||||
+ response.returnContent().asString());
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||
e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,42 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
|
||||
/**
|
||||
* The nuts and bolts of getting the number of documents in the Elasticsearch
|
||||
* index.
|
||||
*/
|
||||
public class ESCounter {
|
||||
private final String baseUrl;
|
||||
|
||||
public ESCounter(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public int count() throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_doc/_count";
|
||||
Response response = Request.Get(url).execute();
|
||||
String json = response.returnContent().asString();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> map = new ObjectMapper().readValue(json,
|
||||
HashMap.class);
|
||||
return (Integer) map.get("count");
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||
e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,147 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URLEncoder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.Header;
|
||||
import org.apache.http.HttpEntity;
|
||||
import org.apache.http.StatusLine;
|
||||
import org.apache.http.client.HttpResponseException;
|
||||
import org.apache.http.client.ResponseHandler;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
import org.apache.http.entity.ContentType;
|
||||
import org.apache.http.util.EntityUtils;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
|
||||
|
||||
/**
|
||||
* The nuts and bolts of deleting documents from the Elasticsearch index.
|
||||
*/
|
||||
public class ESDeleter {
|
||||
private static final Log log = LogFactory.getLog(ESDeleter.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
/**
|
||||
* @param baseUrl
|
||||
*/
|
||||
public ESDeleter(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public void deleteByIds(Collection<String> ids)
|
||||
throws SearchEngineException {
|
||||
for (String id : ids) {
|
||||
deleteById(id);
|
||||
}
|
||||
}
|
||||
|
||||
private void deleteById(String id) throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_doc/"
|
||||
+ URLEncoder.encode(id, "UTF8");
|
||||
Response response = Request.Delete(url).execute();
|
||||
String json = response.returnContent().asString();
|
||||
} catch (HttpResponseException e) {
|
||||
if (e.getStatusCode() == 404) {
|
||||
// Don't care if it has already been deleted.
|
||||
} else {
|
||||
throw new SearchEngineException(
|
||||
"Failed to delete Elasticsearch document " + id, e);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException(
|
||||
"Failed to delete Elasticsearch document " + id, e);
|
||||
}
|
||||
}
|
||||
|
||||
public void deleteByQuery(String queryString) throws SearchEngineException {
|
||||
String url = baseUrl + "/_delete_by_query";
|
||||
SearchQuery query = new BaseSearchQuery().setQuery(queryString);
|
||||
String queryJson = new QueryConverter(query).asString();
|
||||
|
||||
try {
|
||||
Response response = Request.Post(url)
|
||||
.bodyString(queryJson, ContentType.APPLICATION_JSON)
|
||||
.execute();
|
||||
|
||||
BaseResponseHandler handler = new BaseResponseHandler();
|
||||
response.handleResponse(handler);
|
||||
if (handler.getStatusCode() >= 400) {
|
||||
log.warn(String.format(
|
||||
"Failed to delete Elasticsearch documents by query: %s, %d - %s\n%s",
|
||||
queryString, handler.getStatusCode(),
|
||||
handler.getReasonPhrase(), handler.getContentString()));
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new SearchEngineException("Failed to delete Elasticsearch "
|
||||
+ "documents by query " + queryString, e);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper class for interpreting HttpResponse errors
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private class BaseResponseHandler implements ResponseHandler<Object> {
|
||||
private int statusCode;
|
||||
private String reasonPhrase;
|
||||
private Map<String, List<String>> headers;
|
||||
private String contentString;
|
||||
|
||||
@Override
|
||||
public Object handleResponse(org.apache.http.HttpResponse innerResponse)
|
||||
throws IOException {
|
||||
StatusLine statusLine = innerResponse.getStatusLine();
|
||||
statusCode = statusLine.getStatusCode();
|
||||
reasonPhrase = statusLine.getReasonPhrase();
|
||||
|
||||
headers = new HashMap<>();
|
||||
for (Header header : innerResponse.getAllHeaders()) {
|
||||
String name = header.getName();
|
||||
if (!headers.containsKey(name)) {
|
||||
headers.put(name, new ArrayList<String>());
|
||||
}
|
||||
headers.get(name).add(header.getValue());
|
||||
}
|
||||
|
||||
HttpEntity entity = innerResponse.getEntity();
|
||||
if (entity == null) {
|
||||
contentString = "";
|
||||
} else {
|
||||
contentString = EntityUtils.toString(entity);
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
public int getStatusCode() {
|
||||
return statusCode;
|
||||
}
|
||||
|
||||
public String getReasonPhrase() {
|
||||
return reasonPhrase;
|
||||
}
|
||||
|
||||
public Map<String, List<String>> getHeaders() {
|
||||
return headers;
|
||||
}
|
||||
|
||||
public String getContentString() {
|
||||
return contentString;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.client.fluent.Request;
|
||||
import org.apache.http.client.fluent.Response;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
|
||||
/**
|
||||
* Just does a "commit" or "flush" to the index.
|
||||
*/
|
||||
public class ESFlusher {
|
||||
private static final Log log = LogFactory.getLog(ESFlusher.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
public ESFlusher(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public void flush() throws SearchEngineException {
|
||||
flush(false);
|
||||
}
|
||||
|
||||
public void flush(boolean wait) throws SearchEngineException {
|
||||
try {
|
||||
String url = baseUrl + "/_flush"
|
||||
+ (wait ? "?wait_for_ongoing" : "");
|
||||
Response response = Request.Get(url).execute();
|
||||
String json = response.returnContent().asString();
|
||||
log.debug("flush response: " + json);
|
||||
} catch (Exception e) {
|
||||
throw new SearchEngineException("Failed to put to Elasticsearch",
|
||||
e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.http.HttpResponse;
|
||||
import org.apache.http.client.methods.HttpEntityEnclosingRequestBase;
|
||||
import org.apache.http.entity.ContentType;
|
||||
import org.apache.http.entity.StringEntity;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
|
||||
|
||||
/**
|
||||
* Convert a SearchQuery to JSON, send it to Elasticsearch, and convert the JSON
|
||||
* response to a SearchResponse.
|
||||
*/
|
||||
public class ESQuery {
|
||||
private static final Log log = LogFactory.getLog(ESQuery.class);
|
||||
|
||||
private final String baseUrl;
|
||||
|
||||
public ESQuery(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
public SearchResponse query(SearchQuery query)
|
||||
throws SearchEngineException {
|
||||
String queryString = new QueryConverter(query).asString();
|
||||
String response = doTheQuery(queryString);
|
||||
return new ResponseParser(response).parse();
|
||||
}
|
||||
|
||||
private String doTheQuery(String queryString) {
|
||||
log.debug("QUERY: " + queryString);
|
||||
try {
|
||||
String url = baseUrl + "/_search";
|
||||
HttpResponse response = new ESFunkyGetRequest(url)
|
||||
.bodyString(queryString, ContentType.APPLICATION_JSON)
|
||||
.execute();
|
||||
String responseString = IOUtils
|
||||
.toString(response.getEntity().getContent());
|
||||
log.debug("RESPONSE: " + responseString);
|
||||
return responseString;
|
||||
} catch (Exception e) {
|
||||
log.error("Failed to put to Elasticsearch", e);
|
||||
return "";
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Helper class -- a GET request that accepts a body
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* The HttpClient implementations, both regular and conversational, do not
|
||||
* allow you to put a body on a GET request. In online discussion, some say
|
||||
* that the HTTP spec is ambiguous on this point, so each implementation
|
||||
* makes its own choice. For example, CURL allows it.
|
||||
*
|
||||
* More to the point however, is that ElasticSearch requires it. So here's a
|
||||
* simple class to make that possible.
|
||||
*
|
||||
* USE POST INSTEAD!!
|
||||
*/
|
||||
private static class ESFunkyGetRequest
|
||||
extends HttpEntityEnclosingRequestBase {
|
||||
public ESFunkyGetRequest(String url) throws SearchEngineException {
|
||||
super();
|
||||
try {
|
||||
setURI(new URI(url));
|
||||
} catch (URISyntaxException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public ESFunkyGetRequest bodyString(String contents,
|
||||
ContentType contentType) {
|
||||
setEntity(new StringEntity(contents, contentType));
|
||||
return this;
|
||||
}
|
||||
|
||||
public HttpResponse execute() throws SearchEngineException {
|
||||
try {
|
||||
return HttpClientFactory.getHttpClient().execute(this);
|
||||
} catch (IOException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getMethod() {
|
||||
return "GET";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,142 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.Application;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
|
||||
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
|
||||
|
||||
/**
|
||||
* A first draft of an Elasticsearch implementation.
|
||||
*/
|
||||
public class ElasticSearchEngine implements SearchEngine {
|
||||
private static final Log log = LogFactory.getLog(ElasticSearchEngine.class);
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// Configuration
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private String baseUrl;
|
||||
|
||||
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBaseUrl")
|
||||
public void setBaseUrl(String url) {
|
||||
if (baseUrl == null) {
|
||||
if (url.endsWith("/")) {
|
||||
url = url.substring(0, url.length() - 1);
|
||||
}
|
||||
baseUrl = url;
|
||||
} else {
|
||||
throw new IllegalStateException(
|
||||
"Configuration includes multiple base URLs: " + url
|
||||
+ ", and " + baseUrl);
|
||||
}
|
||||
}
|
||||
|
||||
@Validation
|
||||
public void validate() throws Exception {
|
||||
if (baseUrl == null) {
|
||||
throw new IllegalStateException(
|
||||
"Configuration did not include a base URL.");
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
// The instance
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void startup(Application application, ComponentStartupStatus ss) {
|
||||
log.warn("ElasticSearchEngine.startup() not implemented."); // TODO
|
||||
}
|
||||
|
||||
@Override
|
||||
public void shutdown(Application application) {
|
||||
// TODO Flush the buffers
|
||||
log.warn("ElasticSearchEngine.shutdown not implemented.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void ping() throws SearchEngineException {
|
||||
// TODO What's the simplest we can do? Another smoke test?
|
||||
log.warn("ElasticSearchEngine.ping() not implemented."); // TODO
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchInputDocument createInputDocument() {
|
||||
return new BaseSearchInputDocument();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(SearchInputDocument... docs) throws SearchEngineException {
|
||||
add(Arrays.asList(docs));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add(Collection<SearchInputDocument> docs)
|
||||
throws SearchEngineException {
|
||||
new ESAdder(baseUrl).add(docs);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit() throws SearchEngineException {
|
||||
new ESFlusher(baseUrl).flush();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void commit(boolean wait) throws SearchEngineException {
|
||||
new ESFlusher(baseUrl).flush(wait);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteById(String... ids) throws SearchEngineException {
|
||||
deleteById(Arrays.asList(ids));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteById(Collection<String> ids)
|
||||
throws SearchEngineException {
|
||||
new ESDeleter(baseUrl).deleteByIds(ids);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void deleteByQuery(String query) throws SearchEngineException {
|
||||
new ESDeleter(baseUrl).deleteByQuery(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchQuery createQuery() {
|
||||
return new BaseSearchQuery();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchQuery createQuery(String queryText) {
|
||||
BaseSearchQuery query = new BaseSearchQuery();
|
||||
query.setQuery(queryText);
|
||||
return query;
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchResponse query(SearchQuery query)
|
||||
throws SearchEngineException {
|
||||
return new ESQuery(baseUrl).query(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int documentCount() throws SearchEngineException {
|
||||
return new ESCounter(baseUrl).count();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocumentList;
|
||||
|
||||
/**
|
||||
* A simple implementation. In fact, this is so simple that perhaps it should be
|
||||
* named BaseSearchResultDocumentList.
|
||||
*/
|
||||
class ElasticSearchResultDocumentList implements SearchResultDocumentList {
|
||||
private final List<SearchResultDocument> documents;
|
||||
private final long numberFound;
|
||||
|
||||
public ElasticSearchResultDocumentList(List<SearchResultDocument> documents,
|
||||
long numberFound) {
|
||||
this.documents = documents;
|
||||
this.numberFound = numberFound;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<SearchResultDocument> iterator() {
|
||||
return documents.iterator();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getNumFound() {
|
||||
return documents.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return documents.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
public SearchResultDocument get(int i) {
|
||||
return documents.get(i);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format(
|
||||
"ElasticSearchResultDocumentList[numberFound=%s, documents=%s]",
|
||||
numberFound, documents);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,216 @@
|
|||
# What is this package?
|
||||
* The first draft of an Elasticsearch driver for VIVO
|
||||
|
||||
# What has been done?
|
||||
* Implement the `SearchEngine` interface
|
||||
* Classes in `edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch`
|
||||
* No attempt to add new functions.
|
||||
|
||||
# How to experiment with it?
|
||||
* Install Elasticsearch somewhere.
|
||||
* Create a search index with the appropriate mapping (see below).
|
||||
* Check out VIVO and this branch of Vitro (see below), and do the usual installation procedure.
|
||||
* Modify `{vitro_home}/config/applicationSetup.n3` to use this driver (see below).
|
||||
* Start Elasticsearch
|
||||
* Start VIVO
|
||||
|
||||
# Not ready for production
|
||||
* Documentation
|
||||
* Instructions on how to install and configure the driver.
|
||||
* Instructions on how to setup elasticsearch?
|
||||
* Smoke test
|
||||
* Display a warning if the elasticsearch server is not responding.
|
||||
* Functional testing
|
||||
* Are we getting the proper search results?
|
||||
* Are search results in the order that we would like?
|
||||
* Improved snippets
|
||||
* Copy the technique used for Solr
|
||||
* Code improvement
|
||||
* Rigorous closing of HTTP connections.
|
||||
* IOC for HTTP code, to help in unit testing
|
||||
* Consistent use of exceptions and logging
|
||||
* Unit tests
|
||||
* Automatic initialization of the index
|
||||
* If VIVO detects an empty index, apply the mapping.
|
||||
|
||||
# The next steps: adding functionality
|
||||
|
||||
## Stay within the framework
|
||||
* Add fields that enhance the contents of the search index documents (see below).
|
||||
* Add data distributors that run queries and format the output (see below).
|
||||
|
||||
## Go outside the framework
|
||||
* Add functions to the Elasticsearch driver that the Solr driver will simply ignore.
|
||||
* Or remove Solr entirely
|
||||
* Query Elasticsearch directly
|
||||
* Or write a data distributor that will run the query
|
||||
|
||||
# The details:
|
||||
|
||||
## Check out VIVO and Vitro
|
||||
* For now, the Elasticsearch driver only lives in my fork of Vitro
|
||||
* No changes to VIVO are required (yet).
|
||||
|
||||
```
|
||||
git clone https://github.com/vivo-project/VIVO.git
|
||||
git clone -b feature/elasticsearchExperiments https://github.com/j2blake/Vitro.git
|
||||
```
|
||||
|
||||
## A mapping for the search index
|
||||
* If the index uses the default mapping, it will not work correctly.
|
||||
* Some fields must be declared as `keyword`, some as unstemmed, etc.
|
||||
|
||||
* Example mapping script:
|
||||
|
||||
```
|
||||
curl -X PUT "localhost:9200/vivo?pretty" -H 'Content-Type: application/json' -d'
|
||||
{
|
||||
"mappings": {
|
||||
"_doc": {
|
||||
"properties": {
|
||||
"ALLTEXT": {
|
||||
"type": "text",
|
||||
"analyzer": "english"
|
||||
},
|
||||
"ALLTEXTUNSTEMMED": {
|
||||
"type": "text",
|
||||
"analyzer": "standard"
|
||||
},
|
||||
"DocId": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"classgroup": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"type": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"mostSpecificTypeURIs": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"indexedTime": {
|
||||
"type": "long"
|
||||
},
|
||||
"nameRaw": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"URI": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"THUMBNAIL": {
|
||||
"type": "integer"
|
||||
},
|
||||
"THUMBNAIL_URL": {
|
||||
"type": "keyword"
|
||||
},
|
||||
"nameLowercaseSingleValued": {
|
||||
"type": "text",
|
||||
"analyzer": "standard",
|
||||
"fielddata": "true"
|
||||
},
|
||||
"BETA" : {
|
||||
"type" : "float"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"query": {
|
||||
"default_field": "ALLTEXT"
|
||||
}
|
||||
}
|
||||
'
|
||||
```
|
||||
* __*Note:*__ The first line of the script specifies the name of the index as `vivo`.
|
||||
Any name may be used, but it must match the "base URL" that is specified in `applicationSetup.n3` (see below).
|
||||
* __*Note:*__ The same first line specifies the location and port number of the elasticsearch server.
|
||||
Again, any location and port may be used, but they must match the "base URL" in `applicationSetup.n3`.
|
||||
|
||||
## Modify `applicationSetup.n3`
|
||||
* Change this:
|
||||
|
||||
```
|
||||
# ----------------------------
|
||||
#
|
||||
# Search engine module:
|
||||
# The Solr-based implementation is the only standard option, but it can be
|
||||
# wrapped in an "instrumented" wrapper, which provides additional logging
|
||||
# and more rigorous life-cycle checking.
|
||||
#
|
||||
|
||||
:instrumentedSearchEngineWrapper
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||
:wraps :solrSearchEngine .
|
||||
|
||||
```
|
||||
|
||||
* To this:
|
||||
|
||||
```
|
||||
# ----------------------------
|
||||
#
|
||||
# Search engine module:
|
||||
# The Solr-based implementation is the only standard option, but it can be
|
||||
# wrapped in an "instrumented" wrapper, which provides additional logging
|
||||
# and more rigorous life-cycle checking.
|
||||
#
|
||||
|
||||
:instrumentedSearchEngineWrapper
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||
:wraps :elasticSearchEngine .
|
||||
|
||||
:elasticSearchEngine
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.ElasticSearchEngine> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
|
||||
:hasBaseUrl "http://localhost:9200/vivo" .
|
||||
```
|
||||
|
||||
## Enhance the contents of the search index
|
||||
### An example: Publication URIs in the author's search document
|
||||
* Add a keyword field to the search index
|
||||
|
||||
```
|
||||
"publicationURI": {
|
||||
"type": "keyword"
|
||||
},
|
||||
```
|
||||
|
||||
* Add a `DocumentModifier` to VIVO.
|
||||
|
||||
```
|
||||
:documentModifier_publications
|
||||
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.SelectQueryDocumentModifier> ,
|
||||
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
|
||||
rdfs:label "URIs of publications are added to publicationURI field." ;
|
||||
:hasTargetField "publicationURI" ;
|
||||
:hasSelectQuery """
|
||||
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
|
||||
PREFIX vivo: <http://vivoweb.org/ontology/core#>
|
||||
PREFIX bibo: <http://purl.org/ontology/bibo/>
|
||||
SELECT ?publication
|
||||
WHERE {
|
||||
?uri vivo:relatedBy ?authorship .
|
||||
?authorship a vivo:Authorship .
|
||||
?authorship vivo:relates ?publication .
|
||||
?publication a bibo:Document .
|
||||
}
|
||||
""" .
|
||||
```
|
||||
|
||||
## Use data distributors to query the search index
|
||||
* Install the Data Distribution API
|
||||
* Add a distributor:
|
||||
|
||||
```
|
||||
:drill_by_URI
|
||||
a <java:edu.cornell.library.scholars.webapp.controller.api.distribute.DataDistributor> ,
|
||||
<java:edu.cornell.library.scholars.webapp.controller.api.distribute.search.DrillDownSearchByUriDataDistributor> ;
|
||||
:actionName "searchAndDrill" .
|
||||
```
|
||||
|
||||
* Run the query:
|
||||
|
||||
```
|
||||
http://localhost:8080/vivo/api/dataRequest/searchAndDrill?uri=http://scholars.cornell.edu/individual/mj495
|
||||
```
|
|
@ -0,0 +1,77 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Build a Map of Objects, suitable for marshalling by Jackson.
|
||||
*
|
||||
* Include conditional logic, so null values, empty maps, or empty lists will
|
||||
* not be added, unless you use the special values.
|
||||
*/
|
||||
/**
 * Build a Map of Objects, suitable for marshalling by Jackson.
 *
 * Include conditional logic, so null values, empty maps, or empty lists will
 * not be added, unless you use the special values.
 */
public class JsonTree {
    /**
     * Empty maps will not be added, except for this one (compared by
     * identity).
     */
    public static final Map<String, Object> EMPTY_JSON_MAP = Collections
            .emptyMap();

    /**
     * Empty lists will not be added, except for this one (compared by
     * identity).
     */
    public static final List<Object> EMPTY_JSON_LIST = Collections.emptyList();

    /**
     * Create the tree
     */
    public static JsonTree tree() {
        return new JsonTree();
    }

    /**
     * This will cause non-positive integers (zero as well as negatives) to be
     * ignored.
     */
    public static Integer ifPositive(int i) {
        return (i > 0) ? i : null;
    }

    // Accumulates the significant key/value pairs. Final: assigned once.
    private final Map<String, Object> map = new HashMap<>();

    /**
     * Add a key/value pair, unless the value is insignificant.
     *
     * @return this, for chaining
     */
    public JsonTree put(String key, Object value) {
        if (isSignificant(value)) {
            storeIt(key, value);
        }
        return this;
    }

    /**
     * A value is significant unless it is null, an empty Map other than
     * EMPTY_JSON_MAP, or an empty List other than EMPTY_JSON_LIST.
     */
    private boolean isSignificant(Object value) {
        if (value == null) {
            return false;
        }
        // Wildcard casts instead of raw types, to satisfy the compiler's
        // unchecked-usage warnings. Behavior is unchanged.
        if (value instanceof Map && ((Map<?, ?>) value).isEmpty()
                && value != EMPTY_JSON_MAP) {
            return false;
        }
        if (value instanceof List && ((List<?>) value).isEmpty()
                && value != EMPTY_JSON_LIST) {
            return false;
        }
        return true;
    }

    /**
     * Store the value; a nested JsonTree is flattened to its map form.
     */
    private void storeIt(String key, Object value) {
        if (value instanceof JsonTree) {
            map.put(key, ((JsonTree) value).asMap());
        } else {
            map.put(key, value);
        }
    }

    /**
     * @return a defensive (shallow) copy of the accumulated map
     */
    public Map<String, Object> asMap() {
        return new HashMap<>(map);
    }
}
|
|
@ -0,0 +1,172 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.EMPTY_JSON_MAP;
|
||||
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.ifPositive;
|
||||
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.tree;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery.Order;
|
||||
|
||||
/**
|
||||
* Accept a SearchQuery and make it available as a JSON string, suitable for
|
||||
* Elasticsearch.
|
||||
*/
|
||||
public class QueryConverter {
|
||||
private static final Log log = LogFactory.getLog(QueryConverter.class);
|
||||
|
||||
private final SearchQuery query;
|
||||
private final Map<String, Object> queryAndFilters;
|
||||
private final Map<String, Object> sortFields;
|
||||
private final Map<String, Object> facets;
|
||||
private final Map<String, Object> highlighter;
|
||||
private final List<String> returnFields;
|
||||
private final Map<String, Object> fullMap;
|
||||
|
||||
public QueryConverter(SearchQuery query) {
|
||||
this.query = query;
|
||||
this.queryAndFilters = filteredOrNot();
|
||||
this.sortFields = figureSortFields();
|
||||
this.facets = figureFacets();
|
||||
this.highlighter = figureHighlighter();
|
||||
this.returnFields = figureReturnFields();
|
||||
|
||||
this.fullMap = figureFullMap();
|
||||
}
|
||||
|
||||
private Map<String, Object> filteredOrNot() {
|
||||
if (query.getFilters().isEmpty()) {
|
||||
return new QueryStringMap(query.getQuery()).map;
|
||||
} else {
|
||||
return buildFilterStructure();
|
||||
}
|
||||
}
|
||||
|
||||
private Map<String, Object> buildFilterStructure() {
|
||||
return tree() //
|
||||
.put("bool", tree() //
|
||||
.put("must", new QueryStringMap(query.getQuery()).map) //
|
||||
.put("filter", buildFiltersList())) //
|
||||
.asMap();
|
||||
}
|
||||
|
||||
private List<Map<String, Object>> buildFiltersList() {
|
||||
List<Map<String, Object>> list = new ArrayList<>();
|
||||
for (String filter : query.getFilters()) {
|
||||
list.add(new QueryStringMap(filter).map);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
private Map<String, Object> figureSortFields() {
|
||||
Map<String, Order> fields = query.getSortFields();
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
for (String name : fields.keySet()) {
|
||||
String sortOrder = fields.get(name).toString().toLowerCase();
|
||||
map.put(name, sortOrder);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> figureFacets() {
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
for (String field : query.getFacetFields()) {
|
||||
map.put("facet_" + field, figureFacet(field));
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private Map<String, Object> figureHighlighter() {
|
||||
return tree() //
|
||||
.put("fields", tree() //
|
||||
.put("ALLTEXT", EMPTY_JSON_MAP))
|
||||
.asMap();
|
||||
}
|
||||
|
||||
private Map<String, Object> figureFacet(String field) {
|
||||
return tree() //
|
||||
.put("terms", tree() //
|
||||
.put("field", field) //
|
||||
.put("size", ifPositive(query.getFacetLimit())) //
|
||||
.put("min_doc_count",
|
||||
ifPositive(query.getFacetMinCount()))) //
|
||||
.asMap();
|
||||
}
|
||||
|
||||
private List<String> figureReturnFields() {
|
||||
return new ArrayList<>(query.getFieldsToReturn());
|
||||
}
|
||||
|
||||
private Map<String, Object> figureFullMap() {
|
||||
return tree() //
|
||||
.put("query", queryAndFilters) //
|
||||
.put("from", ifPositive(query.getStart())) //
|
||||
.put("highlight", highlighter)
|
||||
.put("size", ifPositive(query.getRows())) //
|
||||
.put("sort", sortFields) //
|
||||
.put("_source", returnFields) //
|
||||
.put("aggregations", facets) //
|
||||
.asMap();
|
||||
}
|
||||
|
||||
public String asString() throws SearchEngineException {
|
||||
try {
|
||||
return new ObjectMapper().writeValueAsString(fullMap);
|
||||
} catch (JsonProcessingException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private static class QueryStringMap {
|
||||
public final Map<String, Object> map;
|
||||
|
||||
public QueryStringMap(String queryString) {
|
||||
map = new HashMap<>();
|
||||
map.put("query_string", makeInnerMap(escape(queryString)));
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a kluge, but perhaps it will work for now.
|
||||
*
|
||||
* Apparently Solr is willing to put up with query strings that contain
|
||||
* special characters in odd places, but Elasticsearch is not.
|
||||
*
|
||||
* So, a query string of "classgroup:http://this/that" must be escaped
|
||||
* as "classgroup:http\:\/\/this\/that". Notice that the first colon
|
||||
* delimits the field name, and so must not be escaped.
|
||||
*
|
||||
* But what if no field is specified? Then all colons must be escaped.
|
||||
* How would we distinguish that?
|
||||
*
|
||||
* And what if the query is more complex, and more than one field is
|
||||
* specified? What if other special characters are included?
|
||||
*
|
||||
* This could be a real problem.
|
||||
*/
|
||||
private String escape(String queryString) {
|
||||
return queryString.replace(":", "\\:").replace("/", "\\/")
|
||||
.replaceFirst("\\\\:", ":");
|
||||
}
|
||||
|
||||
private Map<String, String> makeInnerMap(String queryString) {
|
||||
Map<String, String> inner = new HashMap<>();
|
||||
inner.put("default_field", "ALLTEXT");
|
||||
inner.put("query", queryString);
|
||||
return inner;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,182 @@
|
|||
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
|
||||
|
||||
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField.Count;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField.BaseCount;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResponse;
|
||||
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResultDocument;
|
||||
|
||||
/**
|
||||
* Elastic search sends a JSON response to a query. parse it to a
|
||||
* SearchResponse.
|
||||
*/
|
||||
class ResponseParser {
|
||||
private static final Log log = LogFactory.getLog(ResponseParser.class);
|
||||
|
||||
private final Map<String, Object> responseMap;
|
||||
|
||||
private Map<String, Map<String, List<String>>> highlightingMap;
|
||||
private Map<String, SearchFacetField> facetFieldsMap;
|
||||
private long totalHits;
|
||||
private List<SearchResultDocument> documentList;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public ResponseParser(String responseString) throws SearchEngineException {
|
||||
try {
|
||||
this.responseMap = new ObjectMapper().readValue(responseString,
|
||||
HashMap.class);
|
||||
} catch (IOException e) {
|
||||
throw new SearchEngineException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public SearchResponse parse() {
|
||||
parseDocumentList();
|
||||
parseFacetFields();
|
||||
SearchResponse response = new BaseSearchResponse(highlightingMap,
|
||||
facetFieldsMap,
|
||||
new ElasticSearchResultDocumentList(documentList, totalHits));
|
||||
log.debug("ESQuery.ResponseParser.parse: " + response);
|
||||
return response;
|
||||
}
|
||||
|
||||
private void parseFacetFields() {
|
||||
facetFieldsMap = new HashMap<>();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Map<String, Object>> aggregations = (Map<String, Map<String, Object>>) responseMap
|
||||
.get("aggregations");
|
||||
if (aggregations == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (String key : aggregations.keySet()) {
|
||||
if (key.startsWith("facet_")) {
|
||||
String name = key.substring(6);
|
||||
parseFacetField(name, aggregations.get(key));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void parseFacetField(String name, Map<String, Object> facetMap) {
|
||||
@SuppressWarnings("unchecked")
|
||||
List<Map<String, Object>> bucketsList = (List<Map<String, Object>>) facetMap
|
||||
.get("buckets");
|
||||
if (bucketsList == null) {
|
||||
return;
|
||||
}
|
||||
|
||||
List<Count> counts = new ArrayList<>();
|
||||
for (Map<String, Object> bucket : bucketsList) {
|
||||
counts.add(new BaseCount((String) bucket.get("key"),
|
||||
(Integer) bucket.get("doc_count")));
|
||||
}
|
||||
|
||||
facetFieldsMap.put(name, new BaseSearchFacetField(name, counts));
|
||||
}
|
||||
|
||||
private void parseDocumentList() {
|
||||
documentList = new ArrayList<>();
|
||||
highlightingMap = new HashMap<>();
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Object> uberHits = (Map<String, Object>) responseMap
|
||||
.get("hits");
|
||||
if (uberHits == null) {
|
||||
log.warn("Didn't find a 'hits' field " + "in the query response: "
|
||||
+ responseMap);
|
||||
return;
|
||||
}
|
||||
|
||||
Integer total = (Integer) uberHits.get("total");
|
||||
if (total == null) {
|
||||
log.warn("Didn't find a 'hits.total' field "
|
||||
+ "in the query response: " + responseMap);
|
||||
return;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
List<Map<String, Object>> hits = (List<Map<String, Object>>) uberHits
|
||||
.get("hits");
|
||||
if (hits == null) {
|
||||
log.warn("Didn't find a 'hits.hits' field "
|
||||
+ "in the query response: " + responseMap);
|
||||
return;
|
||||
}
|
||||
|
||||
parseDocuments(hits);
|
||||
}
|
||||
|
||||
private void parseDocuments(List<Map<String, Object>> hits) {
|
||||
for (Map<String, Object> hit : hits) {
|
||||
SearchResultDocument doc = parseDocument(hit);
|
||||
if (doc != null) {
|
||||
documentList.add(doc);
|
||||
|
||||
Map<String, List<String>> highlight = parseHighlight(hit);
|
||||
if (highlight != null) {
|
||||
highlightingMap.put(doc.getUniqueId(), highlight);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private SearchResultDocument parseDocument(Map<String, Object> hitMap) {
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, Collection<Object>> sourceMap = (Map<String, Collection<Object>>) hitMap
|
||||
.get("_source");
|
||||
if (sourceMap == null) {
|
||||
log.warn("Didn't find a '_source' field in the hit: " + hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
String id = (String) hitMap.get("_id");
|
||||
if (id == null) {
|
||||
log.warn("Didn't find a '_id' field in the hit: " + hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
return new BaseSearchResultDocument(id, sourceMap);
|
||||
}
|
||||
|
||||
private Map<String, List<String>> parseHighlight(
|
||||
Map<String, Object> hitMap) {
|
||||
@SuppressWarnings("unchecked")
|
||||
Map<String, List<String>> highlightMap = (Map<String, List<String>>) hitMap
|
||||
.get("highlight");
|
||||
if (highlightMap == null) {
|
||||
log.debug("Didn't find a 'highlight' field in the hit: " + hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
List<String> snippets = highlightMap.get("ALLTEXT");
|
||||
if (snippets == null) {
|
||||
log.warn("Didn't find a 'highlight.ALLTEXT' field in the hit: "
|
||||
+ hitMap);
|
||||
return null;
|
||||
}
|
||||
|
||||
Map<String, List<String>> snippetMap = new HashMap<>();
|
||||
snippetMap.put("ALLTEXT", snippets);
|
||||
return snippetMap;
|
||||
}
|
||||
}
|
Loading…
Add table
Reference in a new issue