initial files for ElasticSearch: ticket vivo-1587 (#85)

* initial files from ticket vivo-1587
* Add https://mvnrepository.com/artifact/org.apache.httpcomponents/fluent-hc/4.5.6 dependency
* Resolves: https://jira.duraspace.org/browse/VIVO-1587
This commit is contained in:
Don Elsborg 2018-09-24 20:05:42 -06:00 committed by Andrew Woods
parent 734b9ccf68
commit 40f78e58a8
12 changed files with 1275 additions and 0 deletions

View file

@ -58,6 +58,11 @@
<artifactId>argon2-jvm</artifactId> <artifactId>argon2-jvm</artifactId>
<version>2.4</version> <version>2.4</version>
</dependency> </dependency>
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>fluent-hc</artifactId>
<version>4.5.6</version>
</dependency>
<dependency> <dependency>
<groupId>org.vivoweb</groupId> <groupId>org.vivoweb</groupId>
<artifactId>vitro-dependencies</artifactId> <artifactId>vitro-dependencies</artifactId>

View file

@ -0,0 +1,92 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.entity.ContentType;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputField;
/**
* The nuts and bolts of adding a document to the Elasticsearch index
*/
public class ESAdder {
    private static final Log log = LogFactory.getLog(ESAdder.class);

    private final String baseUrl;

    public ESAdder(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    /**
     * Add the documents to the index, one PUT request per document.
     */
    public void add(Collection<SearchInputDocument> docs)
            throws SearchEngineException {
        for (SearchInputDocument doc : docs) {
            addDocument(doc);
        }
    }

    /**
     * Convert one document to JSON and PUT it to Elasticsearch.
     */
    private void addDocument(SearchInputDocument doc)
            throws SearchEngineException {
        try {
            Map<String, List<Object>> map = convertDocToMap(doc);
            String json = new ObjectMapper().writeValueAsString(map);
            log.debug("Adding document for '" + doc.getField("DocId") + "': "
                    + json);
            putToElastic(json, (String) doc.getField("DocId").getFirstValue());
        } catch (SearchEngineException e) {
            // BUG FIX: putToElastic() already wrapped its failure with an
            // accurate message -- don't re-wrap it as a JSON conversion error.
            throw e;
        } catch (Exception e) {
            throw new SearchEngineException("Failed to convert to JSON", e);
        }
    }

    /**
     * Some field values are collections. Add the members of the collection
     * instead.
     */
    private Map<String, List<Object>> convertDocToMap(SearchInputDocument doc) {
        Map<String, List<Object>> map = new HashMap<>();
        for (SearchInputField field : doc.getFieldMap().values()) {
            List<Object> list = new ArrayList<>();
            for (Object value : field.getValues()) {
                if (value instanceof Collection) {
                    list.addAll((Collection<?>) value);
                } else {
                    list.add(value);
                }
            }
            map.put(field.getName(), list);
        }
        return map;
    }

    /**
     * PUT the JSON to the "_doc" endpoint, using the document's ID as the
     * Elasticsearch document ID.
     */
    private void putToElastic(String json, String docId)
            throws SearchEngineException {
        try {
            // "UTF-8" is the canonical charset name ("UTF8" was an alias).
            String url = baseUrl + "/_doc/"
                    + URLEncoder.encode(docId, "UTF-8");
            Response response = Request.Put(url)
                    .bodyString(json, ContentType.APPLICATION_JSON).execute();
            log.debug("Response from Elasticsearch: "
                    + response.returnContent().asString());
        } catch (Exception e) {
            throw new SearchEngineException("Failed to put to Elasticsearch",
                    e);
        }
    }
}

View file

@ -0,0 +1,42 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.util.HashMap;
import java.util.Map;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
/**
* The nuts and bolts of getting the number of documents in the Elasticsearch
* index.
*/
public class ESCounter {
    private final String baseUrl;

    public ESCounter(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    /**
     * Ask Elasticsearch how many documents are in the index.
     *
     * @return the value of the "count" field from the "_count" endpoint
     * @throws SearchEngineException if the request or the JSON parsing fails
     */
    public int count() throws SearchEngineException {
        try {
            String url = baseUrl + "/_doc/_count";
            Response response = Request.Get(url).execute();
            String json = response.returnContent().asString();

            @SuppressWarnings("unchecked")
            Map<String, Object> map = new ObjectMapper().readValue(json,
                    HashMap.class);
            return (Integer) map.get("count");
        } catch (Exception e) {
            // BUG FIX: the message used to say "Failed to put", which was
            // misleading for a count operation.
            throw new SearchEngineException(
                    "Failed to get document count from Elasticsearch", e);
        }
    }
}

View file

@ -0,0 +1,147 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.io.IOException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.StatusLine;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import org.apache.http.entity.ContentType;
import org.apache.http.util.EntityUtils;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
/**
* The nuts and bolts of deleting documents from the Elasticsearch index.
*/
public class ESDeleter {
    private static final Log log = LogFactory.getLog(ESDeleter.class);

    private final String baseUrl;

    /**
     * @param baseUrl the base URL of the Elasticsearch index
     */
    public ESDeleter(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    /**
     * Delete each of the documents, one DELETE request per ID.
     */
    public void deleteByIds(Collection<String> ids)
            throws SearchEngineException {
        for (String id : ids) {
            deleteById(id);
        }
    }

    private void deleteById(String id) throws SearchEngineException {
        try {
            // "UTF-8" is the canonical charset name ("UTF8" was an alias).
            String url = baseUrl + "/_doc/"
                    + URLEncoder.encode(id, "UTF-8");
            Response response = Request.Delete(url).execute();
            // returnContent() throws HttpResponseException on a non-2xx
            // status -- that is how the 404 is detected below. The content
            // itself is not needed.
            response.returnContent().asString();
        } catch (HttpResponseException e) {
            if (e.getStatusCode() == 404) {
                // Don't care if it has already been deleted.
            } else {
                throw new SearchEngineException(
                        "Failed to delete Elasticsearch document " + id, e);
            }
        } catch (Exception e) {
            throw new SearchEngineException(
                    "Failed to delete Elasticsearch document " + id, e);
        }
    }

    /**
     * POST the query to the "_delete_by_query" endpoint. A failure status is
     * logged as a warning rather than thrown, since some documents may have
     * been deleted before the failure.
     */
    public void deleteByQuery(String queryString) throws SearchEngineException {
        String url = baseUrl + "/_delete_by_query";
        SearchQuery query = new BaseSearchQuery().setQuery(queryString);
        String queryJson = new QueryConverter(query).asString();
        try {
            Response response = Request.Post(url)
                    .bodyString(queryJson, ContentType.APPLICATION_JSON)
                    .execute();
            BaseResponseHandler handler = new BaseResponseHandler();
            response.handleResponse(handler);
            if (handler.getStatusCode() >= 400) {
                log.warn(String.format(
                        "Failed to delete Elasticsearch documents by query: %s, %d - %s\n%s",
                        queryString, handler.getStatusCode(),
                        handler.getReasonPhrase(), handler.getContentString()));
            }
        } catch (IOException e) {
            throw new SearchEngineException("Failed to delete Elasticsearch "
                    + "documents by query " + queryString, e);
        }
    }

    // ----------------------------------------------------------------------
    // Helper class for interpreting HttpResponse errors
    // ----------------------------------------------------------------------

    /**
     * Captures status, headers, and body of a response for later inspection.
     * Static, since it uses no state from the enclosing instance.
     */
    private static class BaseResponseHandler implements ResponseHandler<Object> {
        private int statusCode;
        private String reasonPhrase;
        private Map<String, List<String>> headers;
        private String contentString;

        @Override
        public Object handleResponse(org.apache.http.HttpResponse innerResponse)
                throws IOException {
            StatusLine statusLine = innerResponse.getStatusLine();
            statusCode = statusLine.getStatusCode();
            reasonPhrase = statusLine.getReasonPhrase();

            headers = new HashMap<>();
            for (Header header : innerResponse.getAllHeaders()) {
                String name = header.getName();
                if (!headers.containsKey(name)) {
                    headers.put(name, new ArrayList<String>());
                }
                headers.get(name).add(header.getValue());
            }

            HttpEntity entity = innerResponse.getEntity();
            if (entity == null) {
                contentString = "";
            } else {
                contentString = EntityUtils.toString(entity);
            }
            return "";
        }

        public int getStatusCode() {
            return statusCode;
        }

        public String getReasonPhrase() {
            return reasonPhrase;
        }

        public Map<String, List<String>> getHeaders() {
            return headers;
        }

        public String getContentString() {
            return contentString;
        }
    }
}

View file

@ -0,0 +1,41 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.client.fluent.Request;
import org.apache.http.client.fluent.Response;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
/**
* Just does a "commit" or "flush" to the index.
*/
public class ESFlusher {
    private static final Log log = LogFactory.getLog(ESFlusher.class);

    private final String baseUrl;

    public ESFlusher(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    /** Flush without waiting for any other ongoing flush. */
    public void flush() throws SearchEngineException {
        flush(false);
    }

    /**
     * Ask Elasticsearch to flush the index.
     *
     * @param wait if true, adds "wait_for_ongoing" so the request blocks
     *            until any other running flush operation has completed
     */
    public void flush(boolean wait) throws SearchEngineException {
        try {
            String url = baseUrl + "/_flush"
                    + (wait ? "?wait_for_ongoing" : "");
            Response response = Request.Get(url).execute();
            String json = response.returnContent().asString();
            log.debug("flush response: " + json);
        } catch (Exception e) {
            // BUG FIX: the message used to say "Failed to put".
            throw new SearchEngineException(
                    "Failed to flush to Elasticsearch", e);
        }
    }
}

View file

@ -0,0 +1,106 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpEntityEnclosingRequestBase;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
import edu.cornell.mannlib.vitro.webapp.utils.http.HttpClientFactory;
/**
* Convert a SearchQuery to JSON, send it to Elasticsearch, and convert the JSON
* response to a SearchResponse.
*/
public class ESQuery {
    private static final Log log = LogFactory.getLog(ESQuery.class);

    private final String baseUrl;

    public ESQuery(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    /**
     * Convert the query to JSON, send it to the "_search" endpoint, and parse
     * the JSON response.
     */
    public SearchResponse query(SearchQuery query)
            throws SearchEngineException {
        String queryString = new QueryConverter(query).asString();
        String response = doTheQuery(queryString);
        return new ResponseParser(response).parse();
    }

    private String doTheQuery(String queryString)
            throws SearchEngineException {
        log.debug("QUERY: " + queryString);
        try {
            String url = baseUrl + "/_search";
            HttpResponse response = new ESFunkyGetRequest(url)
                    .bodyString(queryString, ContentType.APPLICATION_JSON)
                    .execute();
            // Specify the charset explicitly; the charset-less overload is
            // deprecated and uses the platform default.
            String responseString = IOUtils
                    .toString(response.getEntity().getContent(), "UTF-8");
            log.debug("RESPONSE: " + responseString);
            return responseString;
        } catch (Exception e) {
            // BUG FIX: the original logged the failure and returned "", which
            // made the ResponseParser fail later with a confusing JSON error.
            // Propagate the real cause instead.
            throw new SearchEngineException("Failed to query Elasticsearch",
                    e);
        }
    }

    // ----------------------------------------------------------------------
    // Helper class -- a GET request that accepts a body
    // ----------------------------------------------------------------------

    /**
     * The HttpClient implementations, both regular and conversational, do not
     * allow you to put a body on a GET request. In online discussion, some say
     * that the HTTP spec is ambiguous on this point, so each implementation
     * makes its own choice. For example, CURL allows it.
     *
     * More to the point however, is that ElasticSearch requires it. So here's a
     * simple class to make that possible.
     *
     * USE POST INSTEAD!!
     */
    private static class ESFunkyGetRequest
            extends HttpEntityEnclosingRequestBase {
        public ESFunkyGetRequest(String url) throws SearchEngineException {
            super();
            try {
                setURI(new URI(url));
            } catch (URISyntaxException e) {
                throw new SearchEngineException(e);
            }
        }

        public ESFunkyGetRequest bodyString(String contents,
                ContentType contentType) {
            setEntity(new StringEntity(contents, contentType));
            return this;
        }

        public HttpResponse execute() throws SearchEngineException {
            try {
                return HttpClientFactory.getHttpClient().execute(this);
            } catch (IOException e) {
                throw new SearchEngineException(e);
            }
        }

        @Override
        public String getMethod() {
            return "GET";
        }
    }
}

View file

@ -0,0 +1,142 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.util.Arrays;
import java.util.Collection;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import edu.cornell.mannlib.vitro.webapp.modules.Application;
import edu.cornell.mannlib.vitro.webapp.modules.ComponentStartupStatus;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchInputDocument;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchQuery;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Property;
import edu.cornell.mannlib.vitro.webapp.utils.configuration.Validation;
/**
* A first draft of an Elasticsearch implementation.
*/
public class ElasticSearchEngine implements SearchEngine {
private static final Log log = LogFactory.getLog(ElasticSearchEngine.class);
// ----------------------------------------------------------------------
// Configuration
// ----------------------------------------------------------------------
private String baseUrl;
@Property(uri = "http://vitro.mannlib.cornell.edu/ns/vitro/ApplicationSetup#hasBaseUrl")
public void setBaseUrl(String url) {
if (baseUrl == null) {
if (url.endsWith("/")) {
url = url.substring(0, url.length() - 1);
}
baseUrl = url;
} else {
throw new IllegalStateException(
"Configuration includes multiple base URLs: " + url
+ ", and " + baseUrl);
}
}
@Validation
public void validate() throws Exception {
if (baseUrl == null) {
throw new IllegalStateException(
"Configuration did not include a base URL.");
}
}
// ----------------------------------------------------------------------
// The instance
// ----------------------------------------------------------------------
@Override
public void startup(Application application, ComponentStartupStatus ss) {
log.warn("ElasticSearchEngine.startup() not implemented."); // TODO
}
@Override
public void shutdown(Application application) {
// TODO Flush the buffers
log.warn("ElasticSearchEngine.shutdown not implemented.");
}
@Override
public void ping() throws SearchEngineException {
// TODO What's the simplest we can do? Another smoke test?
log.warn("ElasticSearchEngine.ping() not implemented."); // TODO
}
@Override
public SearchInputDocument createInputDocument() {
return new BaseSearchInputDocument();
}
@Override
public void add(SearchInputDocument... docs) throws SearchEngineException {
add(Arrays.asList(docs));
}
@Override
public void add(Collection<SearchInputDocument> docs)
throws SearchEngineException {
new ESAdder(baseUrl).add(docs);
}
@Override
public void commit() throws SearchEngineException {
new ESFlusher(baseUrl).flush();
}
@Override
public void commit(boolean wait) throws SearchEngineException {
new ESFlusher(baseUrl).flush(wait);
}
@Override
public void deleteById(String... ids) throws SearchEngineException {
deleteById(Arrays.asList(ids));
}
@Override
public void deleteById(Collection<String> ids)
throws SearchEngineException {
new ESDeleter(baseUrl).deleteByIds(ids);
}
@Override
public void deleteByQuery(String query) throws SearchEngineException {
new ESDeleter(baseUrl).deleteByQuery(query);
}
@Override
public SearchQuery createQuery() {
return new BaseSearchQuery();
}
@Override
public SearchQuery createQuery(String queryText) {
BaseSearchQuery query = new BaseSearchQuery();
query.setQuery(queryText);
return query;
}
@Override
public SearchResponse query(SearchQuery query)
throws SearchEngineException {
return new ESQuery(baseUrl).query(query);
}
@Override
public int documentCount() throws SearchEngineException {
return new ESCounter(baseUrl).count();
}
}

View file

@ -0,0 +1,53 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocumentList;
/**
* A simple implementation. In fact, this is so simple that perhaps it should be
* named BaseSearchResultDocumentList.
*/
class ElasticSearchResultDocumentList implements SearchResultDocumentList {
    // The page of documents actually returned by the search.
    private final List<SearchResultDocument> documents;
    // The total number of hits in the index, which may exceed documents.size().
    private final long numberFound;

    public ElasticSearchResultDocumentList(List<SearchResultDocument> documents,
            long numberFound) {
        this.documents = documents;
        this.numberFound = numberFound;
    }

    @Override
    public Iterator<SearchResultDocument> iterator() {
        return documents.iterator();
    }

    @Override
    public long getNumFound() {
        // BUG FIX: previously returned documents.size(), silently ignoring
        // the numberFound value stored by the constructor. That broke paging
        // whenever the total hit count exceeded the page size.
        return numberFound;
    }

    @Override
    public int size() {
        return documents.size();
    }

    @Override
    public SearchResultDocument get(int i) {
        return documents.get(i);
    }

    @Override
    public String toString() {
        return String.format(
                "ElasticSearchResultDocumentList[numberFound=%s, documents=%s]",
                numberFound, documents);
    }
}

View file

@ -0,0 +1,216 @@
# What is this package?
* The first draft of an Elasticsearch driver for VIVO
# What has been done?
* Implement the `SearchEngine` interface
* Classes in `edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch`
* No attempt to add new functions.
# How to experiment with it?
* Install elasticsearch somewhere.
* Create a search index with the appropriate mapping (see below).
* Check out VIVO and this branch of Vitro (see below), and do the usual installation procedure.
* Modify `{vitro_home}/config/applicationSetup.n3` to use this driver (see below).
* Start elasticsearch
* Start VIVO
# Not ready for production
* Documentation
* Instructions on how to install and configure the driver.
* Instructions on how to set up Elasticsearch?
* Smoke test
* Display a warning if the elasticsearch server is not responding.
* Functional testing
* Are we getting the proper search results?
* Are search results in the order that we would like?
* Improved snippets
* Copy the technique used for Solr
* Code improvement
* Rigorous closing of HTTP connections.
* IOC for HTTP code, to help in unit testing
* Consistent use of exceptions and logging
* Unit tests
* Automatic initialization of the index
* If VIVO detects an empty index, apply the mapping.
# The next steps: adding functionality
## Stay within the framework
* Add fields that enhance the contents of the search index documents (see below).
* Add data distributors that run queries and format the output (see below).
## Go outside the framework
* Add functions to the Elasticsearch driver that the Solr driver will simply ignore.
* Or remove Solr entirely
* Query Elasticsearch directly
* Or write a data distributor that will run the query
# The details:
## Check out VIVO and Vitro
* For now, the Elasticsearch driver only lives in my fork of Vitro
* No changes to VIVO are required (yet).
```
git clone https://github.com/vivo-project/VIVO.git
git clone -b feature/elasticsearchExperiments https://github.com/j2blake/Vitro.git
```
## A mapping for the search index
* If the index uses the default mapping, it will not work correctly.
* Some fields must be declared as `keyword`, some as unstemmed, etc.
* Example mapping script:
```
curl -X PUT "localhost:9200/vivo?pretty" -H 'Content-Type: application/json' -d'
{
"mappings": {
"_doc": {
"properties": {
"ALLTEXT": {
"type": "text",
"analyzer": "english"
},
"ALLTEXTUNSTEMMED": {
"type": "text",
"analyzer": "standard"
},
"DocId": {
"type": "keyword"
},
"classgroup": {
"type": "keyword"
},
"type": {
"type": "keyword"
},
"mostSpecificTypeURIs": {
"type": "keyword"
},
"indexedTime": {
"type": "long"
},
"nameRaw": {
"type": "keyword"
},
"URI": {
"type": "keyword"
},
"THUMBNAIL": {
"type": "integer"
},
"THUMBNAIL_URL": {
"type": "keyword"
},
"nameLowercaseSingleValued": {
"type": "text",
"analyzer": "standard",
"fielddata": "true"
},
"BETA" : {
"type" : "float"
}
}
}
},
"query": {
"default_field": "ALLTEXT"
}
}
'
```
* __*Note:*__ The first line of the script specifies the name of the index as `vivo`.
Any name may be used, but it must match the "base URL" that is specified in `applicationSetup.n3` (see below).
* __*Note:*__ The same first line specifies the location and port number of the elasticsearch server.
Again, any location and port may be used, but they must match the "base URL" in `applicationSetup.n3`.
## Modify `applicationSetup.n3`
* Change this:
```
# ----------------------------
#
# Search engine module:
# The Solr-based implementation is the only standard option, but it can be
# wrapped in an "instrumented" wrapper, which provides additional logging
# and more rigorous life-cycle checking.
#
:instrumentedSearchEngineWrapper
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
:wraps :solrSearchEngine .
```
* To this:
```
# ----------------------------
#
# Search engine module:
# The Solr-based implementation is the only standard option, but it can be
# wrapped in an "instrumented" wrapper, which provides additional logging
# and more rigorous life-cycle checking.
#
:instrumentedSearchEngineWrapper
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.InstrumentedSearchEngineWrapper> ,
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
:wraps :elasticSearchEngine .
:elasticSearchEngine
a <java:edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.ElasticSearchEngine> ,
<java:edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngine> ;
:hasBaseUrl "http://localhost:9200/vivo" .
```
## Enhance the contents of the search index
### An example: Publication URIs in the author's search document
* Add a keyword field to the search index
```
"publicationURI": {
"type": "keyword"
},
```
* Add a `DocumentModifier` to VIVO.
```
:documentModifier_publications
a <java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.SelectQueryDocumentModifier> ,
<java:edu.cornell.mannlib.vitro.webapp.searchindex.documentBuilding.DocumentModifier> ;
rdfs:label "URIs of publications are added to publicationURI field." ;
:hasTargetField "publicationURI" ;
:hasSelectQuery """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX vivo: <http://vivoweb.org/ontology/core#>
PREFIX bibo: <http://purl.org/ontology/bibo/>
SELECT ?publication
WHERE {
?uri vivo:relatedBy ?authorship .
?authorship a vivo:Authorship .
?authorship vivo:relates ?publication .
?publication a bibo:Document .
}
""" .
```
## Use data distributors to query the search index
* Install the Data Distribution API
* Add a distributor:
```
:drill_by_URI
a <java:edu.cornell.library.scholars.webapp.controller.api.distribute.DataDistributor> ,
<java:edu.cornell.library.scholars.webapp.controller.api.distribute.search.DrillDownSearchByUriDataDistributor> ;
:actionName "searchAndDrill" .
```
* Run the query:
```
http://localhost:8080/vivo/api/dataRequest/searchAndDrill?uri=http://scholars.cornell.edu/individual/mj495
```

View file

@ -0,0 +1,77 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Build a Map of Objects, suitable for marshalling by Jackson.
*
* Include conditional logic, so null values, empty maps, or empty lists will
* not be added, unless you use the special values.
*/
public class JsonTree {
    /**
     * Empty maps will not be added, except for this one.
     */
    public static final Map<String, Object> EMPTY_JSON_MAP = Collections
            .emptyMap();

    /**
     * Empty lists will not be added, except for this one.
     */
    public static final List<Object> EMPTY_JSON_LIST = Collections.emptyList();

    /**
     * Create the tree.
     */
    public static JsonTree tree() {
        return new JsonTree();
    }

    /**
     * Returns null for zero or negative values, so put() will ignore them.
     * (The original comment said "negative", but zero is also dropped.)
     */
    public static Integer ifPositive(int i) {
        return (i > 0) ? i : null;
    }

    // The accumulated key/value pairs. Final: the reference never changes.
    private final Map<String, Object> map = new HashMap<>();

    /**
     * Add the value under this key, unless it is null, an empty map, or an
     * empty list. The special EMPTY_JSON_MAP / EMPTY_JSON_LIST constants are
     * always added.
     *
     * @return this tree, for call chaining
     */
    public JsonTree put(String key, Object value) {
        if (isSignificant(value)) {
            storeIt(key, value);
        }
        return this;
    }

    private boolean isSignificant(Object value) {
        if (value == null) {
            return false;
        }
        // Wildcard casts instead of raw types, to avoid unchecked warnings.
        if (value instanceof Map && ((Map<?, ?>) value).isEmpty()
                && value != EMPTY_JSON_MAP) {
            return false;
        }
        if (value instanceof List && ((List<?>) value).isEmpty()
                && value != EMPTY_JSON_LIST) {
            return false;
        }
        return true;
    }

    private void storeIt(String key, Object value) {
        if (value instanceof JsonTree) {
            // Nested trees are stored as their map form, so the whole
            // structure marshals cleanly with Jackson.
            map.put(key, ((JsonTree) value).asMap());
        } else {
            map.put(key, value);
        }
    }

    /**
     * @return a defensive copy of the accumulated map
     */
    public Map<String, Object> asMap() {
        return new HashMap<>(map);
    }
}

View file

@ -0,0 +1,172 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.EMPTY_JSON_MAP;
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.ifPositive;
import static edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch.JsonTree.tree;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchQuery.Order;
/**
* Accept a SearchQuery and make it available as a JSON string, suitable for
* Elasticsearch.
*/
public class QueryConverter {
    private static final Log log = LogFactory.getLog(QueryConverter.class);

    private final SearchQuery query;
    private final Map<String, Object> queryAndFilters;
    private final Map<String, Object> sortFields;
    private final Map<String, Object> facets;
    private final Map<String, Object> highlighter;
    private final List<String> returnFields;
    private final Map<String, Object> fullMap;

    /**
     * Build all of the pieces up front; the full map must be assembled last,
     * since it references the other pieces.
     */
    public QueryConverter(SearchQuery query) {
        this.query = query;
        this.queryAndFilters = filteredOrNot();
        this.sortFields = figureSortFields();
        this.facets = figureFacets();
        this.highlighter = figureHighlighter();
        this.returnFields = figureReturnFields();
        this.fullMap = figureFullMap();
    }

    /** A bare query string, unless filters require a "bool" wrapper. */
    private Map<String, Object> filteredOrNot() {
        return query.getFilters().isEmpty()
                ? new QueryStringMap(query.getQuery()).map
                : buildFilterStructure();
    }

    private Map<String, Object> buildFilterStructure() {
        return tree() //
                .put("bool", tree() //
                        .put("must", new QueryStringMap(query.getQuery()).map) //
                        .put("filter", buildFiltersList())) //
                .asMap();
    }

    private List<Map<String, Object>> buildFiltersList() {
        List<Map<String, Object>> filterMaps = new ArrayList<>();
        for (String filterString : query.getFilters()) {
            filterMaps.add(new QueryStringMap(filterString).map);
        }
        return filterMaps;
    }

    /** Field name maps to "asc" or "desc". */
    private Map<String, Object> figureSortFields() {
        Map<String, Object> result = new HashMap<>();
        for (Map.Entry<String, Order> entry : query.getSortFields()
                .entrySet()) {
            result.put(entry.getKey(),
                    entry.getValue().toString().toLowerCase());
        }
        return result;
    }

    /** Each facet field becomes a "facet_"-prefixed aggregation. */
    private Map<String, Object> figureFacets() {
        Map<String, Object> result = new HashMap<>();
        for (String facetField : query.getFacetFields()) {
            result.put("facet_" + facetField, figureFacet(facetField));
        }
        return result;
    }

    private Map<String, Object> figureHighlighter() {
        return tree() //
                .put("fields", tree() //
                        .put("ALLTEXT", EMPTY_JSON_MAP))
                .asMap();
    }

    private Map<String, Object> figureFacet(String field) {
        return tree() //
                .put("terms", tree() //
                        .put("field", field) //
                        .put("size", ifPositive(query.getFacetLimit())) //
                        .put("min_doc_count",
                                ifPositive(query.getFacetMinCount()))) //
                .asMap();
    }

    private List<String> figureReturnFields() {
        return new ArrayList<>(query.getFieldsToReturn());
    }

    /** Assemble the complete request body. */
    private Map<String, Object> figureFullMap() {
        return tree() //
                .put("query", queryAndFilters) //
                .put("from", ifPositive(query.getStart())) //
                .put("highlight", highlighter)
                .put("size", ifPositive(query.getRows())) //
                .put("sort", sortFields) //
                .put("_source", returnFields) //
                .put("aggregations", facets) //
                .asMap();
    }

    /** Marshal the assembled structure to a JSON string. */
    public String asString() throws SearchEngineException {
        try {
            return new ObjectMapper().writeValueAsString(fullMap);
        } catch (JsonProcessingException e) {
            throw new SearchEngineException(e);
        }
    }

    private static class QueryStringMap {
        public final Map<String, Object> map;

        public QueryStringMap(String queryString) {
            map = new HashMap<>();
            map.put("query_string", makeInnerMap(escape(queryString)));
        }

        /**
         * This is a kluge, but perhaps it will work for now.
         *
         * Apparently Solr is willing to put up with query strings that contain
         * special characters in odd places, but Elasticsearch is not.
         *
         * So, a query string of "classgroup:http://this/that" must be escaped
         * as "classgroup:http\:\/\/this\/that". Notice that the first colon
         * delimits the field name, and so must not be escaped.
         *
         * But what if no field is specified? Then all colons must be escaped.
         * How would we distinguish that?
         *
         * And what if the query is more complex, and more than one field is
         * specified? What if other special characters are included?
         *
         * This could be a real problem.
         */
        private String escape(String queryString) {
            return queryString.replace(":", "\\:").replace("/", "\\/")
                    .replaceFirst("\\\\:", ":");
        }

        private Map<String, String> makeInnerMap(String queryString) {
            Map<String, String> inner = new HashMap<>();
            inner.put("default_field", "ALLTEXT");
            inner.put("query", queryString);
            return inner;
        }
    }
}

View file

@ -0,0 +1,182 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.searchengine.elasticsearch;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.fasterxml.jackson.databind.ObjectMapper;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchEngineException;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchFacetField.Count;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResponse;
import edu.cornell.mannlib.vitro.webapp.modules.searchEngine.SearchResultDocument;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchFacetField.BaseCount;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResponse;
import edu.cornell.mannlib.vitro.webapp.searchengine.base.BaseSearchResultDocument;
/**
 * Elasticsearch sends a JSON response to a query. Parse it to a
 * SearchResponse.
 *
 * <p>
 * The response is read into a nested Map/List structure by Jackson, and the
 * relevant parts ("hits", "aggregations", "highlight") are picked out of it.
 * Missing or malformed sections are logged and skipped rather than treated
 * as fatal.
 */
class ResponseParser {
    private static final Log log = LogFactory.getLog(ResponseParser.class);

    // The entire query response, parsed from JSON into maps and lists.
    private final Map<String, Object> responseMap;

    // Populated by parse(): doc ID -> (field name -> highlight snippets)
    private Map<String, Map<String, List<String>>> highlightingMap;
    // Populated by parse(): facet name -> facet field with counts
    private Map<String, SearchFacetField> facetFieldsMap;
    // Populated by parse(): total number of matching documents ("hits.total")
    private long totalHits;
    // Populated by parse(): the documents in this page of results
    private List<SearchResultDocument> documentList;

    /**
     * @param responseString the raw JSON response body from Elasticsearch
     * @throws SearchEngineException if the response is not valid JSON
     */
    @SuppressWarnings("unchecked")
    public ResponseParser(String responseString) throws SearchEngineException {
        try {
            this.responseMap = new ObjectMapper().readValue(responseString,
                    HashMap.class);
        } catch (IOException e) {
            throw new SearchEngineException(e);
        }
    }

    /**
     * Walk the parsed response and assemble a SearchResponse from the
     * documents, highlights, and facets it contains.
     */
    public SearchResponse parse() {
        parseDocumentList();
        parseFacetFields();
        SearchResponse response = new BaseSearchResponse(highlightingMap,
                facetFieldsMap,
                new ElasticSearchResultDocumentList(documentList, totalHits));
        log.debug("ESQuery.ResponseParser.parse: " + response);
        return response;
    }

    /**
     * Each facet was requested under an aggregation named "facet_[fieldName]"
     * (presumably by the query builder -- TODO confirm against ESQuery), so
     * only aggregations with that prefix are treated as facets.
     */
    private void parseFacetFields() {
        facetFieldsMap = new HashMap<>();

        @SuppressWarnings("unchecked")
        Map<String, Map<String, Object>> aggregations = (Map<String, Map<String, Object>>) responseMap
                .get("aggregations");
        if (aggregations == null) {
            return;
        }

        for (Map.Entry<String, Map<String, Object>> entry : aggregations
                .entrySet()) {
            String key = entry.getKey();
            if (key.startsWith("facet_")) {
                // Strip the "facet_" prefix to recover the field name.
                String name = key.substring("facet_".length());
                parseFacetField(name, entry.getValue());
            }
        }
    }

    /**
     * Convert one aggregation's "buckets" list into a SearchFacetField,
     * where each bucket contributes a (value, count) pair.
     */
    private void parseFacetField(String name, Map<String, Object> facetMap) {
        @SuppressWarnings("unchecked")
        List<Map<String, Object>> bucketsList = (List<Map<String, Object>>) facetMap
                .get("buckets");
        if (bucketsList == null) {
            return;
        }

        List<Count> counts = new ArrayList<>();
        for (Map<String, Object> bucket : bucketsList) {
            counts.add(new BaseCount((String) bucket.get("key"),
                    (Integer) bucket.get("doc_count")));
        }

        facetFieldsMap.put(name, new BaseSearchFacetField(name, counts));
    }

    /**
     * Read "hits.total" and "hits.hits" from the response. Any missing
     * section leaves the document list empty (with a warning) rather than
     * failing the whole parse.
     */
    private void parseDocumentList() {
        documentList = new ArrayList<>();
        highlightingMap = new HashMap<>();

        @SuppressWarnings("unchecked")
        Map<String, Object> uberHits = (Map<String, Object>) responseMap
                .get("hits");
        if (uberHits == null) {
            log.warn("Didn't find a 'hits' field " + "in the query response: "
                    + responseMap);
            return;
        }

        Integer total = (Integer) uberHits.get("total");
        if (total == null) {
            log.warn("Didn't find a 'hits.total' field "
                    + "in the query response: " + responseMap);
            return;
        }
        // BUG FIX: 'total' was previously read but never stored, so the
        // response always reported zero total hits.
        totalHits = total;

        @SuppressWarnings("unchecked")
        List<Map<String, Object>> hits = (List<Map<String, Object>>) uberHits
                .get("hits");
        if (hits == null) {
            log.warn("Didn't find a 'hits.hits' field "
                    + "in the query response: " + responseMap);
            return;
        }

        parseDocuments(hits);
    }

    /**
     * Convert each hit into a result document, and record its highlight
     * snippets (if any) under the document's unique ID.
     */
    private void parseDocuments(List<Map<String, Object>> hits) {
        for (Map<String, Object> hit : hits) {
            SearchResultDocument doc = parseDocument(hit);
            if (doc != null) {
                documentList.add(doc);
                Map<String, List<String>> highlight = parseHighlight(hit);
                if (highlight != null) {
                    highlightingMap.put(doc.getUniqueId(), highlight);
                }
            }
        }
    }

    /**
     * Build a result document from a hit's "_id" and "_source" fields.
     *
     * @return the document, or null (with a warning) if either field is
     *         missing.
     */
    private SearchResultDocument parseDocument(Map<String, Object> hitMap) {
        @SuppressWarnings("unchecked")
        Map<String, Collection<Object>> sourceMap = (Map<String, Collection<Object>>) hitMap
                .get("_source");
        if (sourceMap == null) {
            log.warn("Didn't find a '_source' field in the hit: " + hitMap);
            return null;
        }

        String id = (String) hitMap.get("_id");
        if (id == null) {
            log.warn("Didn't find a '_id' field in the hit: " + hitMap);
            return null;
        }

        return new BaseSearchResultDocument(id, sourceMap);
    }

    /**
     * Extract the "ALLTEXT" highlight snippets from a hit.
     *
     * @return a single-entry map of "ALLTEXT" to its snippets, or null if
     *         the hit has no usable highlight section. A missing "highlight"
     *         field is normal (debug-level); a highlight without "ALLTEXT"
     *         is unexpected (warn-level).
     */
    private Map<String, List<String>> parseHighlight(
            Map<String, Object> hitMap) {
        @SuppressWarnings("unchecked")
        Map<String, List<String>> highlightMap = (Map<String, List<String>>) hitMap
                .get("highlight");
        if (highlightMap == null) {
            log.debug("Didn't find a 'highlight' field in the hit: " + hitMap);
            return null;
        }

        List<String> snippets = highlightMap.get("ALLTEXT");
        if (snippets == null) {
            log.warn("Didn't find a 'highlight.ALLTEXT' field in the hit: "
                    + hitMap);
            return null;
        }

        Map<String, List<String>> snippetMap = new HashMap<>();
        snippetMap.put("ALLTEXT", snippets);
        return snippetMap;
    }
}