VIVO-87 First attempt at a caching filter.

Appears to work with Firefox cache and Chrome cache, except for language-dependent stuff.

Is that because it isn't configured correctly, or is it because Firefox and Chrome aren't attempting to keep more than one version of a page in their caches? Even if they do see a "Vary: Accept-Language" header?
This commit is contained in:
j2blake 2013-05-14 16:37:29 -04:00
parent ddf101d6ae
commit 6123f41d95
5 changed files with 396 additions and 13 deletions

View file

@ -250,6 +250,11 @@
<copyField source="nameRaw" dest="acNameStemmed" />
<copyField source="nameRaw" dest="nameText" />
<!-- nameLowercaseSingleValued is not copied from nameRaw becasue nameRaw might have multiple values -->
<!-- field for hash signature, used for comparing to versions from external caches -->
<field name="etag" type="string" stored="true" indexed="false" multiValued="false" />
<!-- **************************** End Vitro Fields *************************** -->
<!-- **************************** Dynamic Fields *************************** -->

View file

@ -856,18 +856,14 @@
type header if posted in the body. For example, curl now
requires: -H 'Content-type:text/xml; charset=utf-8'
-->
<requestHandler name="/update"
<requestHandler name="/update"
class="solr.XmlUpdateRequestHandler">
<!-- See below for information on defining
updateRequestProcessorChains that can be used by name
on each Update Request
-->
<!--
<!-- Run the etag processor on each update request. -->
<lst name="defaults">
<str name="update.processor">dedupe</str>
<str name="update.processor">etag</str>
</lst>
-->
</requestHandler>
</requestHandler>
<!-- Binary Update Request Handler
http://wiki.apache.org/solr/javabin
-->
@ -1480,6 +1476,23 @@
</updateRequestProcessorChain>
-->
<!-- ETag generation
Creates the "etag" field on the fly based on a hash of all other
fields.
-->
<updateRequestProcessorChain name="etag">
<processor class="solr.processor.SignatureUpdateProcessorFactory">
<bool name="enabled">true</bool>
<str name="signatureField">etag</str>
<bool name="overwriteDupes">false</bool>
<str name="signatureClass">solr.processor.Lookup3Signature</str>
</processor>
<processor class="solr.LogUpdateProcessorFactory" />
<processor class="solr.RunUpdateProcessorFactory" />
</updateRequestProcessorChain>
<!-- Response Writers
http://wiki.apache.org/solr/QueryResponseWriter

View file

@ -0,0 +1,354 @@
/* $This file is distributed under the terms of the license in /doc/license.txt$ */
package edu.cornell.mannlib.vitro.webapp.filters;
import java.io.IOException;
import java.util.Enumeration;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.collections.EnumerationUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.QueryResponse;
import edu.cornell.mannlib.vedit.beans.LoginStatusBean;
import edu.cornell.mannlib.vitro.webapp.beans.UserAccount;
import edu.cornell.mannlib.vitro.webapp.config.ConfigurationProperties;
import edu.cornell.mannlib.vitro.webapp.search.solr.SolrSetup;
import edu.cornell.mannlib.vitro.webapp.utils.solr.FieldMap;
import edu.cornell.mannlib.vitro.webapp.utils.solr.SolrQueryUtils;
import edu.cornell.mannlib.vitro.webapp.utils.solr.SolrResultsParser;
/**
* Assist in cache management for individual profile pages.
*
* Only works for users who are not logged in.
*
* The Solr index must be configured to keep an ETAG on each individual's
* record. The ETAG is a hash of the record's content and is updated each time
* the individual is re-indexed.
*
* But this ETAG is not sufficient, since the page may have different versions
* for different languages. So we append a hash of the Locales from the request
* to the ETAG to make it unique. NOTE: If we allow users to choose their
* preferred languages, the LocalSelectionFilter must execute before this one.
*
* When an external cache (e.g. Squid) is asked for an individual's profile
* page, it will ask VIVO whether the version in the cache is still current, and
* to provide a new version if it is not. This is a conditional request.
*
* When a conditional request is received, this filter will check to see whether
* the request is on behalf of a logged-in user. If so, a fresh response is
* generated, with a Cache-Control header that should prevent the cache from
* storing that response.
*
* If the requesting user is not logged in, this filter will ask Solr for the
* ETAG on the requested individual. If it is the same as the ETAG supplied by
* the cache in the request, then the response is 304 Not Modified. Otherwise, a
* fresh response is generated.
*
* An unconditional request may mean that there is no external cache, or that
* the cache doesn't have a copy of this particular page.
*
* @see http://tools.ietf.org/pdf/rfc2616
*/
public class CachingResponseFilter implements Filter {
private static final Log log = LogFactory
.getLog(CachingResponseFilter.class);
private static final String PROPERTY_DEFAULT_NAMESPACE = "Vitro.defaultNamespace";
private static final String ETAG_FIELD = "etag";
private static final FieldMap parserFieldMap = SolrQueryUtils.fieldMap()
.put(ETAG_FIELD, ETAG_FIELD);
private ServletContext ctx;
private String defaultNamespace;
@Override
public void init(FilterConfig fc) throws ServletException {
ctx = fc.getServletContext();
defaultNamespace = ConfigurationProperties.getBean(ctx).getProperty(
PROPERTY_DEFAULT_NAMESPACE);
}
@Override
public void destroy() {
// Nothing to tear down.
}
/**
* Process an HTTP request.
*/
@Override
public void doFilter(ServletRequest request, ServletResponse response,
FilterChain chain) throws IOException, ServletException {
HttpServletRequest req = (HttpServletRequest) request;
HttpServletResponse resp = (HttpServletResponse) response;
/*
* If this request is not for a profile page, or if the individual
* doesn't appear in the search index, create a basic, cache-neutral
* response.
*/
String individualUri = figureIndividualUriFromRequest(req);
if (individualUri == null) {
produceBasicResponse(req, resp, chain);
return;
}
String rawEtag = findEtagForIndividual(individualUri);
String etag = produceLanguageSpecificEtag(req, rawEtag);
if (etag == null) {
produceBasicResponse(req, resp, chain);
return;
}
/*
* If a logged-in user asks for an individual profile page, the response
* should not come from the cache, nor should it be stored in the cache.
*/
if (userIsLoggedIn(req)) {
produceUncacheableResponse(req, resp, chain);
return;
}
/*
* If the request is not conditional then there is no cached version of
* this page. If the request is conditional and the condition is met,
* then the cached version is stale. In either case, create a fresh
* response to be stored in the cache.
*/
if (!isConditionalRequest(req)) {
produceCacheableResponse(req, resp, chain, etag);
return;
}
if (cacheIsStale(req, etag)) {
produceCacheableResponse(req, resp, chain, etag);
return;
}
/*
* If the request is conditional and the condition is not met
* (individual has not changed), send a "not-modified" response, so the
* cached version will be used.
*/
produceCacheHitResponse(resp, etag);
}
private boolean isConditionalRequest(HttpServletRequest req) {
if (req.getHeader("If-None-Match") == null) {
log.debug("Not conditional request.");
return false;
} else {
log.debug("Conditional request.");
return true;
}
}
private boolean userIsLoggedIn(HttpServletRequest req) {
UserAccount currentUser = LoginStatusBean.getCurrentUser(req);
if (currentUser == null) {
log.debug("Not logged in.");
return false;
} else {
log.debug("Logged in as '" + currentUser.getEmailAddress() + "'");
return true;
}
}
/**
* This rejects some of the requests as being obviously not individuals, and
* then assumes that the last part of any request is a Localname.
*
* This is not always true, of course, but it will work because we will
* prepend the default namespace and look for the resulting "URI" in the URI
* field of the search index. If we find it there, then it is valid.
*
* If we were to make this more rigorous, it would reduce the number of
* unnecessary searches.
*/
private String figureIndividualUriFromRequest(HttpServletRequest req) {
String requestPath = req.getRequestURI();
if (requestPath == null) {
return null;
}
if (!mightBeProfileRequest(requestPath)) {
return null;
}
String[] pathParts = requestPath.split("/");
String uri = defaultNamespace + pathParts[pathParts.length - 1];
log.debug("Request path = '" + requestPath + "', uri = '" + uri + "'");
return uri;
}
/**
* Requests for profile pages come in many forms, but we can still narrow
* them down.
*
* Eliminate CSS files, JavaScript files, and images.
*
* That leaves these acceptable forms:
*
* <pre>
* /individual?uri=urlencodedURI
* /individual?netId=bdc34
* /individual?netid=bdc34
* /individual/localname
* /display/localname
* /individual/localname/localname.rdf
* /individual/localname/localname.n3
* /individual/localname/localname.ttl
* </pre>
*/
private boolean mightBeProfileRequest(String requestPath) {
String path = requestPath.toLowerCase();
String[] extensions = { ".css", ".js", ".gif", ".png", ".jpg", ".jpeg" };
for (String ext : extensions) {
if (path.endsWith(ext)) {
return false;
}
}
return requestPath.endsWith("/individual")
|| requestPath.contains("/individual/")
|| requestPath.contains("/display/");
}
/**
* Ask Solr whether it has an ETAG for this URI.
*/
private String findEtagForIndividual(String individualUri) {
SolrQuery query = new SolrQuery("URI:" + individualUri)
.setFields(ETAG_FIELD);
SolrServer solr = SolrSetup.getSolrServer(ctx);
try {
QueryResponse response = solr.query(query);
List<Map<String, String>> maps = new SolrResultsParser(response,
parserFieldMap).parse();
log.debug("Solr response for '" + query.getQuery() + "' was "
+ maps);
if (maps.isEmpty()) {
return null;
} else {
return maps.get(0).get(ETAG_FIELD);
}
} catch (SolrServerException e) {
log.warn(
"Solr query '" + query.getQuery() + "' threw an exception",
e);
return null;
}
}
/**
* The ETAG from the Solr index is not specific enough, since we may have
* different versions for different languages. Add the Locales from the
* request to make it unique.
*/
private String produceLanguageSpecificEtag(HttpServletRequest req,
String rawEtag) {
if (rawEtag == null) {
return null;
}
@SuppressWarnings("unchecked")
List<Locale> locales = EnumerationUtils.toList(req.getLocales());
StringBuilder buffer = new StringBuilder('"').append(rawEtag);
for (Locale locale : locales) {
buffer.append(locale.toString()).append(" ");
}
buffer.append('"');
String etag = buffer.toString();
log.debug("Language-specific ETAG = " + etag);
return etag;
}
/**
* If the etag does not match any of the etags in any of the "If-None-Match"
* headers, then they are all stale. An asterisk matches anything.
*/
private boolean cacheIsStale(HttpServletRequest req, String etag) {
for (Enumeration<?> values = req.getHeaders("If-None-Match"); values
.hasMoreElements();) {
String value = (String) values.nextElement();
log.debug("If-None-Match: " + value);
String[] matches = value.split("\\s*,\\s*");
for (String match : matches) {
if (etag.equalsIgnoreCase(match) || "*".equals(match)) {
log.debug("Cache is not stale: etag=" + match);
return false;
}
}
}
log.debug("Cache is stale.");
return true;
}
private void produceBasicResponse(HttpServletRequest req,
HttpServletResponse resp, FilterChain chain) throws IOException,
ServletException {
chain.doFilter(req, resp);
}
private void produceUncacheableResponse(HttpServletRequest req,
HttpServletResponse resp, FilterChain chain) throws IOException,
ServletException {
String etag = generateArbitraryUniqueEtag(req);
log.debug("Produce uncacheable response: etag='" + etag + "'");
resp.addHeader("ETag", etag);
resp.addHeader("Vary", "*");
resp.addHeader("Cache-Control", "no-store");
chain.doFilter(req, resp);
}
private void produceCacheableResponse(HttpServletRequest req,
HttpServletResponse resp, FilterChain chain, String etag)
throws IOException, ServletException {
log.debug("Produce cacheable response: etag='" + etag + "'");
resp.addHeader("ETag", etag);
resp.addHeader("Vary", "Accept-Language");
chain.doFilter(req, resp);
}
/**
* Technically, if the request is not GET or HEAD, we should return 412
* PreconditionFailed. However, we usually treat GET and POST as equivalent.
*/
private void produceCacheHitResponse(HttpServletResponse resp, String etag)
throws IOException {
log.debug("Produce cache hit response: etag='" + etag + "'");
resp.addHeader("ETag", etag);
resp.addHeader("Vary", "Accept-Language");
resp.sendError(HttpServletResponse.SC_NOT_MODIFIED, "Not Modified");
}
private String generateArbitraryUniqueEtag(HttpServletRequest req) {
return String.format("%s-%d", req.getSession().getId(),
System.currentTimeMillis());
}
}

View file

@ -37,13 +37,13 @@ public class SolrResultsParser {
List<Map<String, String>> maps = new ArrayList<Map<String, String>>();
if (queryResponse == null) {
log.error("Query response for a search was null");
log.debug("Query response for a search was null");
return maps;
}
SolrDocumentList docs = queryResponse.getResults();
if (docs == null) {
log.error("Docs for a search was null");
log.debug("Docs for a search was null");
return maps;
}
log.debug("Total number of hits = " + docs.getNumFound());
@ -65,13 +65,13 @@ public class SolrResultsParser {
List<Map<String, String>> maps = new ArrayList<Map<String, String>>();
if (queryResponse == null) {
log.error("Query response for a search was null");
log.debug("Query response for a search was null");
return maps;
}
SolrDocumentList docs = queryResponse.getResults();
if (docs == null) {
log.error("Docs for a search was null");
log.debug("Docs for a search was null");
return maps;
}
log.debug("Total number of hits = " + docs.getNumFound());

View file

@ -88,6 +88,17 @@
<url-pattern>/*</url-pattern>
</filter-mapping>
<!-- If language support is enabled, this must not precede the LocaleSelectionFilter -->
<filter>
<description>Recognize conditional requests, and generate Cache-Control headers.</description>
<filter-name>Caching Response filter</filter-name>
<filter-class>edu.cornell.mannlib.vitro.webapp.filters.CachingResponseFilter</filter-class>
</filter>
<filter-mapping>
<filter-name>Caching Response filter</filter-name>
<url-pattern>/*</url-pattern>
</filter-mapping>
<filter>
<filter-name>JSession Strip Filter</filter-name>
<filter-class>edu.cornell.mannlib.vitro.webapp.filters.JSessionStripFilter</filter-class>