View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.hc.client5.http.impl.cache;
28  
29  import java.time.Duration;
30  import java.time.Instant;
31  import java.util.Iterator;
32  
33  import org.apache.hc.client5.http.cache.ResponseCacheControl;
34  import org.apache.hc.client5.http.utils.DateUtils;
35  import org.apache.hc.core5.http.HttpHeaders;
36  import org.apache.hc.core5.http.HttpRequest;
37  import org.apache.hc.core5.http.HttpResponse;
38  import org.apache.hc.core5.http.HttpStatus;
39  import org.apache.hc.core5.http.HttpVersion;
40  import org.apache.hc.core5.http.Method;
41  import org.apache.hc.core5.http.ProtocolVersion;
42  import org.apache.hc.core5.http.message.MessageSupport;
43  import org.slf4j.Logger;
44  import org.slf4j.LoggerFactory;
45  
46  class ResponseCachingPolicy {
47  
48      /**
49       * The default freshness lifetime applied to a response when no other lifetime can be derived.
50       *
51       * <p>This constant is the fallback used when the freshness lifetime of a response is calculated:
52       * it applies when the response carries no cache-control directives, or when none of s-maxage,
53       * max-age or a Date/Expires pair yields a lifetime.</p>
54       *
55       * <p>The value is fixed at 5 minutes (300 seconds). It is a private constant and cannot be
56       * customized by applications.</p>
57       */
58       private static final Duration DEFAULT_FRESHNESS_DURATION = Duration.ofMinutes(5);
59  
60      private static final Logger LOG = LoggerFactory.getLogger(ResponseCachingPolicy.class);
61  
62      private final boolean sharedCache;
63      private final boolean neverCache1_0ResponsesWithQueryString;
64      private final boolean neverCache1_1ResponsesWithQueryString;
65  
66      /**
67       * Constructs a new ResponseCachingPolicy with the specified cache policy settings and stale-if-error support.
68       *
69       * @param sharedCache                           whether to behave as a shared cache (true) or a
70       *                                              non-shared/private cache (false)
71       * @param neverCache1_0ResponsesWithQueryString {@code true} to never cache HTTP 1.0 responses with a query string,
72       *                                              {@code false} to cache if explicit cache headers are found.
73       * @param neverCache1_1ResponsesWithQueryString {@code true} to never cache HTTP 1.1 responses with a query string,
74       *                                              {@code false} to cache if explicit cache headers are found.
75       * @since 5.4
76       */
77      public ResponseCachingPolicy(
78               final boolean sharedCache,
79               final boolean neverCache1_0ResponsesWithQueryString,
80               final boolean neverCache1_1ResponsesWithQueryString) {
81          this.sharedCache = sharedCache;
82          this.neverCache1_0ResponsesWithQueryString = neverCache1_0ResponsesWithQueryString;
83          this.neverCache1_1ResponsesWithQueryString = neverCache1_1ResponsesWithQueryString;
84      }
85  
86      /**
87       * Determine if the {@link HttpResponse} gotten from the origin is a
88       * cacheable response.
89       *
90       * @return {@code true} if response is cacheable
91       */
92      public boolean isResponseCacheable(final ResponseCacheControl cacheControl, final HttpRequest request, final HttpResponse response) {
93          final ProtocolVersion version = request.getVersion() != null ? request.getVersion() : HttpVersion.DEFAULT;
94          if (version.compareToVersion(HttpVersion.HTTP_1_1) > 0) {
95              if (LOG.isDebugEnabled()) {
96                  LOG.debug("Protocol version {} is non-cacheable", version);
97              }
98              return false;
99          }
100 
101         // Presently only GET and HEAD methods are supported
102         final String httpMethod = request.getMethod();
103         if (!Method.GET.isSame(httpMethod) && !Method.HEAD.isSame(httpMethod)) {
104             if (LOG.isDebugEnabled()) {
105                 LOG.debug("{} method response is not cacheable", httpMethod);
106             }
107             return false;
108         }
109 
110         final int code = response.getCode();
111 
112         // Should never happen but better be defensive
113         if (code <= HttpStatus.SC_INFORMATIONAL) {
114             return false;
115         }
116 
117         if (isKnownNonCacheableStatusCode(code)) {
118             if (LOG.isDebugEnabled()) {
119                 LOG.debug("{} response is not cacheable", code);
120             }
121             return false;
122         }
123 
124         if (request.getPath().contains("?")) {
125             if (neverCache1_0ResponsesWithQueryString && from1_0Origin(response)) {
126                 LOG.debug("Response is not cacheable as it had a query string");
127                 return false;
128             } else if (!neverCache1_1ResponsesWithQueryString && !isExplicitlyCacheable(cacheControl, response)) {
129                 LOG.debug("Response is not cacheable as it is missing explicit caching headers");
130                 return false;
131             }
132         }
133 
134         if (cacheControl.isMustUnderstand() && !understoodStatusCode(code)) {
135             // must-understand cache directive overrides no-store
136             LOG.debug("Response contains a status code that the cache does not understand, so it's not cacheable");
137             return false;
138         }
139 
140         if (isExplicitlyNonCacheable(cacheControl)) {
141             LOG.debug("Response is explicitly non-cacheable per cache control directive");
142             return false;
143         }
144 
145         if (sharedCache) {
146             if (request.containsHeader(HttpHeaders.AUTHORIZATION) &&
147                     cacheControl.getSharedMaxAge() == -1 &&
148                     !cacheControl.isPublic()) {
149                 LOG.debug("Request contains private credentials");
150                 return false;
151             }
152         }
153 
154         // See if the response is tainted
155         if (response.countHeaders(HttpHeaders.EXPIRES) > 1) {
156             LOG.debug("Multiple Expires headers");
157             return false;
158         }
159 
160         if (response.countHeaders(HttpHeaders.DATE) > 1) {
161             LOG.debug("Multiple Date headers");
162             return false;
163         }
164 
165         final Instant responseDate = DateUtils.parseStandardDate(response, HttpHeaders.DATE);
166         final Instant responseExpires = DateUtils.parseStandardDate(response, HttpHeaders.EXPIRES);
167 
168         if (expiresHeaderLessOrEqualToDateHeaderAndNoCacheControl(cacheControl, responseDate, responseExpires)) {
169             LOG.debug("Expires header less or equal to Date header and no cache control directives");
170             return false;
171         }
172 
173         // Treat responses with `Vary: *` as essentially non-cacheable.
174         final Iterator<String> it = MessageSupport.iterateTokens(response, HttpHeaders.VARY);
175         while (it.hasNext()) {
176             final String token = it.next();
177             if ("*".equals(token)) {
178                 if (LOG.isDebugEnabled()) {
179                     LOG.debug("Vary: * found");
180                 }
181                 return false;
182             }
183         }
184 
185         return isExplicitlyCacheable(cacheControl, response) || isHeuristicallyCacheable(cacheControl, code, responseDate, responseExpires);
186     }
187 
188     private static boolean isKnownCacheableStatusCode(final int status) {
189         return status == HttpStatus.SC_OK ||
190                 status == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION ||
191                 status == HttpStatus.SC_MULTIPLE_CHOICES ||
192                 status == HttpStatus.SC_MOVED_PERMANENTLY ||
193                 status == HttpStatus.SC_GONE;
194     }
195 
196     private static boolean isKnownNonCacheableStatusCode(final int status) {
197         return status == HttpStatus.SC_PARTIAL_CONTENT;
198     }
199 
200     private static boolean isUnknownStatusCode(final int status) {
201         if (status >= 100 && status <= 101) {
202             return false;
203         }
204         if (status >= 200 && status <= 206) {
205             return false;
206         }
207         if (status >= 300 && status <= 307) {
208             return false;
209         }
210         if (status >= 400 && status <= 417) {
211             return false;
212         }
213         return status < 500 || status > 505;
214     }
215 
216     /**
217      * Determines whether the given CacheControl object indicates that the response is explicitly non-cacheable.
218      *
219      * @param cacheControl the CacheControl object representing the cache-control directive(s) from the HTTP response.
220      * @return true if the response is explicitly non-cacheable according to the cache-control directive(s),
221      * false otherwise.
222      * <p>
223      * When cacheControl is non-null:
224      * - Returns true if the response contains "no-store" or (if sharedCache is true) "private" cache-control directives.
225      * - If the response contains the "no-cache" directive, it is considered cacheable, but requires validation against
226      * the origin server before use. In this case, the method returns false.
227      * - Returns false for other cache-control directives, implying the response is cacheable.
228      * <p>
229      * When cacheControl is null, returns false, implying the response is cacheable.
230      */
231     protected boolean isExplicitlyNonCacheable(final ResponseCacheControl cacheControl) {
232         if (cacheControl == null) {
233             return false;
234         } else {
235             // The response is considered explicitly non-cacheable if it contains
236             // "no-store" or (if sharedCache is true) "private" directives.
237             // Note that "no-cache" is considered cacheable but requires validation before use.
238             return cacheControl.isNoStore() || (sharedCache && cacheControl.isCachePrivate());
239         }
240     }
241 
242     protected boolean isExplicitlyCacheable(final ResponseCacheControl cacheControl, final HttpResponse response) {
243         if (cacheControl.isPublic()) {
244             return true;
245         }
246         if (!sharedCache && cacheControl.isCachePrivate()) {
247             return true;
248         }
249         if (response.containsHeader(HttpHeaders.EXPIRES)) {
250             return true;
251         }
252         if (cacheControl.getMaxAge() > 0) {
253             return true;
254         }
255         if (sharedCache && cacheControl.getSharedMaxAge() > 0) {
256             return true;
257         }
258         return false;
259     }
260 
261     protected boolean isHeuristicallyCacheable(final ResponseCacheControl cacheControl,
262                                                final int status,
263                                                final Instant responseDate,
264                                                final Instant responseExpires) {
265         if (isKnownCacheableStatusCode(status)) {
266             final Duration freshnessLifetime = calculateFreshnessLifetime(cacheControl, responseDate, responseExpires);
267             // calculate freshness lifetime
268             if (freshnessLifetime.isNegative()) {
269                 if (LOG.isDebugEnabled()) {
270                     LOG.debug("Freshness lifetime is invalid");
271                 }
272                 return false;
273             }
274             // If the 'immutable' directive is present and the response is still fresh,
275             // then the response is considered cacheable without further validation
276             if (cacheControl.isImmutable() && responseIsStillFresh(responseDate, freshnessLifetime)) {
277                 if (LOG.isDebugEnabled()) {
278                     LOG.debug("Response is immutable and fresh, considered cacheable without further validation");
279                 }
280                 return true;
281             }
282             if (freshnessLifetime.compareTo(Duration.ZERO) > 0) {
283                 return true;
284             }
285         } else if (isUnknownStatusCode(status)) {
286             // a response with an unknown status code MUST NOT be
287             // cached
288             if (LOG.isDebugEnabled()) {
289                 LOG.debug("{} response is unknown", status);
290             }
291             return false;
292         }
293         return false;
294     }
295 
296     private boolean expiresHeaderLessOrEqualToDateHeaderAndNoCacheControl(final ResponseCacheControl cacheControl, final Instant responseDate, final Instant expires) {
297         if (!cacheControl.isUndefined()) {
298             return false;
299         }
300         if (expires == null || responseDate == null) {
301             return false;
302         }
303         return expires.compareTo(responseDate) <= 0;
304     }
305 
306     private boolean from1_0Origin(final HttpResponse response) {
307         final Iterator<String> it = MessageSupport.iterateTokens(response, HttpHeaders.VIA);
308         if (it.hasNext()) {
309             final String token = it.next();
310             return token.startsWith("1.0 ") || token.startsWith("HTTP/1.0 ");
311         }
312         final ProtocolVersion version = response.getVersion() != null ? response.getVersion() : HttpVersion.DEFAULT;
313         return HttpVersion.HTTP_1_0.equals(version);
314     }
315 
316     /**
317      * Calculates the freshness lifetime of a response, based on the headers in the response.
318      * <p>
319      * This method follows the algorithm for calculating the freshness lifetime.
320      * The freshness lifetime represents the time interval in seconds during which the response can be served without
321      * being considered stale. The freshness lifetime calculation takes into account the s-maxage, max-age, Expires, and
322      * Date headers as follows:
323      * <ul>
324      * <li>If the s-maxage directive is present in the Cache-Control header of the response, its value is used as the
325      * freshness lifetime for shared caches, which typically serve multiple users or clients.</li>
326      * <li>If the max-age directive is present in the Cache-Control header of the response, its value is used as the
327      * freshness lifetime for private caches, which serve a single user or client.</li>
328      * <li>If the Expires header is present in the response, its value is used as the expiration time of the response.
329      * The freshness lifetime is calculated as the difference between the expiration time and the time specified in the
330      * Date header of the response.</li>
331      * <li>If none of the above headers are present or if the calculated freshness lifetime is invalid, a default value of
332      * 5 minutes is returned.</li>
333      * </ul>
334      *
335      * <p>
336      * Note that caching is a complex topic and cache control directives may interact with each other in non-trivial ways.
337      * This method provides a basic implementation of the freshness lifetime calculation algorithm and may not be suitable
338      * for all use cases. Developers should consult the HTTP caching specifications for more information and consider
339      * implementing additional caching mechanisms as needed.
340      * </p>
341      */
342     private Duration calculateFreshnessLifetime(final ResponseCacheControl cacheControl, final Instant responseDate, final Instant responseExpires) {
343 
344         if (cacheControl.isUndefined()) {
345             // If no cache-control header is present, assume no caching directives and return a default value
346             return DEFAULT_FRESHNESS_DURATION; // 5 minutes
347         }
348 
349         // Check if s-maxage is present and use its value if it is
350         if (cacheControl.getSharedMaxAge() != -1) {
351             return Duration.ofSeconds(cacheControl.getSharedMaxAge());
352         } else if (cacheControl.getMaxAge() != -1) {
353             return Duration.ofSeconds(cacheControl.getMaxAge());
354         }
355 
356         if (responseDate != null && responseExpires != null) {
357             return Duration.ofSeconds(responseExpires.getEpochSecond() - responseDate.getEpochSecond());
358         }
359 
360         // If none of the above conditions are met, a heuristic freshness lifetime might be applicable
361         return DEFAULT_FRESHNESS_DURATION; // 5 minutes
362     }
363 
364     /**
365      * Understood status codes include:
366      * - All 2xx (Successful) status codes (200-299)
367      * - All 3xx (Redirection) status codes (300-399)
368      * - All 4xx (Client Error) status codes up to 417 and 421
369      * - All 5xx (Server Error) status codes up to 505
370      *
371      * @param status The HTTP status code to be checked.
372      * @return true if the HTTP status code is understood, false otherwise.
373      */
374     private boolean understoodStatusCode(final int status) {
375         return (status >= 200 && status <= 206)    ||
376                 (status >= 300 && status <= 399)   ||
377                 (status >= 400 && status <= 417)   ||
378                 (status == 421)                    ||
379                 (status >= 500 && status <= 505);
380     }
381 
382     /**
383      * Determines if an HttpResponse is still fresh based on its Date header and calculated freshness lifetime.
384      *
385      * <p>
386      * This method calculates the age of the response from its Date header and compares it with the provided freshness
387      * lifetime. If the age is less than the freshness lifetime, the response is considered fresh.
388      * </p>
389      *
390      * <p>
391      * Note: If the Date header is missing or invalid, this method assumes the response is not fresh.
392      * </p>
393      *
394      * @param responseDate  The response date.
395      * @param freshnessLifetime The calculated freshness lifetime of the HttpResponse.
396      * @return {@code true} if the response age is less than its freshness lifetime, {@code false} otherwise.
397      */
398     private boolean responseIsStillFresh(final Instant responseDate, final Duration freshnessLifetime) {
399         if (responseDate == null) {
400             // The Date header is missing or invalid. Assuming the response is not fresh.
401             return false;
402         }
403         final Duration age = Duration.between(responseDate, Instant.now());
404         return age.compareTo(freshnessLifetime) < 0;
405     }
406 
407 }