View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.hc.client5.http.impl.cache;
28  
29  import java.time.Duration;
30  import java.time.Instant;
31  import java.util.Iterator;
32  
33  import org.apache.hc.client5.http.utils.DateUtils;
34  import org.apache.hc.core5.http.HttpHeaders;
35  import org.apache.hc.core5.http.HttpRequest;
36  import org.apache.hc.core5.http.HttpResponse;
37  import org.apache.hc.core5.http.HttpStatus;
38  import org.apache.hc.core5.http.HttpVersion;
39  import org.apache.hc.core5.http.Method;
40  import org.apache.hc.core5.http.ProtocolVersion;
41  import org.apache.hc.core5.http.message.MessageSupport;
42  import org.slf4j.Logger;
43  import org.slf4j.LoggerFactory;
44  
45  class ResponseCachingPolicy {
46  
47      /**
48       * The default freshness duration for a cached object, in seconds.
49       *
50       * <p>This constant is used to set the default value for the freshness lifetime of a cached object.
51       * When a new object is added to the cache, it will be assigned this duration if no other duration
52       * is specified.</p>
53       *
54       * <p>By default, this value is set to 300 seconds (5 minutes). Applications can customize this
55       * value as needed.</p>
56       */
57       private static final Duration DEFAULT_FRESHNESS_DURATION = Duration.ofMinutes(5);
58  
59      private static final Logger LOG = LoggerFactory.getLogger(ResponseCachingPolicy.class);
60  
61      private final boolean sharedCache;
62      private final boolean neverCache1_0ResponsesWithQueryString;
63      private final boolean neverCache1_1ResponsesWithQueryString;
64  
65      /**
66       * Constructs a new ResponseCachingPolicy with the specified cache policy settings and stale-if-error support.
67       *
68       * @param sharedCache                           whether to behave as a shared cache (true) or a
69       *                                              non-shared/private cache (false)
70       * @param neverCache1_0ResponsesWithQueryString {@code true} to never cache HTTP 1.0 responses with a query string,
71       *                                              {@code false} to cache if explicit cache headers are found.
72       * @param neverCache1_1ResponsesWithQueryString {@code true} to never cache HTTP 1.1 responses with a query string,
73       *                                              {@code false} to cache if explicit cache headers are found.
74       * @since 5.3
75       */
76      public ResponseCachingPolicy(
77               final boolean sharedCache,
78               final boolean neverCache1_0ResponsesWithQueryString,
79               final boolean neverCache1_1ResponsesWithQueryString) {
80          this.sharedCache = sharedCache;
81          this.neverCache1_0ResponsesWithQueryString = neverCache1_0ResponsesWithQueryString;
82          this.neverCache1_1ResponsesWithQueryString = neverCache1_1ResponsesWithQueryString;
83      }
84  
85      /**
86       * Determine if the {@link HttpResponse} gotten from the origin is a
87       * cacheable response.
88       *
89       * @return {@code true} if response is cacheable
90       */
91      public boolean isResponseCacheable(final ResponseCacheControl cacheControl, final HttpRequest request, final HttpResponse response) {
92          final ProtocolVersion version = request.getVersion() != null ? request.getVersion() : HttpVersion.DEFAULT;
93          if (version.compareToVersion(HttpVersion.HTTP_1_1) > 0) {
94              if (LOG.isDebugEnabled()) {
95                  LOG.debug("Protocol version {} is non-cacheable", version);
96              }
97              return false;
98          }
99  
100         // Presently only GET and HEAD methods are supported
101         final String httpMethod = request.getMethod();
102         if (!Method.GET.isSame(httpMethod) && !Method.HEAD.isSame(httpMethod)) {
103             if (LOG.isDebugEnabled()) {
104                 LOG.debug("{} method response is not cacheable", httpMethod);
105             }
106             return false;
107         }
108 
109         final int code = response.getCode();
110 
111         // Should never happen but better be defensive
112         if (code <= HttpStatus.SC_INFORMATIONAL) {
113             return false;
114         }
115 
116         if (isKnownNonCacheableStatusCode(code)) {
117             if (LOG.isDebugEnabled()) {
118                 LOG.debug("{} response is not cacheable", code);
119             }
120             return false;
121         }
122 
123         if (request.getPath().contains("?")) {
124             if (neverCache1_0ResponsesWithQueryString && from1_0Origin(response)) {
125                 LOG.debug("Response is not cacheable as it had a query string");
126                 return false;
127             } else if (!neverCache1_1ResponsesWithQueryString && !isExplicitlyCacheable(cacheControl, response)) {
128                 LOG.debug("Response is not cacheable as it is missing explicit caching headers");
129                 return false;
130             }
131         }
132 
133         if (cacheControl.isMustUnderstand() && !understoodStatusCode(code)) {
134             // must-understand cache directive overrides no-store
135             LOG.debug("Response contains a status code that the cache does not understand, so it's not cacheable");
136             return false;
137         }
138 
139         if (isExplicitlyNonCacheable(cacheControl)) {
140             LOG.debug("Response is explicitly non-cacheable per cache control directive");
141             return false;
142         }
143 
144         if (sharedCache) {
145             if (request.containsHeader(HttpHeaders.AUTHORIZATION) &&
146                     cacheControl.getSharedMaxAge() == -1 &&
147                     !cacheControl.isPublic()) {
148                 LOG.debug("Request contains private credentials");
149                 return false;
150             }
151         }
152 
153         // See if the response is tainted
154         if (response.countHeaders(HttpHeaders.EXPIRES) > 1) {
155             LOG.debug("Multiple Expires headers");
156             return false;
157         }
158 
159         if (response.countHeaders(HttpHeaders.DATE) > 1) {
160             LOG.debug("Multiple Date headers");
161             return false;
162         }
163 
164         final Instant responseDate = DateUtils.parseStandardDate(response, HttpHeaders.DATE);
165         final Instant responseExpires = DateUtils.parseStandardDate(response, HttpHeaders.EXPIRES);
166 
167         if (expiresHeaderLessOrEqualToDateHeaderAndNoCacheControl(cacheControl, responseDate, responseExpires)) {
168             LOG.debug("Expires header less or equal to Date header and no cache control directives");
169             return false;
170         }
171 
172         // Treat responses with `Vary: *` as essentially non-cacheable.
173         final Iterator<String> it = MessageSupport.iterateTokens(response, HttpHeaders.VARY);
174         while (it.hasNext()) {
175             final String token = it.next();
176             if ("*".equals(token)) {
177                 if (LOG.isDebugEnabled()) {
178                     LOG.debug("Vary: * found");
179                 }
180                 return false;
181             }
182         }
183 
184         return isExplicitlyCacheable(cacheControl, response) || isHeuristicallyCacheable(cacheControl, code, responseDate, responseExpires);
185     }
186 
187     private static boolean isKnownCacheableStatusCode(final int status) {
188         return status == HttpStatus.SC_OK ||
189                 status == HttpStatus.SC_NON_AUTHORITATIVE_INFORMATION ||
190                 status == HttpStatus.SC_MULTIPLE_CHOICES ||
191                 status == HttpStatus.SC_MOVED_PERMANENTLY ||
192                 status == HttpStatus.SC_GONE;
193     }
194 
195     private static boolean isKnownNonCacheableStatusCode(final int status) {
196         return status == HttpStatus.SC_PARTIAL_CONTENT;
197     }
198 
199     private static boolean isUnknownStatusCode(final int status) {
200         if (status >= 100 && status <= 101) {
201             return false;
202         }
203         if (status >= 200 && status <= 206) {
204             return false;
205         }
206         if (status >= 300 && status <= 307) {
207             return false;
208         }
209         if (status >= 400 && status <= 417) {
210             return false;
211         }
212         return status < 500 || status > 505;
213     }
214 
215     /**
216      * Determines whether the given CacheControl object indicates that the response is explicitly non-cacheable.
217      *
218      * @param cacheControl the CacheControl object representing the cache-control directive(s) from the HTTP response.
219      * @return true if the response is explicitly non-cacheable according to the cache-control directive(s),
220      * false otherwise.
221      * <p>
222      * When cacheControl is non-null:
223      * - Returns true if the response contains "no-store" or (if sharedCache is true) "private" cache-control directives.
224      * - If the response contains the "no-cache" directive, it is considered cacheable, but requires validation against
225      * the origin server before use. In this case, the method returns false.
226      * - Returns false for other cache-control directives, implying the response is cacheable.
227      * <p>
228      * When cacheControl is null, returns false, implying the response is cacheable.
229      */
230     protected boolean isExplicitlyNonCacheable(final ResponseCacheControl cacheControl) {
231         if (cacheControl == null) {
232             return false;
233         } else {
234             // The response is considered explicitly non-cacheable if it contains
235             // "no-store" or (if sharedCache is true) "private" directives.
236             // Note that "no-cache" is considered cacheable but requires validation before use.
237             return cacheControl.isNoStore() || (sharedCache && cacheControl.isCachePrivate());
238         }
239     }
240 
241     protected boolean isExplicitlyCacheable(final ResponseCacheControl cacheControl, final HttpResponse response) {
242         if (cacheControl.isPublic()) {
243             return true;
244         }
245         if (!sharedCache && cacheControl.isCachePrivate()) {
246             return true;
247         }
248         if (response.containsHeader(HttpHeaders.EXPIRES)) {
249             return true;
250         }
251         if (cacheControl.getMaxAge() > 0) {
252             return true;
253         }
254         if (sharedCache && cacheControl.getSharedMaxAge() > 0) {
255             return true;
256         }
257         return false;
258     }
259 
260     protected boolean isHeuristicallyCacheable(final ResponseCacheControl cacheControl,
261                                                final int status,
262                                                final Instant responseDate,
263                                                final Instant responseExpires) {
264         if (isKnownCacheableStatusCode(status)) {
265             final Duration freshnessLifetime = calculateFreshnessLifetime(cacheControl, responseDate, responseExpires);
266             // calculate freshness lifetime
267             if (freshnessLifetime.isNegative()) {
268                 if (LOG.isDebugEnabled()) {
269                     LOG.debug("Freshness lifetime is invalid");
270                 }
271                 return false;
272             }
273             // If the 'immutable' directive is present and the response is still fresh,
274             // then the response is considered cacheable without further validation
275             if (cacheControl.isImmutable() && responseIsStillFresh(responseDate, freshnessLifetime)) {
276                 if (LOG.isDebugEnabled()) {
277                     LOG.debug("Response is immutable and fresh, considered cacheable without further validation");
278                 }
279                 return true;
280             }
281             if (freshnessLifetime.compareTo(Duration.ZERO) > 0) {
282                 return true;
283             }
284         } else if (isUnknownStatusCode(status)) {
285             // a response with an unknown status code MUST NOT be
286             // cached
287             if (LOG.isDebugEnabled()) {
288                 LOG.debug("{} response is unknown", status);
289             }
290             return false;
291         }
292         return false;
293     }
294 
295     private boolean expiresHeaderLessOrEqualToDateHeaderAndNoCacheControl(final ResponseCacheControl cacheControl, final Instant responseDate, final Instant expires) {
296         if (!cacheControl.isUndefined()) {
297             return false;
298         }
299         if (expires == null || responseDate == null) {
300             return false;
301         }
302         return expires.compareTo(responseDate) <= 0;
303     }
304 
305     private boolean from1_0Origin(final HttpResponse response) {
306         final Iterator<String> it = MessageSupport.iterateTokens(response, HttpHeaders.VIA);
307         if (it.hasNext()) {
308             final String token = it.next();
309             return token.startsWith("1.0 ") || token.startsWith("HTTP/1.0 ");
310         }
311         final ProtocolVersion version = response.getVersion() != null ? response.getVersion() : HttpVersion.DEFAULT;
312         return HttpVersion.HTTP_1_0.equals(version);
313     }
314 
315     /**
316      * Calculates the freshness lifetime of a response, based on the headers in the response.
317      * <p>
318      * This method follows the algorithm for calculating the freshness lifetime.
319      * The freshness lifetime represents the time interval in seconds during which the response can be served without
320      * being considered stale. The freshness lifetime calculation takes into account the s-maxage, max-age, Expires, and
321      * Date headers as follows:
322      * <ul>
323      * <li>If the s-maxage directive is present in the Cache-Control header of the response, its value is used as the
324      * freshness lifetime for shared caches, which typically serve multiple users or clients.</li>
325      * <li>If the max-age directive is present in the Cache-Control header of the response, its value is used as the
326      * freshness lifetime for private caches, which serve a single user or client.</li>
327      * <li>If the Expires header is present in the response, its value is used as the expiration time of the response.
328      * The freshness lifetime is calculated as the difference between the expiration time and the time specified in the
329      * Date header of the response.</li>
330      * <li>If none of the above headers are present or if the calculated freshness lifetime is invalid, a default value of
331      * 5 minutes is returned.</li>
332      * </ul>
333      *
334      * <p>
335      * Note that caching is a complex topic and cache control directives may interact with each other in non-trivial ways.
336      * This method provides a basic implementation of the freshness lifetime calculation algorithm and may not be suitable
337      * for all use cases. Developers should consult the HTTP caching specifications for more information and consider
338      * implementing additional caching mechanisms as needed.
339      * </p>
340      */
341     private Duration calculateFreshnessLifetime(final ResponseCacheControl cacheControl, final Instant responseDate, final Instant responseExpires) {
342 
343         if (cacheControl.isUndefined()) {
344             // If no cache-control header is present, assume no caching directives and return a default value
345             return DEFAULT_FRESHNESS_DURATION; // 5 minutes
346         }
347 
348         // Check if s-maxage is present and use its value if it is
349         if (cacheControl.getSharedMaxAge() != -1) {
350             return Duration.ofSeconds(cacheControl.getSharedMaxAge());
351         } else if (cacheControl.getMaxAge() != -1) {
352             return Duration.ofSeconds(cacheControl.getMaxAge());
353         }
354 
355         if (responseDate != null && responseExpires != null) {
356             return Duration.ofSeconds(responseExpires.getEpochSecond() - responseDate.getEpochSecond());
357         }
358 
359         // If none of the above conditions are met, a heuristic freshness lifetime might be applicable
360         return DEFAULT_FRESHNESS_DURATION; // 5 minutes
361     }
362 
363     /**
364      * Understood status codes include:
365      * - All 2xx (Successful) status codes (200-299)
366      * - All 3xx (Redirection) status codes (300-399)
367      * - All 4xx (Client Error) status codes up to 417 and 421
368      * - All 5xx (Server Error) status codes up to 505
369      *
370      * @param status The HTTP status code to be checked.
371      * @return true if the HTTP status code is understood, false otherwise.
372      */
373     private boolean understoodStatusCode(final int status) {
374         return (status >= 200 && status <= 206)    ||
375                 (status >= 300 && status <= 399)   ||
376                 (status >= 400 && status <= 417)   ||
377                 (status == 421)                    ||
378                 (status >= 500 && status <= 505);
379     }
380 
381     /**
382      * Determines if an HttpResponse is still fresh based on its Date header and calculated freshness lifetime.
383      *
384      * <p>
385      * This method calculates the age of the response from its Date header and compares it with the provided freshness
386      * lifetime. If the age is less than the freshness lifetime, the response is considered fresh.
387      * </p>
388      *
389      * <p>
390      * Note: If the Date header is missing or invalid, this method assumes the response is not fresh.
391      * </p>
392      *
393      * @param responseDate  The response date.
394      * @param freshnessLifetime The calculated freshness lifetime of the HttpResponse.
395      * @return {@code true} if the response age is less than its freshness lifetime, {@code false} otherwise.
396      */
397     private boolean responseIsStillFresh(final Instant responseDate, final Duration freshnessLifetime) {
398         if (responseDate == null) {
399             // The Date header is missing or invalid. Assuming the response is not fresh.
400             return false;
401         }
402         final Duration age = Duration.between(responseDate, Instant.now());
403         return age.compareTo(freshnessLifetime) < 0;
404     }
405 
406 }