View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  package org.apache.hc.client5.http.impl.cache;
28  
29  import java.time.Instant;
30  import java.util.ArrayList;
31  import java.util.List;
32  import java.util.regex.Matcher;
33  import java.util.regex.Pattern;
34  
35  import org.apache.hc.client5.http.utils.DateUtils;
36  import org.apache.hc.core5.http.Header;
37  
38  /** This class provides for parsing and understanding Warning headers. As
39   * the Warning header can be multi-valued, but the values can contain
40   * separators like commas inside quoted strings, we cannot use the regular
41   * {@link Header#getValue()} } call to access the values.
42   */
43  class WarningValue {
44  
45      private int offs;
46      private final int init_offs;
47      private final String src;
48      private int warnCode;
49      private String warnAgent;
50      private String warnText;
51      private Instant warnDate;
52  
53      WarningValue(final String s) {
54          this(s, 0);
55      }
56  
57      WarningValue(final String s, final int offs) {
58          this.offs = this.init_offs = offs;
59          this.src = s;
60          consumeWarnValue();
61      }
62  
63      /** Returns an array of the parsable warning values contained
64       * in the given header value, which is assumed to be a
65       * Warning header. Improperly formatted warning values will be
66       * skipped, in keeping with the philosophy of "ignore what you
67       * cannot understand."
68       * @param h Warning {@link Header} to parse
69       * @return array of {@code WarnValue} objects
70       */
71      public static WarningValue[] getWarningValues(final Header h) {
72          final List<WarningValue> out = new ArrayList<>();
73          final String src = h.getValue();
74          int offs = 0;
75          while(offs < src.length()) {
76              try {
77                  final WarningValue wv = new WarningValue(src, offs);
78                  out.add(wv);
79                  offs = wv.offs;
80              } catch (final IllegalArgumentException e) {
81                  final int nextComma = src.indexOf(',', offs);
82                  if (nextComma == -1) {
83                      break;
84                  }
85                  offs = nextComma + 1;
86              }
87          }
88          final WarningValue[] wvs = {};
89          return out.toArray(wvs);
90      }
91  
92      /*
93       * LWS            = [CRLF] 1*( SP | HT )
94       * CRLF           = CR LF
95       */
96      protected void consumeLinearWhitespace() {
97          while(offs < src.length()) {
98              switch(src.charAt(offs)) {
99              case '\r':
100                 if (offs+2 >= src.length()
101                     || src.charAt(offs+1) != '\n'
102                     || (src.charAt(offs+2) != ' '
103                         && src.charAt(offs+2) != '\t')) {
104                     return;
105                 }
106                 offs += 2;
107                 break;
108             case ' ':
109             case '\t':
110                 break;
111             default:
112                 return;
113             }
114             offs++;
115         }
116     }
117 
118     /*
119      * CHAR           = <any US-ASCII character (octets 0 - 127)>
120      */
121     private boolean isChar(final char c) {
122         return ((int) c >= 0 && (int) c <= 127);
123     }
124 
125     /*
126      * CTL            = <any US-ASCII control character
127                         (octets 0 - 31) and DEL (127)>
128      */
129     private boolean isControl(final char c) {
130         return ((int) c == 127 || ((int) c >=0 && (int) c <= 31));
131     }
132 
133     /*
134      * separators     = "(" | ")" | "<" | ">" | "@"
135      *                | "," | ";" | ":" | "\" | <">
136      *                | "/" | "[" | "]" | "?" | "="
137      *                | "{" | "}" | SP | HT
138      */
139     private boolean isSeparator(final char c) {
140         return (c == '(' || c == ')' || c == '<' || c == '>'
141                 || c == '@' || c == ',' || c == ';' || c == ':'
142                 || c == '\\' || c == '\"' || c == '/'
143                 || c == '[' || c == ']' || c == '?' || c == '='
144                 || c == '{' || c == '}' || c == ' ' || c == '\t');
145     }
146 
147     /*
148      * token          = 1*<any CHAR except CTLs or separators>
149      */
150     protected void consumeToken() {
151         if (!isTokenChar(src.charAt(offs))) {
152             parseError();
153         }
154         while(offs < src.length()) {
155             if (!isTokenChar(src.charAt(offs))) {
156                 break;
157             }
158             offs++;
159         }
160     }
161 
162     private boolean isTokenChar(final char c) {
163         return (isChar(c) && !isControl(c) && !isSeparator(c));
164     }
165 
166     private static final String TOPLABEL = "\\p{Alpha}([\\p{Alnum}-]*\\p{Alnum})?";
167     private static final String DOMAINLABEL = "\\p{Alnum}([\\p{Alnum}-]*\\p{Alnum})?";
168     private static final String HOSTNAME = "(" + DOMAINLABEL + "\\.)*" + TOPLABEL + "\\.?";
169     private static final String IPV4ADDRESS = "\\d+\\.\\d+\\.\\d+\\.\\d+";
170     private static final String HOST = "(" + HOSTNAME + ")|(" + IPV4ADDRESS + ")";
171     private static final String PORT = "\\d*";
172     private static final String HOSTPORT = "(" + HOST + ")(\\:" + PORT + ")?";
173     private static final Pattern HOSTPORT_PATTERN = Pattern.compile(HOSTPORT);
174 
175     protected void consumeHostPort() {
176         final Matcher m = HOSTPORT_PATTERN.matcher(src.substring(offs));
177         if (!m.find()) {
178             parseError();
179         }
180         if (m.start() != 0) {
181             parseError();
182         }
183         offs += m.end();
184     }
185 
186 
187     /*
188      * warn-agent = ( host [ ":" port ] ) | pseudonym
189      * pseudonym         = token
190      */
191     protected void consumeWarnAgent() {
192         final int curr_offs = offs;
193         try {
194             consumeHostPort();
195             warnAgent = src.substring(curr_offs, offs);
196             consumeCharacter(' ');
197             return;
198         } catch (final IllegalArgumentException e) {
199             offs = curr_offs;
200         }
201         consumeToken();
202         warnAgent = src.substring(curr_offs, offs);
203         consumeCharacter(' ');
204     }
205 
206     /*
207      * quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
208      * qdtext         = <any TEXT except <">>
209      */
210     protected void consumeQuotedString() {
211         if (src.charAt(offs) != '\"') {
212             parseError();
213         }
214         offs++;
215         boolean foundEnd = false;
216         while(offs < src.length() && !foundEnd) {
217             final char c = src.charAt(offs);
218             if (offs + 1 < src.length() && c == '\\'
219                 && isChar(src.charAt(offs+1))) {
220                 offs += 2;    // consume quoted-pair
221             } else if (c == '\"') {
222                 foundEnd = true;
223                 offs++;
224             } else if (!isControl(c)) {
225                 offs++;
226             } else {
227                 parseError();
228             }
229         }
230         if (!foundEnd) {
231             parseError();
232         }
233     }
234 
235     /*
236      * warn-text  = quoted-string
237      */
238     protected void consumeWarnText() {
239         final int curr = offs;
240         consumeQuotedString();
241         warnText = src.substring(curr, offs);
242     }
243 
244     private static final String MONTH = "Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec";
245     private static final String WEEKDAY = "Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday";
246     private static final String WKDAY = "Mon|Tue|Wed|Thu|Fri|Sat|Sun";
247     private static final String TIME = "\\d{2}:\\d{2}:\\d{2}";
248     private static final String DATE3 = "(" + MONTH + ") ( |\\d)\\d";
249     private static final String DATE2 = "\\d{2}-(" + MONTH + ")-\\d{2}";
250     private static final String DATE1 = "\\d{2} (" + MONTH + ") \\d{4}";
251     private static final String ASCTIME_DATE = "(" + WKDAY + ") (" + DATE3 + ") (" + TIME + ") \\d{4}";
252     private static final String RFC850_DATE = "(" + WEEKDAY + "), (" + DATE2 + ") (" + TIME + ") GMT";
253     private static final String RFC1123_DATE = "(" + WKDAY + "), (" + DATE1 + ") (" + TIME + ") GMT";
254     private static final String HTTP_DATE = "(" + RFC1123_DATE + ")|(" + RFC850_DATE + ")|(" + ASCTIME_DATE + ")";
255     private static final String WARN_DATE = "\"(" + HTTP_DATE + ")\"";
256     private static final Pattern WARN_DATE_PATTERN = Pattern.compile(WARN_DATE);
257 
258     /*
259      * warn-date  = <"> HTTP-date <">
260      */
261     protected void consumeWarnDate() {
262         final int curr = offs;
263         final Matcher m = WARN_DATE_PATTERN.matcher(src.substring(offs));
264         if (!m.lookingAt()) {
265             parseError();
266         }
267         offs += m.end();
268         warnDate = DateUtils.parseStandardDate(src.substring(curr+1,offs-1));
269     }
270 
271     /*
272      * warning-value = warn-code SP warn-agent SP warn-text [SP warn-date]
273      */
274     protected void consumeWarnValue() {
275         consumeLinearWhitespace();
276         consumeWarnCode();
277         consumeWarnAgent();
278         consumeWarnText();
279         if (offs + 1 < src.length() && src.charAt(offs) == ' ' && src.charAt(offs+1) == '\"') {
280             consumeCharacter(' ');
281             consumeWarnDate();
282         }
283         consumeLinearWhitespace();
284         if (offs != src.length()) {
285             consumeCharacter(',');
286         }
287     }
288 
289     protected void consumeCharacter(final char c) {
290         if (offs + 1 > src.length()
291             || c != src.charAt(offs)) {
292             parseError();
293         }
294         offs++;
295     }
296 
297     /*
298      * warn-code  = 3DIGIT
299      */
300     protected void consumeWarnCode() {
301         if (offs + 4 > src.length()
302             || !Character.isDigit(src.charAt(offs))
303             || !Character.isDigit(src.charAt(offs + 1))
304             || !Character.isDigit(src.charAt(offs + 2))
305             || src.charAt(offs + 3) != ' ') {
306             parseError();
307         }
308         warnCode = Integer.parseInt(src.substring(offs,offs+3));
309         offs += 4;
310     }
311 
312     private void parseError() {
313         final String s = src.substring(init_offs);
314         throw new IllegalArgumentException("Bad warn code \"" + s + "\"");
315     }
316 
317     /** Returns the 3-digit code associated with this warning.
318      * @return {@code int}
319      */
320     public int getWarnCode() { return warnCode; }
321 
322     /** Returns the "warn-agent" string associated with this warning,
323      * which is either the name or pseudonym of the server that added
324      * this particular Warning header.
325      * @return {@link String}
326      */
327     public String getWarnAgent() { return warnAgent; }
328 
329     /** Returns the human-readable warning text for this warning. Note
330      * that the original quoted-string is returned here, including
331      * escaping for any contained characters. In other words, if the
332      * header was:
333      * <pre>
334      *   Warning: 110 fred "Response is stale"
335      * </pre>
336      * then this method will return {@code "\"Response is stale\""}
337      * (surrounding quotes included).
338      * @return {@link String}
339      */
340     public String getWarnText() { return warnText; }
341 
342     /** Returns the date and time when this warning was added, or
343      * {@code null} if a warning date was not supplied in the
344      * header.
345      * @return {@link Instant}
346      */
347     public Instant getWarnDate() { return warnDate; }
348 
349     /** Formats a {@code WarningValue} as a {@link String}
350      * suitable for including in a header. For example, you can:
351      * <pre>
352      *   WarningValue wv = ...;
353      *   HttpResponse resp = ...;
354      *   resp.addHeader("Warning", wv.toString());
355      * </pre>
356      * @return {@link String}
357      */
358     @Override
359     public String toString() {
360         if (warnDate != null) {
361             return String.format("%d %s %s \"%s\"", warnCode,
362                     warnAgent, warnText, DateUtils.formatStandardDate(warnDate));
363         } else {
364             return String.format("%d %s %s", warnCode, warnAgent, warnText);
365         }
366     }
367 
368 }