View Javadoc
1   /*
2    * ====================================================================
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing,
14   * software distributed under the License is distributed on an
15   * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16   * KIND, either express or implied.  See the License for the
17   * specific language governing permissions and limitations
18   * under the License.
19   * ====================================================================
20   *
21   * This software consists of voluntary contributions made by many
22   * individuals on behalf of the Apache Software Foundation.  For more
23   * information on the Apache Software Foundation, please see
24   * <http://www.apache.org/>.
25   *
26   */
27  
28  package org.apache.http.message;
29  
30  import java.util.BitSet;
31  
32  import org.apache.http.annotation.ThreadingBehavior;
33  import org.apache.http.annotation.Contract;
34  import org.apache.http.util.CharArrayBuffer;
35  
36  /**
37   * Low level parser for header field elements. The parsing routines of this class are designed
38   * to produce near zero intermediate garbage and make no intermediate copies of input data.
39   * <p>
40   * This class is immutable and thread safe.
41   *
42   * @since 4.4
43   */
44  @Contract(threading = ThreadingBehavior.IMMUTABLE)
45  public class TokenParser {
46  
47      public static BitSet INIT_BITSET(final int ... b) {
48          final BitSet bitset = new BitSet();
49          for (final int aB : b) {
50              bitset.set(aB);
51          }
52          return bitset;
53      }
54  
55      /** US-ASCII CR, carriage return (13) */
56      public static final char CR = '\r';
57  
58      /** US-ASCII LF, line feed (10) */
59      public static final char LF = '\n';
60  
61      /** US-ASCII SP, space (32) */
62      public static final char SP = ' ';
63  
64      /** US-ASCII HT, horizontal-tab (9) */
65      public static final char HT = '\t';
66  
67      /** Double quote */
68      public static final char DQUOTE = '\"';
69  
70      /** Backward slash / escape character */
71      public static final char ESCAPE = '\\';
72  
73      public static boolean isWhitespace(final char ch) {
74          return ch == SP || ch == HT || ch == CR || ch == LF;
75      }
76  
77      public static final TokenParser.html#TokenParser">TokenParser INSTANCE = new TokenParser();
78  
79      /**
80       * Extracts from the sequence of chars a token terminated with any of the given delimiters
81       * discarding semantically insignificant whitespace characters.
82       *
83       * @param buf buffer with the sequence of chars to be parsed
84       * @param cursor defines the bounds and current position of the buffer
85       * @param delimiters set of delimiting characters. Can be {@code null} if the token
86       *  is not delimited by any character.
87       */
88      public String parseToken(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
89          final StringBuilder dst = new StringBuilder();
90          boolean whitespace = false;
91          while (!cursor.atEnd()) {
92              final char current = buf.charAt(cursor.getPos());
93              if (delimiters != null && delimiters.get(current)) {
94                  break;
95              } else if (isWhitespace(current)) {
96                  skipWhiteSpace(buf, cursor);
97                  whitespace = true;
98              } else {
99                  if (whitespace && dst.length() > 0) {
100                     dst.append(' ');
101                 }
102                 copyContent(buf, cursor, delimiters, dst);
103                 whitespace = false;
104             }
105         }
106         return dst.toString();
107     }
108 
109     /**
110      * Extracts from the sequence of chars a value which can be enclosed in quote marks and
111      * terminated with any of the given delimiters discarding semantically insignificant
112      * whitespace characters.
113      *
114      * @param buf buffer with the sequence of chars to be parsed
115      * @param cursor defines the bounds and current position of the buffer
116      * @param delimiters set of delimiting characters. Can be {@code null} if the value
117      *  is not delimited by any character.
118      */
119     public String parseValue(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters) {
120         final StringBuilder dst = new StringBuilder();
121         boolean whitespace = false;
122         while (!cursor.atEnd()) {
123             final char current = buf.charAt(cursor.getPos());
124             if (delimiters != null && delimiters.get(current)) {
125                 break;
126             } else if (isWhitespace(current)) {
127                 skipWhiteSpace(buf, cursor);
128                 whitespace = true;
129             } else if (current == DQUOTE) {
130                 if (whitespace && dst.length() > 0) {
131                     dst.append(' ');
132                 }
133                 copyQuotedContent(buf, cursor, dst);
134                 whitespace = false;
135             } else {
136                 if (whitespace && dst.length() > 0) {
137                     dst.append(' ');
138                 }
139                 copyUnquotedContent(buf, cursor, delimiters, dst);
140                 whitespace = false;
141             }
142         }
143         return dst.toString();
144     }
145 
146     /**
147      * Skips semantically insignificant whitespace characters and moves the cursor to the closest
148      * non-whitespace character.
149      *
150      * @param buf buffer with the sequence of chars to be parsed
151      * @param cursor defines the bounds and current position of the buffer
152      */
153     public void skipWhiteSpace(final CharArrayBuffer buf, final ParserCursor cursor) {
154         int pos = cursor.getPos();
155         final int indexFrom = cursor.getPos();
156         final int indexTo = cursor.getUpperBound();
157         for (int i = indexFrom; i < indexTo; i++) {
158             final char current = buf.charAt(i);
159             if (!isWhitespace(current)) {
160                 break;
161             }
162             pos++;
163         }
164         cursor.updatePos(pos);
165     }
166 
167     /**
168      * Transfers content into the destination buffer until a whitespace character or any of
169      * the given delimiters is encountered.
170      *
171      * @param buf buffer with the sequence of chars to be parsed
172      * @param cursor defines the bounds and current position of the buffer
173      * @param delimiters set of delimiting characters. Can be {@code null} if the value
174      *  is delimited by a whitespace only.
175      * @param dst destination buffer
176      */
177     public void copyContent(final CharArrayBuffer buf, final ParserCursor cursor, final BitSet delimiters,
178             final StringBuilder dst) {
179         int pos = cursor.getPos();
180         final int indexFrom = cursor.getPos();
181         final int indexTo = cursor.getUpperBound();
182         for (int i = indexFrom; i < indexTo; i++) {
183             final char current = buf.charAt(i);
184             if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
185                 break;
186             }
187             pos++;
188             dst.append(current);
189         }
190         cursor.updatePos(pos);
191     }
192 
193     /**
194      * Transfers content into the destination buffer until a whitespace character,  a quote,
195      * or any of the given delimiters is encountered.
196      *
197      * @param buf buffer with the sequence of chars to be parsed
198      * @param cursor defines the bounds and current position of the buffer
199      * @param delimiters set of delimiting characters. Can be {@code null} if the value
200      *  is delimited by a whitespace or a quote only.
201      * @param dst destination buffer
202      */
203     public void copyUnquotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
204             final BitSet delimiters, final StringBuilder dst) {
205         int pos = cursor.getPos();
206         final int indexFrom = cursor.getPos();
207         final int indexTo = cursor.getUpperBound();
208         for (int i = indexFrom; i < indexTo; i++) {
209             final char current = buf.charAt(i);
210             if ((delimiters != null && delimiters.get(current))
211                     || isWhitespace(current) || current == DQUOTE) {
212                 break;
213             }
214             pos++;
215             dst.append(current);
216         }
217         cursor.updatePos(pos);
218     }
219 
220     /**
221      * Transfers content enclosed with quote marks into the destination buffer.
222      *
223      * @param buf buffer with the sequence of chars to be parsed
224      * @param cursor defines the bounds and current position of the buffer
225      * @param dst destination buffer
226      */
227     public void copyQuotedContent(final CharArrayBuffer buf, final ParserCursor cursor,
228             final StringBuilder dst) {
229         if (cursor.atEnd()) {
230             return;
231         }
232         int pos = cursor.getPos();
233         int indexFrom = cursor.getPos();
234         final int indexTo = cursor.getUpperBound();
235         char current = buf.charAt(pos);
236         if (current != DQUOTE) {
237             return;
238         }
239         pos++;
240         indexFrom++;
241         boolean escaped = false;
242         for (int i = indexFrom; i < indexTo; i++, pos++) {
243             current = buf.charAt(i);
244             if (escaped) {
245                 if (current != DQUOTE && current != ESCAPE) {
246                     dst.append(ESCAPE);
247                 }
248                 dst.append(current);
249                 escaped = false;
250             } else {
251                 if (current == DQUOTE) {
252                     pos++;
253                     break;
254                 }
255                 if (current == ESCAPE) {
256                     escaped = true;
257                 } else if (current != CR && current != LF) {
258                     dst.append(current);
259                 }
260             }
261         }
262         cursor.updatePos(pos);
263     }
264 
265 }