1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 package org.apache.hc.core5.util;
29
30 import java.util.BitSet;
31
32 import org.apache.hc.core5.annotation.Contract;
33 import org.apache.hc.core5.annotation.ThreadingBehavior;
34
35
36
37
38
39
40
41
42
43
44 @Contract(threading = ThreadingBehavior.IMMUTABLE)
45 public class Tokenizer {
46
47 public static class Cursor {
48
49 private final int lowerBound;
50 private final int upperBound;
51 private int pos;
52
53 public Cursor(final int lowerBound, final int upperBound) {
54 super();
55 Args.notNegative(lowerBound, "lowerBound");
56 Args.check(lowerBound <= upperBound, "lowerBound cannot be greater than upperBound");
57 this.lowerBound = lowerBound;
58 this.upperBound = upperBound;
59 this.pos = lowerBound;
60 }
61
62 public int getLowerBound() {
63 return this.lowerBound;
64 }
65
66 public int getUpperBound() {
67 return this.upperBound;
68 }
69
70 public int getPos() {
71 return this.pos;
72 }
73
74 public void updatePos(final int pos) {
75 Args.check(pos >= this.lowerBound, "pos: %s < lowerBound: %s", pos, this.lowerBound);
76 Args.check(pos <= this.upperBound, "pos: %s > upperBound: %s", pos, this.upperBound);
77 this.pos = pos;
78 }
79
80 public boolean atEnd() {
81 return this.pos >= this.upperBound;
82 }
83
84 @Override
85 public String toString() {
86 final StringBuilder buffer = new StringBuilder();
87 buffer.append('[');
88 buffer.append(this.lowerBound);
89 buffer.append('>');
90 buffer.append(this.pos);
91 buffer.append('>');
92 buffer.append(this.upperBound);
93 buffer.append(']');
94 return buffer.toString();
95 }
96
97 }
98
99 public static BitSet INIT_BITSET(final int ... b) {
100 final BitSet bitset = new BitSet();
101 for (final int aB : b) {
102 bitset.set(aB);
103 }
104 return bitset;
105 }
106
107
108 public static final char DQUOTE = '\"';
109
110
111 public static final char ESCAPE = '\\';
112
113 public static final int CR = 13;
114 public static final int LF = 10;
115 public static final int SP = 32;
116 public static final int HT = 9;
117
118 public static boolean isWhitespace(final char ch) {
119 return ch == SP || ch == HT || ch == CR || ch == LF;
120 }
121
/**
 * Shared default instance. The class is annotated with
 * {@code ThreadingBehavior.IMMUTABLE} and declares no instance fields.
 */
public static final Tokenizer INSTANCE = new Tokenizer();
123
124
125
126
127
128
129
130
131
132
133 public String parseContent(final CharSequence buf, final Cursor cursor, final BitSet delimiters) {
134 Args.notNull(buf, "Char sequence");
135 Args.notNull(cursor, "Parser cursor");
136 final StringBuilder dst = new StringBuilder();
137 copyContent(buf, cursor, delimiters, dst);
138 return dst.toString();
139 }
140
141
142
143
144
145
146
147
148
149
150 public String parseToken(final CharSequence buf, final Cursor cursor, final BitSet delimiters) {
151 Args.notNull(buf, "Char sequence");
152 Args.notNull(cursor, "Parser cursor");
153 final StringBuilder dst = new StringBuilder();
154 boolean whitespace = false;
155 while (!cursor.atEnd()) {
156 final char current = buf.charAt(cursor.getPos());
157 if (delimiters != null && delimiters.get(current)) {
158 break;
159 } else if (isWhitespace(current)) {
160 skipWhiteSpace(buf, cursor);
161 whitespace = true;
162 } else {
163 if (whitespace && dst.length() > 0) {
164 dst.append(' ');
165 }
166 copyContent(buf, cursor, delimiters, dst);
167 whitespace = false;
168 }
169 }
170 return dst.toString();
171 }
172
173
174
175
176
177
178
179
180
181
182
183 public String parseValue(final CharSequence buf, final Cursor cursor, final BitSet delimiters) {
184 Args.notNull(buf, "Char sequence");
185 Args.notNull(cursor, "Parser cursor");
186 final StringBuilder dst = new StringBuilder();
187 boolean whitespace = false;
188 while (!cursor.atEnd()) {
189 final char current = buf.charAt(cursor.getPos());
190 if (delimiters != null && delimiters.get(current)) {
191 break;
192 } else if (isWhitespace(current)) {
193 skipWhiteSpace(buf, cursor);
194 whitespace = true;
195 } else if (current == DQUOTE) {
196 if (whitespace && dst.length() > 0) {
197 dst.append(' ');
198 }
199 copyQuotedContent(buf, cursor, dst);
200 whitespace = false;
201 } else {
202 if (whitespace && dst.length() > 0) {
203 dst.append(' ');
204 }
205 copyUnquotedContent(buf, cursor, delimiters, dst);
206 whitespace = false;
207 }
208 }
209 return dst.toString();
210 }
211
212
213
214
215
216
217
218
219 public void skipWhiteSpace(final CharSequence buf, final Cursor cursor) {
220 Args.notNull(buf, "Char sequence");
221 Args.notNull(cursor, "Parser cursor");
222 int pos = cursor.getPos();
223 final int indexFrom = cursor.getPos();
224 final int indexTo = cursor.getUpperBound();
225 for (int i = indexFrom; i < indexTo; i++) {
226 final char current = buf.charAt(i);
227 if (!isWhitespace(current)) {
228 break;
229 }
230 pos++;
231 }
232 cursor.updatePos(pos);
233 }
234
235
236
237
238
239
240
241
242
243
244
245 public void copyContent(final CharSequence buf, final Cursor cursor, final BitSet delimiters,
246 final StringBuilder dst) {
247 Args.notNull(buf, "Char sequence");
248 Args.notNull(cursor, "Parser cursor");
249 Args.notNull(dst, "String builder");
250 int pos = cursor.getPos();
251 final int indexFrom = cursor.getPos();
252 final int indexTo = cursor.getUpperBound();
253 for (int i = indexFrom; i < indexTo; i++) {
254 final char current = buf.charAt(i);
255 if ((delimiters != null && delimiters.get(current)) || isWhitespace(current)) {
256 break;
257 }
258 pos++;
259 dst.append(current);
260 }
261 cursor.updatePos(pos);
262 }
263
264
265
266
267
268
269
270
271
272
273
274 public void copyUnquotedContent(final CharSequence buf, final Cursor cursor,
275 final BitSet delimiters, final StringBuilder dst) {
276 Args.notNull(buf, "Char sequence");
277 Args.notNull(cursor, "Parser cursor");
278 Args.notNull(dst, "String builder");
279 int pos = cursor.getPos();
280 final int indexFrom = cursor.getPos();
281 final int indexTo = cursor.getUpperBound();
282 for (int i = indexFrom; i < indexTo; i++) {
283 final char current = buf.charAt(i);
284 if ((delimiters != null && delimiters.get(current))
285 || isWhitespace(current) || current == DQUOTE) {
286 break;
287 }
288 pos++;
289 dst.append(current);
290 }
291 cursor.updatePos(pos);
292 }
293
294
295
296
297
298
299
300
301 public void copyQuotedContent(final CharSequence buf, final Cursor cursor,
302 final StringBuilder dst) {
303 Args.notNull(buf, "Char sequence");
304 Args.notNull(cursor, "Parser cursor");
305 Args.notNull(dst, "String builder");
306 if (cursor.atEnd()) {
307 return;
308 }
309 int pos = cursor.getPos();
310 int indexFrom = cursor.getPos();
311 final int indexTo = cursor.getUpperBound();
312 char current = buf.charAt(pos);
313 if (current != DQUOTE) {
314 return;
315 }
316 pos++;
317 indexFrom++;
318 boolean escaped = false;
319 for (int i = indexFrom; i < indexTo; i++, pos++) {
320 current = buf.charAt(i);
321 if (escaped) {
322 if (current != DQUOTE && current != ESCAPE) {
323 dst.append(ESCAPE);
324 }
325 dst.append(current);
326 escaped = false;
327 } else {
328 if (current == DQUOTE) {
329 pos++;
330 break;
331 }
332 if (current == ESCAPE) {
333 escaped = true;
334 } else if (current != CR && current != LF) {
335 dst.append(current);
336 }
337 }
338 }
339 cursor.updatePos(pos);
340 }
341
342 }