1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 package org.apache.hc.core5.net;
29
30 import java.net.URI;
31 import java.nio.ByteBuffer;
32 import java.nio.CharBuffer;
33 import java.nio.charset.Charset;
34 import java.nio.charset.StandardCharsets;
35 import java.util.ArrayList;
36 import java.util.Arrays;
37 import java.util.BitSet;
38 import java.util.Collections;
39 import java.util.List;
40
41 import org.apache.hc.core5.http.NameValuePair;
42 import org.apache.hc.core5.http.message.BasicNameValuePair;
43 import org.apache.hc.core5.http.message.ParserCursor;
44 import org.apache.hc.core5.http.message.TokenParser;
45 import org.apache.hc.core5.util.Args;
46
47
48
49
50
51
52 public class URLEncodedUtils {
53
/** Separator placed between the segments of a path. */
private static final char PATH_SEPARATOR = '/';

/** Query parameter separator: ampersand. */
private static final char QP_SEP_A = '&';

/** Query parameter separator: semicolon. */
private static final char QP_SEP_S = ';';

/** Text emitted between a parameter name and its value. */
private static final String NAME_VALUE_SEPARATOR = "=";

/** Set of characters that split a path into segments; holds only {@link #PATH_SEPARATOR}. */
private static final BitSet PATH_SEPARATORS = new BitSet(256);

static {
    PATH_SEPARATORS.set(PATH_SEPARATOR);
}
63
64
65
66
67
68
69
70
71
72
73
74 public static List<NameValuePair> parse(final URI uri, final Charset charset) {
75 Args.notNull(uri, "URI");
76 final String query = uri.getRawQuery();
77 if (query != null && !query.isEmpty()) {
78 return parse(query, charset);
79 }
80 return createEmptyList();
81 }
82
83
84
85
86
87
88
89
90
91
92
93 public static List<NameValuePair> parse(final CharSequence s, final Charset charset) {
94 if (s == null) {
95 return createEmptyList();
96 }
97 return parse(s, charset, QP_SEP_A, QP_SEP_S);
98 }
99
100
101
102
103
104
105
106
107
108
109
110 public static List<NameValuePair> parse(
111 final CharSequence s, final Charset charset, final char... separators) {
112 Args.notNull(s, "Char sequence");
113 final TokenParser tokenParser = TokenParser.INSTANCE;
114 final BitSet delimSet = new BitSet();
115 for (final char separator: separators) {
116 delimSet.set(separator);
117 }
118 final ParserCursore/ParserCursor.html#ParserCursor">ParserCursor cursor = new ParserCursor(0, s.length());
119 final List<NameValuePair> list = new ArrayList<>();
120 while (!cursor.atEnd()) {
121 delimSet.set('=');
122 final String name = tokenParser.parseToken(s, cursor, delimSet);
123 String value = null;
124 if (!cursor.atEnd()) {
125 final int delim = s.charAt(cursor.getPos());
126 cursor.updatePos(cursor.getPos() + 1);
127 if (delim == '=') {
128 delimSet.clear('=');
129 value = tokenParser.parseToken(s, cursor, delimSet);
130 if (!cursor.atEnd()) {
131 cursor.updatePos(cursor.getPos() + 1);
132 }
133 }
134 }
135 if (!name.isEmpty()) {
136 list.add(new BasicNameValuePair(
137 decodeFormFields(name, charset),
138 decodeFormFields(value, charset)));
139 }
140 }
141 return list;
142 }
143
144 static List<String> splitSegments(final CharSequence s, final BitSet separators) {
145 final ParserCursore/ParserCursor.html#ParserCursor">ParserCursor cursor = new ParserCursor(0, s.length());
146
147 if (cursor.atEnd()) {
148 return Collections.emptyList();
149 }
150 if (separators.get(s.charAt(cursor.getPos()))) {
151 cursor.updatePos(cursor.getPos() + 1);
152 }
153 final List<String> list = new ArrayList<>();
154 final StringBuilder buf = new StringBuilder();
155 for (;;) {
156 if (cursor.atEnd()) {
157 list.add(buf.toString());
158 break;
159 }
160 final char current = s.charAt(cursor.getPos());
161 if (separators.get(current)) {
162 list.add(buf.toString());
163 buf.setLength(0);
164 } else {
165 buf.append(current);
166 }
167 cursor.updatePos(cursor.getPos() + 1);
168 }
169 return list;
170 }
171
172 static List<String> splitPathSegments(final CharSequence s) {
173 return splitSegments(s, PATH_SEPARATORS);
174 }
175
176
177
178
179
180
181
182
183
184
185 public static List<String> parsePathSegments(final CharSequence s, final Charset charset) {
186 Args.notNull(s, "Char sequence");
187 final List<String> list = splitPathSegments(s);
188 for (int i = 0; i < list.size(); i++) {
189 list.set(i, urlDecode(list.get(i), charset != null ? charset : StandardCharsets.UTF_8, false));
190 }
191 return list;
192 }
193
194
195
196
197
198
199
200
201
202 public static List<String> parsePathSegments(final CharSequence s) {
203 return parsePathSegments(s, StandardCharsets.UTF_8);
204 }
205
206 static void formatSegments(final StringBuilder buf, final Iterable<String> segments, final Charset charset) {
207 for (final String segment : segments) {
208 buf.append(PATH_SEPARATOR);
209 urlEncode(buf, segment, charset, PATHSAFE, false);
210 }
211 }
212
213
214
215
216
217
218
219
220
221
222 public static String formatSegments(final Iterable<String> segments, final Charset charset) {
223 Args.notNull(segments, "Segments");
224 final StringBuilder buf = new StringBuilder();
225 formatSegments(buf, segments, charset);
226 return buf.toString();
227 }
228
229
230
231
232
233
234
235
236
237 public static String formatSegments(final String... segments) {
238 return formatSegments(Arrays.asList(segments), StandardCharsets.UTF_8);
239 }
240
241 static void formatNameValuePairs(
242 final StringBuilder buf,
243 final Iterable<? extends NameValuePair> parameters,
244 final char parameterSeparator,
245 final Charset charset) {
246 int i = 0;
247 for (final NameValuePair parameter : parameters) {
248 if (i > 0) {
249 buf.append(parameterSeparator);
250 }
251 encodeFormFields(buf, parameter.getName(), charset);
252 if (parameter.getValue() != null) {
253 buf.append(NAME_VALUE_SEPARATOR);
254 encodeFormFields(buf, parameter.getValue(), charset);
255 }
256 i++;
257 }
258 }
259
260 static void formatParameters(
261 final StringBuilder buf,
262 final Iterable<? extends NameValuePair> parameters,
263 final Charset charset) {
264 formatNameValuePairs(buf, parameters, QP_SEP_A, charset);
265 }
266
267
268
269
270
271
272
273
274
275
276
277
278 public static String format(
279 final Iterable<? extends NameValuePair> parameters,
280 final char parameterSeparator,
281 final Charset charset) {
282 Args.notNull(parameters, "Parameters");
283 final StringBuilder buf = new StringBuilder();
284 formatNameValuePairs(buf, parameters, parameterSeparator, charset);
285 return buf.toString();
286 }
287
288
289
290
291
292
293
294
295
296
297
298 public static String format(
299 final Iterable<? extends NameValuePair> parameters,
300 final Charset charset) {
301 return format(parameters, QP_SEP_A, charset);
302 }
303
304
305
306
307
308
309
/** ASCII letters, digits and the marks {@code _ - . * ! ~ ' ( )}. */
private static final BitSet UNRESERVED = new BitSet(256);

/** The punctuation characters {@code , ; : $ & + =}. */
private static final BitSet PUNCT = new BitSet(256);

/** Characters written unescaped in user info: {@link #UNRESERVED} plus {@link #PUNCT}. */
private static final BitSet USERINFO = new BitSet(256);

/** Characters written unescaped in a path segment: {@link #UNRESERVED} plus {@code ; : @ & = + $ ,}. */
private static final BitSet PATHSAFE = new BitSet(256);

/** Characters written unescaped by {@code encUric}: {@link #RESERVED} plus {@link #UNRESERVED}. */
private static final BitSet URIC = new BitSet(256);

/** The characters {@code ; / ? : @ & = + $ , [ ]}. */
private static final BitSet RESERVED = new BitSet(256);

/**
 * Characters written unescaped in form fields: ASCII letters, digits and {@code _ - . *}.
 * This is the state of {@link #UNRESERVED} before the marks {@code ! ~ ' ( )} are added.
 */
private static final BitSet URLENCODER = new BitSet(256);

/** {@link #PATHSAFE} plus {@code '/'}. */
private static final BitSet PATH_SPECIAL = new BitSet(256);

static {
    // Alphanumerics (upper bounds of the ranges are exclusive).
    UNRESERVED.set('a', 'z' + 1);
    UNRESERVED.set('A', 'Z' + 1);
    UNRESERVED.set('0', '9' + 1);
    for (final char mark : "_-.*".toCharArray()) {
        UNRESERVED.set(mark);
    }
    // Order matters: the form-field set is copied before the remaining marks are added.
    URLENCODER.or(UNRESERVED);
    for (final char mark : "!~'()".toCharArray()) {
        UNRESERVED.set(mark);
    }

    for (final char punct : ",;:$&+=".toCharArray()) {
        PUNCT.set(punct);
    }

    USERINFO.or(UNRESERVED);
    USERINFO.or(PUNCT);

    PATHSAFE.or(UNRESERVED);
    for (final char safe : ";:@&=+$,".toCharArray()) {
        PATHSAFE.set(safe);
    }

    PATH_SPECIAL.or(PATHSAFE);
    PATH_SPECIAL.set('/');

    for (final char reserved : ";/?:@&=+$,[]".toCharArray()) {
        RESERVED.set(reserved);
    }

    URIC.or(RESERVED);
    URIC.or(UNRESERVED);
}

/** Radix used when producing the hexadecimal digits of a percent escape. */
private static final int RADIX = 16;
413
414 private static List<NameValuePair> createEmptyList() {
415 return new ArrayList<>(0);
416 }
417
418 private static void urlEncode(
419 final StringBuilder buf,
420 final String content,
421 final Charset charset,
422 final BitSet safechars,
423 final boolean blankAsPlus) {
424 if (content == null) {
425 return;
426 }
427 final ByteBuffer bb = charset.encode(content);
428 while (bb.hasRemaining()) {
429 final int b = bb.get() & 0xff;
430 if (safechars.get(b)) {
431 buf.append((char) b);
432 } else if (blankAsPlus && b == ' ') {
433 buf.append('+');
434 } else {
435 buf.append("%");
436 final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
437 final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
438 buf.append(hex1);
439 buf.append(hex2);
440 }
441 }
442 }
443
444 private static String urlDecode(
445 final String content,
446 final Charset charset,
447 final boolean plusAsBlank) {
448 if (content == null) {
449 return null;
450 }
451 final ByteBuffer bb = ByteBuffer.allocate(content.length());
452 final CharBuffer cb = CharBuffer.wrap(content);
453 while (cb.hasRemaining()) {
454 final char c = cb.get();
455 if (c == '%' && cb.remaining() >= 2) {
456 final char uc = cb.get();
457 final char lc = cb.get();
458 final int u = Character.digit(uc, 16);
459 final int l = Character.digit(lc, 16);
460 if (u != -1 && l != -1) {
461 bb.put((byte) ((u << 4) + l));
462 } else {
463 bb.put((byte) '%');
464 bb.put((byte) uc);
465 bb.put((byte) lc);
466 }
467 } else if (plusAsBlank && c == '+') {
468 bb.put((byte) ' ');
469 } else {
470 bb.put((byte) c);
471 }
472 }
473 bb.flip();
474 return charset.decode(bb).toString();
475 }
476
477 static String decodeFormFields(final String content, final Charset charset) {
478 if (content == null) {
479 return null;
480 }
481 return urlDecode(content, charset != null ? charset : StandardCharsets.UTF_8, true);
482 }
483
484 static void encodeFormFields(final StringBuilder buf, final String content, final Charset charset) {
485 if (content == null) {
486 return;
487 }
488 urlEncode(buf, content, charset != null ? charset : StandardCharsets.UTF_8, URLENCODER, true);
489 }
490
491 static void encUserInfo(final StringBuilder buf, final String content, final Charset charset) {
492 urlEncode(buf, content, charset != null ? charset : StandardCharsets.UTF_8, USERINFO, false);
493 }
494
495 static void encUric(final StringBuilder buf, final String content, final Charset charset) {
496 urlEncode(buf, content, charset != null ? charset : StandardCharsets.UTF_8, URIC, false);
497 }
498
499 }