1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 package org.apache.commons.httpclient.util;
32
33 import java.util.BitSet;
34
35 import org.apache.commons.codec.DecoderException;
36 import org.apache.commons.codec.net.URLCodec;
37 import org.apache.commons.httpclient.URI;
38 import org.apache.commons.httpclient.URIException;
39
/**
 * Utility methods for URI escaping and for character encoding and decoding.
 * Its behavior is compatible with {@link org.apache.commons.httpclient.HttpURL}
 * rather than with {@link org.apache.commons.httpclient.URI}.
 *
 * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
 * @version $Revision$ $Date: 2002/03/14 15:14:01 $
 */
48 public class URIUtil {
49
50
51
/**
 * A bit set with no bits set. {@link #encodeAll(String, String)} passes it to
 * {@link #encode(String, BitSet, String)} as the set of characters allowed
 * to stay unescaped.
 */
52 protected static final BitSet empty = new BitSet(1);
53
54
55
56 /***
57 * Get the basename of an URI. It's possibly an empty string.
58 *
59 * @param uri a string regarded an URI
60 * @return the basename string; an empty string if the path ends with slash
61 */
62 public static String getName(String uri) {
63 if (uri == null || uri.length() == 0) { return uri; }
64 String path = URIUtil.getPath(uri);
65 int at = path.lastIndexOf("/");
66 int to = path.length();
67 return (at >= 0) ? path.substring(at + 1, to) : path;
68 }
69
70
71 /***
72 * Get the query of an URI.
73 *
74 * @param uri a string regarded an URI
75 * @return the query string; <code>null</code> if empty or undefined
76 */
77 public static String getQuery(String uri) {
78 if (uri == null || uri.length() == 0) { return null; }
79
80 int at = uri.indexOf("//");
81 int from = uri.indexOf(
82 "/",
83 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
84 );
85
86 int to = uri.length();
87
88 at = uri.indexOf("?", from);
89 if (at >= 0) {
90 from = at + 1;
91 } else {
92 return null;
93 }
94
95 if (uri.lastIndexOf("#") > from) {
96 to = uri.lastIndexOf("#");
97 }
98
99 return (from < 0 || from == to) ? null : uri.substring(from, to);
100 }
101
102
103 /***
104 * Get the path of an URI.
105 *
106 * @param uri a string regarded an URI
107 * @return the path string
108 */
109 public static String getPath(String uri) {
110 if (uri == null) {
111 return null;
112 }
113
114 int at = uri.indexOf("//");
115 int from = uri.indexOf(
116 "/",
117 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
118 );
119
120 int to = uri.length();
121
122 if (uri.indexOf('?', from) != -1) {
123 to = uri.indexOf('?', from);
124 }
125
126 if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) {
127 to = uri.lastIndexOf("#");
128 }
129
130 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
131 }
132
133
134 /***
135 * Get the path and query of an URI.
136 *
137 * @param uri a string regarded an URI
138 * @return the path and query string
139 */
140 public static String getPathQuery(String uri) {
141 if (uri == null) {
142 return null;
143 }
144
145 int at = uri.indexOf("//");
146 int from = uri.indexOf(
147 "/",
148 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
149 );
150
151 int to = uri.length();
152
153
154 if (uri.lastIndexOf("#") > from) {
155 to = uri.lastIndexOf("#");
156 }
157
158 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
159 }
160
161
162 /***
163 * Get the path of an URI and its rest part.
164 *
165 * @param uri a string regarded an URI
166 * @return the string from the path part
167 */
168 public static String getFromPath(String uri) {
169 if (uri == null) {
170 return null;
171 }
172
173 int at = uri.indexOf("//");
174 int from = uri.indexOf(
175 "/",
176 at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
177 );
178
179 return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from);
180 }
181
182
183
184 /***
185 * Get the all escaped and encoded string with the default protocl charset.
186 * It's the same function to use <code>encode(String unescaped, Bitset
187 * empty, URI.getDefaultProtocolCharset())</code>.
188 *
189 * @param unescaped an unescaped string
190 * @return the escaped string
191 *
192 * @throws URIException if the default protocol charset is not supported
193 *
194 * @see URI#getDefaultProtocolCharset
195 * @see #encode
196 */
197 public static String encodeAll(String unescaped) throws URIException {
198 return encodeAll(unescaped, URI.getDefaultProtocolCharset());
199 }
200
201
202 /***
203 * Get the all escaped and encoded string with a given charset.
204 * It's the same function to use <code>encode(String unescaped, Bitset
205 * empty, String charset)</code>.
206 *
207 * @param unescaped an unescaped string
208 * @param charset the charset
209 * @return the escaped string
210 *
211 * @throws URIException if the charset is not supported
212 *
213 * @see #encode
214 */
215 public static String encodeAll(String unescaped, String charset)
216 throws URIException {
217
218 return encode(unescaped, empty, charset);
219 }
220
221
222 /***
223 * Escape and encode a string regarded as within the authority component of
224 * an URI with the default protocol charset.
225 * Within the authority component, the characters ";", ":", "@", "?", and
226 * "/" are reserved.
227 *
228 * @param unescaped an unescaped string
229 * @return the escaped string
230 *
231 * @throws URIException if the default protocol charset is not supported
232 *
233 * @see URI#getDefaultProtocolCharset
234 * @see #encode
235 */
236 public static String encodeWithinAuthority(String unescaped)
237 throws URIException {
238
239 return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset());
240 }
241
242
243 /***
244 * Escape and encode a string regarded as within the authority component of
245 * an URI with a given charset.
246 * Within the authority component, the characters ";", ":", "@", "?", and
247 * "/" are reserved.
248 *
249 * @param unescaped an unescaped string
250 * @param charset the charset
251 * @return the escaped string
252 *
253 * @throws URIException if the charset is not supported
254 *
255 * @see #encode
256 */
257 public static String encodeWithinAuthority(String unescaped, String charset)
258 throws URIException {
259
260 return encode(unescaped, URI.allowed_within_authority, charset);
261 }
262
263
264 /***
265 * Escape and encode a string regarded as the path and query components of
266 * an URI with the default protocol charset.
267 *
268 * @param unescaped an unescaped string
269 * @return the escaped string
270 *
271 * @throws URIException if the default protocol charset is not supported
272 *
273 * @see URI#getDefaultProtocolCharset
274 * @see #encode
275 */
276 public static String encodePathQuery(String unescaped) throws URIException {
277 return encodePathQuery(unescaped, URI.getDefaultProtocolCharset());
278 }
279
280
281 /***
282 * Escape and encode a string regarded as the path and query components of
283 * an URI with a given charset.
284 *
285 * @param unescaped an unescaped string
286 * @param charset the charset
287 * @return the escaped string
288 *
289 * @throws URIException if the charset is not supported
290 *
291 * @see #encode
292 */
293 public static String encodePathQuery(String unescaped, String charset)
294 throws URIException {
295
296 int at = unescaped.indexOf('?');
297 if (at < 0) {
298 return encode(unescaped, URI.allowed_abs_path, charset);
299 }
300
301 return encode(unescaped.substring(0, at), URI.allowed_abs_path, charset)
302 + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset);
303 }
304
305
306 /***
307 * Escape and encode a string regarded as within the path component of an
308 * URI with the default protocol charset.
309 * The path may consist of a sequence of path segments separated by a
310 * single slash "/" character. Within a path segment, the characters
311 * "/", ";", "=", and "?" are reserved.
312 *
313 * @param unescaped an unescaped string
314 * @return the escaped string
315 *
316 * @throws URIException if the default protocol charset is not supported
317 *
318 * @see URI#getDefaultProtocolCharset
319 * @see #encode
320 */
321 public static String encodeWithinPath(String unescaped)
322 throws URIException {
323
324 return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset());
325 }
326
327
328 /***
329 * Escape and encode a string regarded as within the path component of an
330 * URI with a given charset.
331 * The path may consist of a sequence of path segments separated by a
332 * single slash "/" character. Within a path segment, the characters
333 * "/", ";", "=", and "?" are reserved.
334 *
335 * @param unescaped an unescaped string
336 * @param charset the charset
337 * @return the escaped string
338 *
339 * @throws URIException if the charset is not supported
340 *
341 * @see #encode
342 */
343 public static String encodeWithinPath(String unescaped, String charset)
344 throws URIException {
345
346 return encode(unescaped, URI.allowed_within_path, charset);
347 }
348
349
350 /***
351 * Escape and encode a string regarded as the path component of an URI with
352 * the default protocol charset.
353 *
354 * @param unescaped an unescaped string
355 * @return the escaped string
356 *
357 * @throws URIException if the default protocol charset is not supported
358 *
359 * @see URI#getDefaultProtocolCharset
360 * @see #encode
361 */
362 public static String encodePath(String unescaped) throws URIException {
363 return encodePath(unescaped, URI.getDefaultProtocolCharset());
364 }
365
366
367 /***
368 * Escape and encode a string regarded as the path component of an URI with
369 * a given charset.
370 *
371 * @param unescaped an unescaped string
372 * @param charset the charset
373 * @return the escaped string
374 *
375 * @throws URIException if the charset is not supported
376 *
377 * @see #encode
378 */
379 public static String encodePath(String unescaped, String charset)
380 throws URIException {
381
382 return encode(unescaped, URI.allowed_abs_path, charset);
383 }
384
385
386 /***
387 * Escape and encode a string regarded as within the query component of an
388 * URI with the default protocol charset.
389 * When a query comprise the name and value pairs, it is used in order
390 * to encode each name and value string. The reserved special characters
391 * within a query component are being included in encoding the query.
392 *
393 * @param unescaped an unescaped string
394 * @return the escaped string
395 *
396 * @throws URIException if the default protocol charset is not supported
397 *
398 * @see URI#getDefaultProtocolCharset
399 * @see #encode
400 */
401 public static String encodeWithinQuery(String unescaped)
402 throws URIException {
403
404 return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset());
405 }
406
407
408 /***
409 * Escape and encode a string regarded as within the query component of an
410 * URI with a given charset.
411 * When a query comprise the name and value pairs, it is used in order
412 * to encode each name and value string. The reserved special characters
413 * within a query component are being included in encoding the query.
414 *
415 * @param unescaped an unescaped string
416 * @param charset the charset
417 * @return the escaped string
418 *
419 * @throws URIException if the charset is not supported
420 *
421 * @see #encode
422 */
423 public static String encodeWithinQuery(String unescaped, String charset)
424 throws URIException {
425
426 return encode(unescaped, URI.allowed_within_query, charset);
427 }
428
429
430 /***
431 * Escape and encode a string regarded as the query component of an URI with
432 * the default protocol charset.
433 * When a query string is not misunderstood the reserved special characters
434 * ("&", "=", "+", ",", and "$") within a query component, this method
435 * is recommended to use in encoding the whole query.
436 *
437 * @param unescaped an unescaped string
438 * @return the escaped string
439 *
440 * @throws URIException if the default protocol charset is not supported
441 *
442 * @see URI#getDefaultProtocolCharset
443 * @see #encode
444 */
445 public static String encodeQuery(String unescaped) throws URIException {
446 return encodeQuery(unescaped, URI.getDefaultProtocolCharset());
447 }
448
449
450 /***
451 * Escape and encode a string regarded as the query component of an URI with
452 * a given charset.
453 * When a query string is not misunderstood the reserved special characters
454 * ("&", "=", "+", ",", and "$") within a query component, this method
455 * is recommended to use in encoding the whole query.
456 *
457 * @param unescaped an unescaped string
458 * @param charset the charset
459 * @return the escaped string
460 *
461 * @throws URIException if the charset is not supported
462 *
463 * @see #encode
464 */
465 public static String encodeQuery(String unescaped, String charset)
466 throws URIException {
467
468 return encode(unescaped, URI.allowed_query, charset);
469 }
470
471
472 /***
473 * Escape and encode a given string with allowed characters not to be
474 * escaped and the default protocol charset.
475 *
476 * @param unescaped a string
477 * @param allowed allowed characters not to be escaped
478 * @return the escaped string
479 *
480 * @throws URIException if the default protocol charset is not supported
481 *
482 * @see URI#getDefaultProtocolCharset
483 */
484 public static String encode(String unescaped, BitSet allowed)
485 throws URIException {
486
487 return encode(unescaped, allowed, URI.getDefaultProtocolCharset());
488 }
489
490
491 /***
492 * Escape and encode a given string with allowed characters not to be
493 * escaped and a given charset.
494 *
495 * @param unescaped a string
496 * @param allowed allowed characters not to be escaped
497 * @param charset the charset
498 * @return the escaped string
499 */
500 public static String encode(String unescaped, BitSet allowed,
501 String charset) throws URIException {
502 byte[] rawdata = URLCodec.encodeUrl(allowed,
503 EncodingUtil.getBytes(unescaped, charset));
504 return EncodingUtil.getAsciiString(rawdata);
505 }
506
507
508 /***
509 * Unescape and decode a given string regarded as an escaped string with the
510 * default protocol charset.
511 *
512 * @param escaped a string
513 * @return the unescaped string
514 *
515 * @throws URIException if the string cannot be decoded (invalid)
516 *
517 * @see URI#getDefaultProtocolCharset
518 */
519 public static String decode(String escaped) throws URIException {
520 try {
521 byte[] rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(escaped));
522 return EncodingUtil.getString(rawdata, URI.getDefaultProtocolCharset());
523 } catch (DecoderException e) {
524 throw new URIException(e.getMessage());
525 }
526 }
527
528 /***
529 * Unescape and decode a given string regarded as an escaped string.
530 *
531 * @param escaped a string
532 * @param charset the charset
533 * @return the unescaped string
534 *
535 * @throws URIException if the charset is not supported
536 *
537 * @see Coder#decode
538 */
539 public static String decode(String escaped, String charset)
540 throws URIException {
541
542 return Coder.decode(escaped.toCharArray(), charset);
543 }
544
545
546
547 /***
548 * The basic and internal utility for URI escape and character encoding and
549 * decoding.
550 *
551 * @deprecated use org.apache.commons.codec.net.URLCodec
552 */
553 protected static class Coder extends URI {
554
555 /***
556 * Escape and encode a given string with allowed characters not to be
557 * escaped.
558 *
559 * @param unescapedComponent an unescaped component
560 * @param allowed allowed characters not to be escaped
561 * @param charset the charset to encode
562 * @return the escaped and encoded string
563 *
564 * @throws URIException if the charset is not supported
565 *
566 * @deprecated use org.apache.commons.codec.net.URLCodec
567 */
568 public static char[] encode(String unescapedComponent, BitSet allowed, String charset)
569 throws URIException {
570
571 return URI.encode(unescapedComponent, allowed, charset);
572 }
573
574
575 /***
576 * Unescape and decode a given string.
577 *
578 * @param escapedComponent an being-unescaped component
579 * @param charset the charset to decode
580 * @return the escaped and encoded string
581 *
582 * @throws URIException if the charset is not supported
583 *
584 * @deprecated use org.apache.commons.codec.net.URLCodec
585 */
586 public static String decode(char[] escapedComponent, String charset)
587 throws URIException {
588
589 return URI.decode(escapedComponent, charset);
590 }
591
592
593 /***
594 * Verify whether a given string is escaped or not
595 *
596 * @param original given characters
597 * @return true if the given character array is 7 bit ASCII-compatible.
598 */
599 public static boolean verifyEscaped(char[] original) {
600 for (int i = 0; i < original.length; i++) {
601 int c = original[i];
602 if (c > 128) {
603 return false;
604 } else if (c == '%') {
605 if (Character.digit(original[++i], 16) == -1
606 || Character.digit(original[++i], 16) == -1) {
607 return false;
608 }
609 }
610 }
611 return true;
612 }
613
614
615 /***
616 * Replace from a given character to given character in an array order
617 * for a given string.
618 *
619 * @param original a given string
620 * @param from a replacing character array
621 * @param to a replaced character array
622 * @return the replaced string
623 */
624 public static String replace(String original, char[] from, char[] to) {
625 for (int i = from.length; i > 0; --i) {
626 original = replace(original, from[i], to[i]);
627 }
628 return original;
629 }
630
631
632 /***
633 * Replace from a given character to given character for a given string.
634 *
635 * @param original a given string
636 * @param from a replacing character array
637 * @param to a replaced character array
638 * @return the replaced string
639 */
640 public static String replace(String original, char from, char to) {
641 StringBuffer result = new StringBuffer(original.length());
642 int at, saved = 0;
643 do {
644 at = original.indexOf(from);
645 if (at >= 0) {
646 result.append(original.substring(0, at));
647 result.append(to);
648 } else {
649 result.append(original.substring(saved));
650 }
651 saved = at;
652 } while (at >= 0);
653 return result.toString();
654 }
655 }
656
657 }
658