View Javadoc

1   /*
2    * $HeadURL: https://svn.apache.org/repos/asf/httpcomponents/oac.hc3x/trunk/src/java/org/apache/commons/httpclient/util/URIUtil.java $
3    * $Revision$
4    * $Date$
5    *
6    * ====================================================================
7    *
8    *  Licensed to the Apache Software Foundation (ASF) under one or more
9    *  contributor license agreements.  See the NOTICE file distributed with
10   *  this work for additional information regarding copyright ownership.
11   *  The ASF licenses this file to You under the Apache License, Version 2.0
12   *  (the "License"); you may not use this file except in compliance with
13   *  the License.  You may obtain a copy of the License at
14   *
15   *      http://www.apache.org/licenses/LICENSE-2.0
16   *
17   *  Unless required by applicable law or agreed to in writing, software
18   *  distributed under the License is distributed on an "AS IS" BASIS,
19   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   *  See the License for the specific language governing permissions and
21   *  limitations under the License.
22   * ====================================================================
23   *
24   * This software consists of voluntary contributions made by many
25   * individuals on behalf of the Apache Software Foundation.  For more
26   * information on the Apache Software Foundation, please see
27   * <http://www.apache.org/>.
28   *
29   */
30  
31  package org.apache.commons.httpclient.util;
32  
33  import java.util.BitSet;
34  
35  import org.apache.commons.codec.DecoderException;
36  import org.apache.commons.codec.net.URLCodec;
37  import org.apache.commons.httpclient.URI;
38  import org.apache.commons.httpclient.URIException;
39  
40  /***
41   * The URI escape and character encoding and decoding utility.
42   * It's compatible with {@link org.apache.commons.httpclient.HttpURL} rather
43   * than {@link org.apache.commons.httpclient.URI}.
44   *
45   * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
46   * @version $Revision$ $Date: 2002/03/14 15:14:01 
47   */
48  public class URIUtil {
49  
50      // -------------------------------------------------------- Class constants
51  
/**
 * A bit set created with no bit set.  {@link #encodeAll(String, String)}
 * hands it to {@link #encode(String, BitSet, String)} as the set of
 * characters that may stay unescaped.
 */
52      protected static final BitSet empty = new BitSet(1);
53  
54      // ---------------------------------------------------------- URI utilities
55  
56      /***
57       * Get the basename of an URI.   It's possibly an empty string.
58       *
59       * @param uri a string regarded an URI
60       * @return the basename string; an empty string if the path ends with slash
61       */
62      public static String getName(String uri) {
63          if (uri == null || uri.length() == 0) { return uri; } 
64          String path = URIUtil.getPath(uri);
65          int at = path.lastIndexOf("/");
66          int to = path.length();
67          return (at >= 0) ? path.substring(at + 1, to) : path;
68      }
69  
70  
71      /***
72       * Get the query of an URI.
73       *
74       * @param uri a string regarded an URI
75       * @return the query string; <code>null</code> if empty or undefined
76       */
77      public static String getQuery(String uri) {
78          if (uri == null || uri.length() == 0) { return null; } 
79          // consider of net_path
80          int at = uri.indexOf("//");
81          int from = uri.indexOf(
82              "/", 
83              at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
84          );
85          // the authority part of URI ignored
86          int to = uri.length();
87          // reuse the at and from variables to consider the query
88          at = uri.indexOf("?", from);
89          if (at >= 0) {
90              from = at + 1;
91          } else {
92              return null;
93          }
94          // check the fragment
95          if (uri.lastIndexOf("#") > from) {
96              to = uri.lastIndexOf("#");
97          }
98          // get the path and query.
99          return (from < 0 || from == to) ? null : uri.substring(from, to);
100     }
101 
102 
103     /***
104      * Get the path of an URI.
105      *
106      * @param uri a string regarded an URI
107      * @return the path string
108      */
109     public static String getPath(String uri) {
110         if (uri == null) {
111             return null;
112         } 
113         // consider of net_path
114         int at = uri.indexOf("//");
115         int from = uri.indexOf(
116             "/", 
117             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
118         );
119         // the authority part of URI ignored 
120         int to = uri.length();
121         // check the query
122         if (uri.indexOf('?', from) != -1) {
123             to = uri.indexOf('?', from);
124         }
125         // check the fragment
126         if (uri.lastIndexOf("#") > from && uri.lastIndexOf("#") < to) {
127             to = uri.lastIndexOf("#");
128         }
129         // get only the path.
130         return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
131     }
132 
133 
134     /***
135      * Get the path and query of an URI.
136      *
137      * @param uri a string regarded an URI
138      * @return the path and query string
139      */
140     public static String getPathQuery(String uri) {
141         if (uri == null) {
142             return null;
143         } 
144         // consider of net_path
145         int at = uri.indexOf("//");
146         int from = uri.indexOf(
147             "/", 
148             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
149         );
150         // the authority part of URI ignored
151         int to = uri.length();
152         // Ignore the '?' mark so to ignore the query.
153         // check the fragment
154         if (uri.lastIndexOf("#") > from) {
155             to = uri.lastIndexOf("#");
156         }
157         // get the path and query.
158         return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from, to);
159     }
160 
161 
162     /***
163      * Get the path of an URI and its rest part.
164      *
165      * @param uri a string regarded an URI
166      * @return the string from the path part
167      */
168     public static String getFromPath(String uri) {
169         if (uri == null) {
170             return null;
171         } 
172         // consider of net_path
173         int at = uri.indexOf("//");
174         int from = uri.indexOf(
175             "/", 
176             at >= 0 ? (uri.lastIndexOf("/", at - 1) >= 0 ? 0 : at + 2) : 0
177         );
178         // get the path and its rest.
179         return (from < 0) ? (at >= 0 ? "/" : uri) : uri.substring(from);
180     }
181 
182     // ----------------------------------------------------- Encoding utilities
183 
184     /***
185      * Get the all escaped and encoded string with the default protocl charset.
186      * It's the same function to use <code>encode(String unescaped, Bitset
187      * empty, URI.getDefaultProtocolCharset())</code>.
188      *
189      * @param unescaped an unescaped string
190      * @return the escaped string
191      * 
192      * @throws URIException if the default protocol charset is not supported
193      *
194      * @see URI#getDefaultProtocolCharset
195      * @see #encode
196      */
197     public static String encodeAll(String unescaped) throws URIException {
198         return encodeAll(unescaped, URI.getDefaultProtocolCharset());
199     }
200  
201 
202     /***
203      * Get the all escaped and encoded string with a given charset.
204      * It's the same function to use <code>encode(String unescaped, Bitset
205      * empty, String charset)</code>.
206      *
207      * @param unescaped an unescaped string
208      * @param charset the charset
209      * @return the escaped string
210      * 
211      * @throws URIException if the charset is not supported
212      * 
213      * @see #encode
214      */
215     public static String encodeAll(String unescaped, String charset)
216         throws URIException {
217 
218         return encode(unescaped, empty, charset);
219     }
220   
221 
222     /***
223      * Escape and encode a string regarded as within the authority component of
224      * an URI with the default protocol charset.
225      * Within the authority component, the characters ";", ":", "@", "?", and
226      * "/" are reserved.
227      *
228      * @param unescaped an unescaped string
229      * @return the escaped string
230      * 
231      * @throws URIException if the default protocol charset is not supported
232      * 
233      * @see URI#getDefaultProtocolCharset
234      * @see #encode
235      */
236     public static String encodeWithinAuthority(String unescaped)
237         throws URIException {
238 
239         return encodeWithinAuthority(unescaped, URI.getDefaultProtocolCharset());
240     }
241 
242 
243     /***
244      * Escape and encode a string regarded as within the authority component of
245      * an URI with a given charset.
246      * Within the authority component, the characters ";", ":", "@", "?", and
247      * "/" are reserved.
248      *
249      * @param unescaped an unescaped string
250      * @param charset the charset
251      * @return the escaped string
252      * 
253      * @throws URIException if the charset is not supported
254      * 
255      * @see #encode
256      */
257     public static String encodeWithinAuthority(String unescaped, String charset)
258         throws URIException {
259 
260         return encode(unescaped, URI.allowed_within_authority, charset);
261     }
262 
263 
264     /***
265      * Escape and encode a string regarded as the path and query components of
266      * an URI with the default protocol charset.
267      *
268      * @param unescaped an unescaped string
269      * @return the escaped string
270      * 
271      * @throws URIException if the default protocol charset is not supported
272      * 
273      * @see URI#getDefaultProtocolCharset
274      * @see #encode
275      */
276     public static String encodePathQuery(String unescaped) throws URIException {
277         return encodePathQuery(unescaped, URI.getDefaultProtocolCharset());
278     }
279 
280 
281     /***
282      * Escape and encode a string regarded as the path and query components of
283      * an URI with a given charset.
284      *
285      * @param unescaped an unescaped string
286      * @param charset the charset
287      * @return the escaped string
288      * 
289      * @throws URIException if the charset is not supported
290      * 
291      * @see #encode
292      */
293     public static String encodePathQuery(String unescaped, String charset)
294         throws URIException {
295 
296         int at = unescaped.indexOf('?');
297         if (at < 0) {
298             return encode(unescaped, URI.allowed_abs_path, charset);
299         }
300         // else
301         return  encode(unescaped.substring(0, at), URI.allowed_abs_path, charset)
302             + '?' + encode(unescaped.substring(at + 1), URI.allowed_query, charset);
303     }
304 
305 
306     /***
307      * Escape and encode a string regarded as within the path component of an
308      * URI with the default protocol charset.
309      * The path may consist of a sequence of path segments separated by a
310      * single slash "/" character.  Within a path segment, the characters
311      * "/", ";", "=", and "?" are reserved.
312      *
313      * @param unescaped an unescaped string
314      * @return the escaped string
315      * 
316      * @throws URIException if the default protocol charset is not supported
317      * 
318      * @see URI#getDefaultProtocolCharset
319      * @see #encode
320      */
321     public static String encodeWithinPath(String unescaped)
322         throws URIException {
323 
324         return encodeWithinPath(unescaped, URI.getDefaultProtocolCharset());
325     }
326 
327 
328     /***
329      * Escape and encode a string regarded as within the path component of an
330      * URI with a given charset.
331      * The path may consist of a sequence of path segments separated by a
332      * single slash "/" character.  Within a path segment, the characters
333      * "/", ";", "=", and "?" are reserved.
334      *
335      * @param unescaped an unescaped string
336      * @param charset the charset
337      * @return the escaped string
338      * 
339      * @throws URIException if the charset is not supported
340      * 
341      * @see #encode
342      */
343     public static String encodeWithinPath(String unescaped, String charset)
344         throws URIException {
345 
346         return encode(unescaped, URI.allowed_within_path, charset);
347     }
348 
349 
350     /***
351      * Escape and encode a string regarded as the path component of an URI with
352      * the default protocol charset.
353      *
354      * @param unescaped an unescaped string
355      * @return the escaped string
356      * 
357      * @throws URIException if the default protocol charset is not supported
358      * 
359      * @see URI#getDefaultProtocolCharset
360      * @see #encode
361      */
362     public static String encodePath(String unescaped) throws URIException {
363         return encodePath(unescaped, URI.getDefaultProtocolCharset());
364     }
365 
366 
367     /***
368      * Escape and encode a string regarded as the path component of an URI with
369      * a given charset.
370      *
371      * @param unescaped an unescaped string
372      * @param charset the charset
373      * @return the escaped string
374      * 
375      * @throws URIException if the charset is not supported
376      * 
377      * @see #encode
378      */
379     public static String encodePath(String unescaped, String charset)
380         throws URIException {
381 
382         return encode(unescaped, URI.allowed_abs_path, charset);
383     }
384 
385 
386     /***
387      * Escape and encode a string regarded as within the query component of an
388      * URI with the default protocol charset.
389      * When a query comprise the name and value pairs, it is used in order
390      * to encode each name and value string.  The reserved special characters
391      * within a query component are being included in encoding the query.
392      *
393      * @param unescaped an unescaped string
394      * @return the escaped string
395      * 
396      * @throws URIException if the default protocol charset is not supported
397      * 
398      * @see URI#getDefaultProtocolCharset
399      * @see #encode
400      */
401     public static String encodeWithinQuery(String unescaped)
402         throws URIException {
403 
404         return encodeWithinQuery(unescaped, URI.getDefaultProtocolCharset());
405     }
406 
407 
408     /***
409      * Escape and encode a string regarded as within the query component of an
410      * URI with a given charset.
411      * When a query comprise the name and value pairs, it is used in order
412      * to encode each name and value string.  The reserved special characters
413      * within a query component are being included in encoding the query.
414      *
415      * @param unescaped an unescaped string
416      * @param charset the charset
417      * @return the escaped string
418      * 
419      * @throws URIException if the charset is not supported
420      * 
421      * @see #encode
422      */
423     public static String encodeWithinQuery(String unescaped, String charset)
424         throws URIException {
425 
426         return encode(unescaped, URI.allowed_within_query, charset);
427     }
428 
429 
430     /***
431      * Escape and encode a string regarded as the query component of an URI with
432      * the default protocol charset.
433      * When a query string is not misunderstood the reserved special characters
434      * ("&amp;", "=", "+", ",", and "$") within a query component, this method
435      * is recommended to use in encoding the whole query.
436      *
437      * @param unescaped an unescaped string
438      * @return the escaped string
439      * 
440      * @throws URIException if the default protocol charset is not supported
441      * 
442      * @see URI#getDefaultProtocolCharset
443      * @see #encode
444      */
445     public static String encodeQuery(String unescaped) throws URIException {
446         return encodeQuery(unescaped, URI.getDefaultProtocolCharset());
447     }
448 
449 
450     /***
451      * Escape and encode a string regarded as the query component of an URI with
452      * a given charset.
453      * When a query string is not misunderstood the reserved special characters
454      * ("&amp;", "=", "+", ",", and "$") within a query component, this method
455      * is recommended to use in encoding the whole query.
456      *
457      * @param unescaped an unescaped string
458      * @param charset the charset
459      * @return the escaped string
460      * 
461      * @throws URIException if the charset is not supported
462      * 
463      * @see #encode
464      */
465     public static String encodeQuery(String unescaped, String charset)
466         throws URIException {
467 
468         return encode(unescaped, URI.allowed_query, charset);
469     }
470 
471 
472     /***
473      * Escape and encode a given string with allowed characters not to be
474      * escaped and the default protocol charset.
475      *
476      * @param unescaped a string
477      * @param allowed allowed characters not to be escaped
478      * @return the escaped string
479      * 
480      * @throws URIException if the default protocol charset is not supported
481      * 
482      * @see URI#getDefaultProtocolCharset
483      */
484     public static String encode(String unescaped, BitSet allowed)
485         throws URIException {
486 
487         return encode(unescaped, allowed, URI.getDefaultProtocolCharset());
488     }
489 
490 
491     /***
492      * Escape and encode a given string with allowed characters not to be
493      * escaped and a given charset.
494      *
495      * @param unescaped a string
496      * @param allowed allowed characters not to be escaped
497      * @param charset the charset
498      * @return the escaped string
499      */
500     public static String encode(String unescaped, BitSet allowed,
501             String charset) throws URIException {
502         byte[] rawdata = URLCodec.encodeUrl(allowed, 
503             EncodingUtil.getBytes(unescaped, charset));
504         return EncodingUtil.getAsciiString(rawdata);
505     }
506 
507 
508     /***
509      * Unescape and decode a given string regarded as an escaped string with the
510      * default protocol charset.
511      *
512      * @param escaped a string
513      * @return the unescaped string
514      * 
515      * @throws URIException if the string cannot be decoded (invalid)
516      * 
517      * @see URI#getDefaultProtocolCharset
518      */
519     public static String decode(String escaped) throws URIException {
520         try {
521             byte[] rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(escaped));
522             return EncodingUtil.getString(rawdata, URI.getDefaultProtocolCharset());
523         } catch (DecoderException e) {
524             throw new URIException(e.getMessage());
525         }
526     }
527 
528     /***
529      * Unescape and decode a given string regarded as an escaped string.
530      *
531      * @param escaped a string
532      * @param charset the charset
533      * @return the unescaped string
534      * 
535      * @throws URIException if the charset is not supported
536      * 
537      * @see Coder#decode
538      */
539     public static String decode(String escaped, String charset)
540         throws URIException {
541 
542         return Coder.decode(escaped.toCharArray(), charset);
543     }
544 
545     // ---------------------------------------------------------- Inner classes
546 
547     /***
548      * The basic and internal utility for URI escape and character encoding and
549      * decoding.
550      * 
551      * @deprecated use org.apache.commons.codec.net.URLCodec
552      */
553     protected static class Coder extends URI {
554 
555         /***
556          * Escape and encode a given string with allowed characters not to be
557          * escaped.
558          *
559          * @param unescapedComponent an unescaped component
560          * @param allowed allowed characters not to be escaped
561          * @param charset the charset to encode
562          * @return the escaped and encoded string
563          * 
564          * @throws URIException if the charset is not supported
565          * 
566          * @deprecated use org.apache.commons.codec.net.URLCodec
567          */
568         public static char[] encode(String unescapedComponent, BitSet allowed, String charset) 
569             throws URIException {
570 
571             return URI.encode(unescapedComponent, allowed, charset);
572         }
573 
574 
575         /***
576          * Unescape and decode a given string.
577          *
578          * @param escapedComponent an being-unescaped component
579          * @param charset the charset to decode
580          * @return the escaped and encoded string
581          * 
582          * @throws URIException if the charset is not supported
583          * 
584          * @deprecated use org.apache.commons.codec.net.URLCodec
585          */
586         public static String decode(char[] escapedComponent, String charset)
587             throws URIException {
588 
589             return URI.decode(escapedComponent, charset);
590         }
591 
592 
593         /***
594          * Verify whether a given string is escaped or not
595          *
596          * @param original given characters
597          * @return true if the given character array is 7 bit ASCII-compatible.
598          */
599         public static boolean verifyEscaped(char[] original) {
600             for (int i = 0; i < original.length; i++) {
601                 int c = original[i];
602                 if (c > 128) {
603                     return false;
604                 } else if (c == '%') {
605                     if (Character.digit(original[++i], 16) == -1 
606                         || Character.digit(original[++i], 16) == -1) {
607                         return false;
608                     }
609                 }
610             }
611             return true;
612         }
613 
614 
615         /***
616          * Replace from a given character to given character in an array order
617          * for a given string.
618          *
619          * @param original a given string
620          * @param from a replacing character array
621          * @param to a replaced character array
622          * @return the replaced string
623          */
624         public static String replace(String original, char[] from, char[] to) {
625             for (int i = from.length; i > 0; --i) {
626                 original = replace(original, from[i], to[i]);
627             }
628             return original;
629         }
630 
631 
632         /***
633          * Replace from a given character to given character for a given string.
634          *
635          * @param original a given string
636          * @param from a replacing character array
637          * @param to a replaced character array
638          * @return the replaced string
639          */
640         public static String replace(String original, char from, char to) {
641             StringBuffer result = new StringBuffer(original.length());
642             int at, saved = 0;
643             do {
644                 at = original.indexOf(from);
645                 if (at >= 0) {
646                     result.append(original.substring(0, at));
647                     result.append(to);
648                 } else {
649                     result.append(original.substring(saved));
650                 }
651                 saved = at;
652             } while (at >= 0);
653             return result.toString();
654         }
655     }
656 
657 }
658