URI xref

View Javadoc

1   /*
2    * $HeadURL: https://svn.apache.org/repos/asf/httpcomponents/oac.hc3x/trunk/src/java/org/apache/commons/httpclient/URI.java $
3    * $Revision$
4    * $Date$
5    *
6    * ====================================================================
7    *
8    *  Licensed to the Apache Software Foundation (ASF) under one or more
9    *  contributor license agreements.  See the NOTICE file distributed with
10   *  this work for additional information regarding copyright ownership.
11   *  The ASF licenses this file to You under the Apache License, Version 2.0
12   *  (the "License"); you may not use this file except in compliance with
13   *  the License.  You may obtain a copy of the License at
14   *
15   *      http://www.apache.org/licenses/LICENSE-2.0
16   *
17   *  Unless required by applicable law or agreed to in writing, software
18   *  distributed under the License is distributed on an "AS IS" BASIS,
19   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   *  See the License for the specific language governing permissions and
21   *  limitations under the License.
22   * ====================================================================
23   *
24   * This software consists of voluntary contributions made by many
25   * individuals on behalf of the Apache Software Foundation.  For more
26   * information on the Apache Software Foundation, please see
27   * <http://www.apache.org/>.
28   *
29   */
30  
31  package org.apache.commons.httpclient;
32  
33  import java.io.IOException;
34  import java.io.ObjectInputStream;
35  import java.io.ObjectOutputStream;
36  import java.io.Serializable;
37  import java.util.Arrays;
38  import java.util.Locale;
39  import java.util.BitSet;
40  import java.util.Hashtable;
41  
42  import org.apache.commons.codec.DecoderException;
43  import org.apache.commons.codec.net.URLCodec;
44  import org.apache.commons.httpclient.util.EncodingUtil;
45  
46  /***
47   * The interface for the URI(Uniform Resource Identifiers) version of RFC 2396.
48   * This class supports parsing a URI reference so that it can be extended
49   * for specific protocols, and it tracks the character encoding of the
50   * protocol used for transport and the charset of the document.
51   * <p>
52   * A URI is always in an "escaped" form, since escaping or unescaping a
53   * completed URI might change its semantics.  
54   * <p>
55   * Implementers should be careful not to escape or unescape the same string
56   * more than once, since unescaping an already unescaped string might lead to
57   * misinterpreting a percent data character as another escaped character,
58   * or vice versa in the case of escaping an already escaped string.
59   * <p>
60   * In order to avoid these problems, the following data types are used:
61   * <p><blockquote><pre>
62   *   URI character sequence: char
63   *   octet sequence: byte
64   *   original character sequence: String
65   * </pre></blockquote><p>
66   *
67   * So, a URI is a sequence of characters as an array of a char type, which
68   * is not always represented as a sequence of octets as an array of byte.
69   * <p>
70   * 
71   * URI Syntactic Components
72   * <p><blockquote><pre>
73   * - In general, written as follows:
74   *   Absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;
75   *   Generic URI = &lt;scheme&gt;://&lt;authority&gt;&lt;path&gt;?&lt;query&gt;
76   *
77   * - Syntax
78   *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
79   *   hier_part     = ( net_path | abs_path ) [ "?" query ]
80   *   net_path      = "//" authority [ abs_path ]
81   *   abs_path      = "/"  path_segments
82   * </pre></blockquote><p>
83   *
84   * The following examples illustrate URI that are in common use.
85   * <pre>
86   * ftp://ftp.is.co.za/rfc/rfc1808.txt
87   *    -- ftp scheme for File Transfer Protocol services
88   * gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles
89   *    -- gopher scheme for Gopher and Gopher+ Protocol services
90   * http://www.math.uio.no/faq/compression-faq/part1.html
91   *    -- http scheme for Hypertext Transfer Protocol services
92   * mailto:mduerst@ifi.unizh.ch
93   *    -- mailto scheme for electronic mail addresses
94   * news:comp.infosystems.www.servers.unix
95   *    -- news scheme for USENET news groups and articles
96   * telnet://melvyl.ucop.edu/
97   *    -- telnet scheme for interactive services via the TELNET Protocol
98   * </pre>
99   * Please, notice that there are many modifications from URL(RFC 1738) and
100  * relative URL(RFC 1808).
101  * <p>
102  * <b>The expressions for a URI</b>
103  * <p><pre>
104  * For escaped URI forms
105  *  - URI(char[]) // constructor
106  *  - char[] getRawXxx() // method
107  *  - String getEscapedXxx() // method
108  *  - String toString() // method
109  * <p>
110  * For unescaped URI forms
111  *  - URI(String) // constructor
112  *  - String getXXX() // method
113  * </pre><p>
114  *
115  * @author <a href="mailto:jericho@apache.org">Sung-Gu</a>
116  * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
117  * @version $Revision$ $Date: 2002/03/14 15:14:01 $
118  */
119 public class URI implements Cloneable, Comparable, Serializable {
120 
121 
122     // ----------------------------------------------------------- Constructors
123 
124     /*** Create an instance as an internal use */
125     protected URI() { // intentionally empty: nothing is parsed or assigned here
126     }
127 
128     /***
129      * Construct a URI from a string with the given charset. The input string can 
130      * be either in escaped or unescaped form. 
131      *
132      * @param s URI character sequence
133      * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
134      *                <tt>false</tt> otherwise. 
135      * @param charset the charset string to do escape encoding, if required
136      * 
137      * @throws URIException If the URI cannot be created.
138      * @throws NullPointerException if input string is <code>null</code>
139      * 
140      * @see #getProtocolCharset
141      * 
142      * @since 3.0
143      */
144     public URI(String s, boolean escaped, String charset)
145         throws URIException, NullPointerException {
146         protocolCharset = charset; // assigned before parsing starts
147         parseUriReference(s, escaped); // escaped states which form s is in
148     }
149 
150     /***
151      * Construct a URI from a string; no charset is taken, so the protocol charset
152      * field is left unassigned. The input string can be either in escaped or unescaped form.
153      *
154      * @param s URI character sequence
155      * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
156      *                <tt>false</tt> otherwise. 
157      * 
158      * @throws URIException If the URI cannot be created.
159      * @throws NullPointerException if input string is <code>null</code>
160      * 
161      * @see #getProtocolCharset
162      * 
163      * @since 3.0
164      */
165     public URI(String s, boolean escaped)
166         throws URIException, NullPointerException {
167         parseUriReference(s, escaped); // protocolCharset is not assigned by this constructor
168     }
169 
170     /***
171      * Construct a URI as an escaped form of a character array with the given
172      * charset.
173      *
174      * @param escaped the URI character sequence
175      * @param charset the charset string to do escape encoding
176      * @throws URIException If the URI cannot be created.
177      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
178      * @see #getProtocolCharset
179      * 
180      * @deprecated Use {@link #URI(String, boolean, String)}
181      */
182     public URI(char[] escaped, String charset) 
183         throws URIException, NullPointerException {
184         protocolCharset = charset; // assigned before parsing starts
185         parseUriReference(new String(escaped), true); // true: input is already escaped; a null array fails in new String()
186     }
187 
188 
189     /***
190      * Construct a URI as an escaped form of a character array.
191      * An URI can be placed within double-quotes or angle brackets like 
192      * "http://test.com/" and &lt;http://test.com/&gt;
193      * 
194      * @param escaped the URI character sequence
195      * @throws URIException If the URI cannot be created.
196      * @throws NullPointerException if <code>escaped</code> is <code>null</code>
197      * @see #getDefaultProtocolCharset
198      * 
199      * @deprecated Use {@link #URI(String, boolean)}
200      */
201     public URI(char[] escaped) 
202         throws URIException, NullPointerException {
203         parseUriReference(new String(escaped), true); // true: input is already escaped; a null array fails in new String()
204     }
205 
206 
207     /***
208      * Construct a URI from the given string with the given charset.
209      *
210      * @param original the string to be represented to URI character sequence
211      * It is one of absoluteURI and relativeURI.
212      * @param charset the charset string to do escape encoding
213      * @throws URIException If the URI cannot be created.
214      * @see #getProtocolCharset
215      * 
216      * @deprecated Use {@link #URI(String, boolean, String)}
217      */
218     public URI(String original, String charset) throws URIException {
219         protocolCharset = charset; // assigned before parsing starts
220         parseUriReference(original, false); // false: original is in unescaped form
221     }
222 
223 
224     /***
225      * Construct a URI from the given string.
226      * <p><blockquote><pre>
227      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
228      * </pre></blockquote><p>
229      * An URI can be placed within double-quotes or angle brackets like 
230      * "http://test.com/" and &lt;http://test.com/&gt;
231      *
232      * @param original the string to be represented to URI character sequence
233      * It is one of absoluteURI and relativeURI.
234      * @throws URIException If the URI cannot be created.
235      * @see #getDefaultProtocolCharset
236      * 
237      * @deprecated Use {@link #URI(String, boolean)}
238      */
239     public URI(String original) throws URIException {
240         parseUriReference(original, false); // false: original is in unescaped form
241     }
242 
243 
244     /***
245      * Construct a general URI from the given components.
246      * <p><blockquote><pre>
247      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
248      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
249      *   opaque_part   = uric_no_slash *uric
250      * </pre></blockquote><p>
251      * It's for absolute URI = &lt;scheme&gt;:&lt;scheme-specific-part&gt;#
252      * &lt;fragment&gt;.
253      *
254      * @param scheme the scheme string
255      * @param schemeSpecificPart scheme_specific_part
256      * @param fragment the fragment string
257      * @throws URIException If the URI cannot be created.
258      * @see #getDefaultProtocolCharset
259      */
260     public URI(String scheme, String schemeSpecificPart, String fragment)
261         throws URIException {
262         // validate and construct the URI character sequence
263         if (scheme == null) {
264            throw new URIException(URIException.PARSING, "scheme required");
265         }
266         // Lower-case with a fixed locale: the default-locale overload turns 'I'
267         // into a dotless i under Turkish/Azeri locales, so "FILE" would not stay ASCII.
268         char[] s = scheme.toLowerCase(Locale.ENGLISH).toCharArray();
269         if (!validate(s, URI.scheme)) {
270             throw new URIException(URIException.PARSING, "incorrect scheme");
271         }
272         _scheme = s; // a scheme is present, so this is an absolute URI
273         _opaque = encode(schemeSpecificPart, allowed_opaque_part,
274                 getProtocolCharset());
275         _is_opaque_part = true;
276         // the fragment is stored exactly as given; it is not passed to encode()
277         _fragment = fragment == null ? null : fragment.toCharArray();
278         setURI();
279     }
280 
281 
282     /***
283      * Construct a general URI from the given components.
284      * <p><blockquote><pre>
285      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
286      *   absoluteURI   = scheme ":" ( hier_part | opaque_part )
287      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
288      *   hier_part     = ( net_path | abs_path ) [ "?" query ]
289      * </pre></blockquote><p>
290      * It's for absolute URI = &lt;scheme&gt;:&lt;path&gt;?&lt;query&gt;#&lt;
291      * fragment&gt; and relative URI = &lt;path&gt;?&lt;query&gt;#&lt;fragment
292      * &gt;.
293      *
294      * @param scheme the scheme string
295      * @param authority the authority string
296      * @param path the path string
297      * @param query the query string
298      * @param fragment the fragment string
299      * @throws URIException If the new URI cannot be created.
300      * @see #getDefaultProtocolCharset
301      */
302     public URI(String scheme, String authority, String path, String query,
303                String fragment) throws URIException {
304 
305         // A path that follows a scheme or an authority has to be an abs_path;
306         // reject anything else before any text is assembled.
307         boolean hasPrefix = scheme != null || authority != null;
308         if (path != null && hasPrefix && !path.startsWith("/")) {
309             throw new URIException(URIException.PARSING,
310                     "abs_path requested");
311         }
312 
313         // Assemble the URI reference, skipping every absent (null) component.
314         StringBuffer reference = new StringBuffer();
315         if (scheme != null) {
316             reference.append(scheme).append(':');
317         }
318         if (authority != null) {
319             reference.append("//").append(authority);
320         }
321         if (path != null) {  // null means absent; an empty string is still appended
322             reference.append(path);
323         }
324         if (query != null) {
325             reference.append('?').append(query);
326         }
327         if (fragment != null) {
328             reference.append('#').append(fragment);
329         }
330         // false: the assembled text is handed over in unescaped form
331         parseUriReference(reference.toString(), false);
332     }
333 
334 
335     /***
336      * Construct a general URI from the given components.
337      *
338      * @param scheme the scheme string
339      * @param userinfo the userinfo string
340      * @param host the host string
341      * @param port the port number
342      * @throws URIException If the new URI cannot be created.
343      * @see #getDefaultProtocolCharset
344      */
345     public URI(String scheme, String userinfo, String host, int port)
346         throws URIException {
347         // delegate to the seven-argument form with no path, query or fragment
348         this(scheme, userinfo, host, port, null, null, null);
349     }
350 
351 
352     /***
353      * Construct a general URI from the given components.
354      *
355      * @param scheme the scheme string
356      * @param userinfo the userinfo string
357      * @param host the host string
358      * @param port the port number
359      * @param path the path string
360      * @throws URIException If the new URI cannot be created.
361      * @see #getDefaultProtocolCharset
362      */
363     public URI(String scheme, String userinfo, String host, int port,
364             String path) throws URIException {
365         // delegate to the seven-argument form with no query or fragment
366         this(scheme, userinfo, host, port, path, null, null);
367     }
368 
369 
370     /***
371      * Construct a general URI from the given components.
372      *
373      * @param scheme the scheme string
374      * @param userinfo the userinfo string
375      * @param host the host string
376      * @param port the port number
377      * @param path the path string
378      * @param query the query string
379      * @throws URIException If the new URI cannot be created.
380      * @see #getDefaultProtocolCharset
381      */
382     public URI(String scheme, String userinfo, String host, int port,
383             String path, String query) throws URIException {
384         // delegate to the seven-argument form with no fragment
385         this(scheme, userinfo, host, port, path, query, null);
386     }
387 
388 
389     /***
390      * Construct a general URI from the given components.
391      *
392      * @param scheme the scheme string
393      * @param userinfo the userinfo string
394      * @param host the host string
395      * @param port the port number
396      * @param path the path string
397      * @param query the query string
398      * @param fragment the fragment string
399      * @throws URIException If the new URI cannot be created.
400      * @see #getDefaultProtocolCharset
401      */
402     public URI(String scheme, String userinfo, String host, int port,
403             String path, String query, String fragment) throws URIException {
404         // authority = [userinfo "@"] host [":" port]; a null host gives a null authority (userinfo and port are then dropped), port -1 adds no port
405         this(scheme, (host == null) ? null 
406             : ((userinfo != null) ? userinfo + '@' : "") + host 
407                 + ((port != -1) ? ":" + port : ""), path, query, fragment);
408     }
409 
410 
411     /***
412      * Construct a general URI from the given components.
413      *
414      * @param scheme the scheme string
415      * @param host the host string
416      * @param path the path string
417      * @param fragment the fragment string
418      * @throws URIException If the new URI cannot be created.
419      * @see #getDefaultProtocolCharset
420      */
421     public URI(String scheme, String host, String path, String fragment)
422         throws URIException {
423         // host is passed in the authority position of the five-argument form; the query is null
424         this(scheme, host, path, null, fragment);
425     }
426 
427 
428     /***
429      * Construct a general URI with the given relative URI string.
430      *
431      * @param base the base URI
432      * @param relative the relative URI string
433      * @throws URIException If the new URI cannot be created.
434      * 
435      * @deprecated Use {@link #URI(URI, String, boolean)}
436      */
437     public URI(URI base, String relative) throws URIException {
438         this(base, new URI(relative)); // relative is parsed in unescaped form by the deprecated URI(String)
439     }
440 
441 
442     /***
443      * Construct a general URI with the given relative URI string.
444      *
445      * @param base the base URI
446      * @param relative the relative URI string
447      * @param escaped <tt>true</tt> if URI character sequence is in escaped form. 
448      *                <tt>false</tt> otherwise.
449      *  
450      * @throws URIException If the new URI cannot be created.
451      * 
452      * @since 3.0
453      */
454     public URI(URI base, String relative, boolean escaped) throws URIException {
455         this(base, new URI(relative, escaped)); // parse relative first, then resolve it against base
456     }
457 
458 
459     /***
460      * Construct a general URI with the given relative URI.
461      * <p><blockquote><pre>
462      *   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
463      *   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
464      * </pre></blockquote><p>
465      * Resolving Relative References to Absolute Form.
466      *
467      * <strong>Examples of Resolving Relative URI References</strong>
468      *
469      * Within an object with a well-defined base URI of
470      * <p><blockquote><pre>
471      *   http://a/b/c/d;p?q
472      * </pre></blockquote><p>
473      * the relative URI would be resolved as follows:
474      *
475      * Normal Examples
476      *
477      * <p><blockquote><pre>
478      *   g:h           =  g:h
479      *   g             =  http://a/b/c/g
480      *   ./g           =  http://a/b/c/g
481      *   g/            =  http://a/b/c/g/
482      *   /g            =  http://a/g
483      *   //g           =  http://g
484      *   ?y            =  http://a/b/c/?y
485      *   g?y           =  http://a/b/c/g?y
486      *   #s            =  (current document)#s
487      *   g#s           =  http://a/b/c/g#s
488      *   g?y#s         =  http://a/b/c/g?y#s
489      *   ;x            =  http://a/b/c/;x
490      *   g;x           =  http://a/b/c/g;x
491      *   g;x?y#s       =  http://a/b/c/g;x?y#s
492      *   .             =  http://a/b/c/
493      *   ./            =  http://a/b/c/
494      *   ..            =  http://a/b/
495      *   ../           =  http://a/b/
496      *   ../g          =  http://a/b/g
497      *   ../..         =  http://a/
498      *   ../../        =  http://a/ 
499      *   ../../g       =  http://a/g
500      * </pre></blockquote><p>
501      *
502      * Some URI schemes do not allow a hierarchical syntax matching the
503      * &lt;hier_part&gt; syntax, and thus cannot use relative references.
504      *
505      * @param base the base URI
506      * @param relative the relative URI
507      * @throws URIException If the new URI cannot be created.
508      */
509     public URI(URI base, URI relative) throws URIException {
510 
511         if (base._scheme == null) {
512             throw new URIException(URIException.PARSING, "base URI required");
513         }
514         // base is absolute from here on: inherit its scheme and authority by
515         // default; the branches below override them when relative has its own
516         this._scheme = base._scheme;
517         this._authority = base._authority;
518         this._is_net_path = base._is_net_path; 
519         if (base._is_opaque_part || relative._is_opaque_part) {
520             // opaque URIs are not resolved hierarchically; the scheme was
521             // already taken from base above and the guard makes the flag true
522             this._is_opaque_part = true;
523             this._opaque = relative._opaque;
524             this._fragment = relative._fragment;
525             this.setURI();
526             return;
527         }
528         boolean schemesEqual = Arrays.equals(base._scheme,relative._scheme);
529         if (relative._scheme != null 
530                 && (!schemesEqual  || relative._authority != null)) {
531             this._scheme = relative._scheme; // relative is itself absolute: take its components
532             this._is_net_path = relative._is_net_path;
533             this._authority = relative._authority;
534             if (relative._is_server) {
535                 this._is_server = relative._is_server;
536                 this._userinfo = relative._userinfo;
537                 this._host = relative._host;
538                 this._port = relative._port;
539             } else if (relative._is_reg_name) {
540                 this._is_reg_name = relative._is_reg_name;
541             }
542             this._is_abs_path = relative._is_abs_path;
543             this._is_rel_path = relative._is_rel_path;
544             this._path = relative._path;
545         } else if (base._authority != null && relative._scheme == null) {
546             this._is_net_path = base._is_net_path; // no scheme on relative: keep the base authority
547             this._authority = base._authority;
548             if (base._is_server) {
549                 this._is_server = base._is_server;
550                 this._userinfo = base._userinfo;
551                 this._host = base._host;
552                 this._port = base._port;
553             } else if (base._is_reg_name) {
554                 this._is_reg_name = base._is_reg_name;
555             }
556         }
557         if (relative._authority != null) { // an authority on relative always wins, e.g. "//g"
558             this._is_net_path = relative._is_net_path;
559             this._authority = relative._authority;
560             if (relative._is_server) {
561                 this._is_server = relative._is_server;
562                 this._userinfo = relative._userinfo;
563                 this._host = relative._host;
564                 this._port = relative._port;
565             } else if (relative._is_reg_name) {
566                 this._is_reg_name = relative._is_reg_name;
567             }
568             this._is_abs_path = relative._is_abs_path;
569             this._is_rel_path = relative._is_rel_path;
570             this._path = relative._path;
571         }
572         // resolve the path and query if necessary
573         if (relative._authority == null 
574             && (relative._scheme == null || schemesEqual)) {
575             if ((relative._path == null || relative._path.length == 0)
576                 && relative._query == null) {
577                 // handle a reference to the current document, see RFC 2396 
578                 // section 5.2 step 2
579                 this._path = base._path;
580                 this._query = base._query;
581             } else {
582                 this._path = resolvePath(base._path, relative._path);
583             }
584         }
585         // base._query removed
586         if (relative._query != null) {
587             this._query = relative._query;
588         }
589         // base._fragment removed
590         if (relative._fragment != null) {
591             this._fragment = relative._fragment;
592         }
593         this.setURI();
594         // reparse the newly built URI, this will ensure that all flags are set correctly.
595         // TODO there must be a better way to do this
596         parseUriReference(new String(_uri), true);
597     }
598 
599     // --------------------------------------------------- Instance Variables
600 
601     /*** Version ID for serialization; declared without an access modifier (package-private). */
602     static final long serialVersionUID = 604752400577948726L;
603 
604 
605     /***
606      * Cached hash code for this URI; starts at 0 (presumably "not computed yet" - confirm in hashCode()).
607      */
608     protected int hash = 0;
609 
610 
611     /***
612      * The complete URI as a character array, or null until it has been built.
613      * The URI is always in an "escaped" form, since escaping or unescaping
614      * a completed URI might change its semantics.
615      */
616     protected char[] _uri = null;
617 
618 
619     /***
620      * The protocol charset of this instance; among the constructors, only those taking a charset argument assign it.
621      */
622     protected String protocolCharset = null;
623 
624 
625     /***
626      * The default charset of the protocol (RFC 2277, 2396). Static and not final.
627      */
628     protected static String defaultProtocolCharset = "UTF-8";
629 
630 
631     /***
632      * The default charset of the document.  RFC 2277, 2396
633      * The platform's charset is used for the document by default.
634      */
635     protected static String defaultDocumentCharset = null; // the effective default, chosen below
636     protected static String defaultDocumentCharsetByLocale = null; // candidate from the default locale
637     protected static String defaultDocumentCharsetByPlatform = null; // candidate from file.encoding
638     // Static initializer for defaultDocumentCharset: the locale-derived charset wins, the platform one is the fallback
639     static {
640         Locale locale = Locale.getDefault();
641         // in order to support backward compatibility
642         if (locale != null) {
643             defaultDocumentCharsetByLocale =
644                 LocaleToCharsetMap.getCharset(locale);
645             // set the default document charset (still null if the lookup returned null)
646             defaultDocumentCharset = defaultDocumentCharsetByLocale;
647         }
648         // in order to support platform encoding
649         try {
650             defaultDocumentCharsetByPlatform = System.getProperty("file.encoding");
651         } catch (SecurityException ignore) { // property not readable: the platform charset stays null
652         }
653         if (defaultDocumentCharset == null) {
654             // no locale-derived charset: fall back to the platform one (which may also be null)
655             defaultDocumentCharset = defaultDocumentCharsetByPlatform;
656         }
657     }
658 
659 
660     /***
661      * The scheme as a character array; null when no scheme is set.
662      */
663     protected char[] _scheme = null;
664 
665 
666     /***
667      * The opaque part; the opaque-part constructor stores the encoded scheme-specific part here.
668      */
669     protected char[] _opaque = null;
670 
671 
672     /***
673      * The authority as a character array; null when no authority is set.
674      */
675     protected char[] _authority = null;
676 
677 
678     /***
679      * The userinfo; copied along with host and port when an authority is flagged as a server.
680      */
681     protected char[] _userinfo = null;
682 
683 
684     /***
685      * The host; copied along with userinfo and port when an authority is flagged as a server.
686      */
687     protected char[] _host = null;
688 
689 
690     /***
691      * The port; initialised to -1, the same value the component constructors treat as "no port".
692      */
693     protected int _port = -1;
694 
695 
696     /***
697      * The path as a character array; may be null or empty.
698      */
699     protected char[] _path = null;
700 
701 
702     /***
703      * The query as a character array; null when there is no query.
704      */
705     protected char[] _query = null;
706 
707 
708     /***
709      * The fragment as a character array; null when there is no fragment.
710      */
711     protected char[] _fragment = null;
712 
713 
714     /***
715      * The root path, the single character '/'. The reference is final but the array contents are not.
716      */
717     protected static final char[] rootPath = { '/' };
718 
719     // ---------------------- Generous characters for each component validation
720 
721     /***
722      * The percent "%" character always has the reserved purpose of being the
723      * escape indicator, it must be escaped as "%25" in order to be used as
724      * data within a URI.
725      */
726     protected static final BitSet percent = new BitSet(256); // 256 is the initial size in bits
727     // Static initializer for percent: the set holds the single character '%'
728     static {
729         percent.set('%');
730     }
731 
732 
733     /***
734      * BitSet for digit.
735      * <p><blockquote><pre>
736      * digit    = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
737      *            "8" | "9"
738      * </pre></blockquote><p>
739      */
740     protected static final BitSet digit = new BitSet(256);
741     // Mark the ten decimal digits in one range operation.
742     static {
743         // '0'..'9' are contiguous code points; BitSet.set(from, to) treats
744         // its upper bound as exclusive, hence the + 1.
745         digit.set('0', '9' + 1);
746     }
747 
748 
749     /***
750      * BitSet for alpha.
751      * <p><blockquote><pre>
752      * alpha         = lowalpha | upalpha
753      * </pre></blockquote><p>
754      */
755     protected static final BitSet alpha = new BitSet(256);
756     // Mark both ASCII letter ranges.
757     static {
758         // Each case forms one contiguous run of code points, so a single
759         // range call per case replaces a per-character loop.
760         // BitSet.set(from, to) excludes its upper bound, hence the + 1.
761         alpha.set('a', 'z' + 1); // lowalpha
762         alpha.set('A', 'Z' + 1); // upalpha
763         // alpha now holds exactly 52 characters
764     }
765 
766 
767     /***
768      * BitSet for alphanum (join of alpha &amp; digit).
769      * <p><blockquote><pre>
770      *  alphanum      = alpha | digit
771      * </pre></blockquote><p>
772      */
773     protected static final BitSet alphanum = new BitSet(256);
774     // Static initializer for alphanum: union of the two sets built above
775     static {
776         alphanum.or(alpha); // or() copies the bits; alpha itself is not modified
777         alphanum.or(digit);
778     }
779 
780 
781     /***
782      * BitSet for hex.
783      * <p><blockquote><pre>
784      * hex           = digit | "A" | "B" | "C" | "D" | "E" | "F" |
785      *                         "a" | "b" | "c" | "d" | "e" | "f"
786      * </pre></blockquote><p>
787      */
788     protected static final BitSet hex = new BitSet(256);
789     // Build hex as the decimal digits plus the letters a-f in both cases.
790     static {
791         // start from the decimal digits
792         hex.or(digit);
793         // then add the six letter digits of each case; the upper bound of
794         // BitSet.set(from, to) is exclusive, hence the + 1
795         hex.set('a', 'f' + 1);
796         hex.set('A', 'F' + 1);
797         // hex now holds exactly 22 characters
798     }
799 
800 
801     /***
802      * BitSet for escaped.
803      * <p><blockquote><pre>
804      * escaped       = "%" hex hex
805      * </pre></blockquote><p>
806      */
807     protected static final BitSet escaped = new BitSet(256);
808     // Static initializer for escaped: a character class ('%' plus the hex digits), not the "%" hex hex sequence itself
809     static {
810         escaped.or(percent);
811         escaped.or(hex);
812     }
813 
814 
815     /***
816      * BitSet for mark.
817      * <p><blockquote><pre>
818      * mark          = "-" | "_" | "." | "!" | "~" | "*" | "'" |
819      *                 "(" | ")"
820      * </pre></blockquote><p>
821      */
822     protected static final BitSet mark = new BitSet(256);
823     // Populate mark from a literal listing of its nine punctuation characters.
824     static {
825         // Same characters, in the same order, as the grammar documented above:
826         //   - _ . ! ~ * ' ( )
827         final String markChars = "-_.!~*'()";
828         for (int i = 0; i < markChars.length(); i++) {
829             mark.set(markChars.charAt(i));
830         }
831         // BitSet.set(int) receives each char widened to its code point, so
832         // the resulting bits are identical to nine individual set() calls.
833         // All nine characters are ASCII, well below the initial 256 bits.
834     }
835 
836 
837     /***
838      * Data characters that are allowed in a URI but do not have a reserved
839      * purpose are called unreserved.
840      * <p><blockquote><pre>
841      * unreserved    = alphanum | mark
842      * </pre></blockquote><p>
843      */
844     protected static final BitSet unreserved = new BitSet(256);
845     // Static initializer for unreserved: letters, digits and the mark characters
846     static {
847         unreserved.or(alphanum);
848         unreserved.or(mark);
849     }
850 
851 
852     /***
853      * BitSet for reserved.
854      * <p><blockquote><pre>
855      * reserved      = ";" | "/" | "?" | ":" | "@" | "&amp;" | "=" | "+" |
856      *                 "$" | ","
857      * </pre></blockquote><p>
858      */
859     protected static final BitSet reserved = new BitSet(256);
860     // Populate reserved from a literal listing of its ten characters.
861     static {
862         // Same characters, in the same order, as the grammar documented above:
863         //   ; / ? : @ & = + $ ,
864         final String reservedChars = ";/?:@&=+$,";
865         for (int i = 0; i < reservedChars.length(); i++) {
866             // charAt yields a char, which widens to the int bit index
867             reserved.set(reservedChars.charAt(i));
868         }
869         // Equivalent to ten individual set() calls; every character is
870         // ASCII, so all indices are below the initial size of 256 bits.
871         // Insertion order is irrelevant to a BitSet; it is kept for readability.
872     }
873 
874 
875     /***
876      * BitSet for uric.
877      * <p><blockquote><pre>
878      * uric          = reserved | unreserved | escaped
879      * </pre></blockquote><p>
880      */
881     protected static final BitSet uric = new BitSet(256);
882     // Static initializer for uric
883     static {
884         uric.or(reserved);
885         uric.or(unreserved);
886         uric.or(escaped);
887     }
888 
889 
890     /***
891      * BitSet for fragment (alias for uric).
892      * <p><blockquote><pre>
893      * fragment      = *uric
894      * </pre></blockquote><p>
895      */
896     protected static final BitSet fragment = uric;
897 
898 
899     /***
900      * BitSet for query (alias for uric).
901      * <p><blockquote><pre>
902      * query         = *uric
903      * </pre></blockquote><p>
904      */
905     protected static final BitSet query = uric;
906 
907 
908     /***
909      * BitSet for pchar.
910      * <p><blockquote><pre>
911      * pchar         = unreserved | escaped |
912      *                 ":" | "@" | "&amp;" | "=" | "+" | "$" | ","
913      * </pre></blockquote><p>
914      */
915     protected static final BitSet pchar = new BitSet(256);
916     // Static initializer for pchar
917     static {
918         pchar.or(unreserved);
919         pchar.or(escaped);
920         pchar.set(':');
921         pchar.set('@');
922         pchar.set('&');
923         pchar.set('=');
924         pchar.set('+');
925         pchar.set('$');
926         pchar.set(',');
927     }
928 
929 
930     /***
931      * BitSet for param (alias for pchar).
932      * <p><blockquote><pre>
933      * param         = *pchar
934      * </pre></blockquote><p>
935      */
936     protected static final BitSet param = pchar;
937 
938 
939     /***
940      * BitSet for segment.
941      * <p><blockquote><pre>
942      * segment       = *pchar *( ";" param )
943      * </pre></blockquote><p>
944      */
945     protected static final BitSet segment = new BitSet(256);
946     // Static initializer for segment
947     static {
948         segment.or(pchar);
949         segment.set(';');
950         segment.or(param);
951     }
952 
953 
954     /***
955      * BitSet for path segments.
956      * <p><blockquote><pre>
957      * path_segments = segment *( "/" segment )
958      * </pre></blockquote><p>
959      */
960     protected static final BitSet path_segments = new BitSet(256);
961     // Static initializer for path_segments
962     static {
963         path_segments.set('/');
964         path_segments.or(segment);
965     }
966 
967 
968     /***
969      * URI absolute path.
970      * <p><blockquote><pre>
971      * abs_path      = "/"  path_segments
972      * </pre></blockquote><p>
973      */
974     protected static final BitSet abs_path = new BitSet(256);
975     // Static initializer for abs_path
976     static {
977         abs_path.set('/');
978         abs_path.or(path_segments);
979     }
980 
981 
982     /***
983      * URI bitset for encoding typical non-slash characters.
984      * <p><blockquote><pre>
985      * uric_no_slash = unreserved | escaped | ";" | "?" | ":" | "@" |
986      *                 "&amp;" | "=" | "+" | "$" | ","
987      * </pre></blockquote><p>
988      */
989     protected static final BitSet uric_no_slash = new BitSet(256);
990     // Static initializer for uric_no_slash
991     static {
992         uric_no_slash.or(unreserved);
993         uric_no_slash.or(escaped);
994         uric_no_slash.set(';');
995         uric_no_slash.set('?');
996         uric_no_slash.set(';');
997         uric_no_slash.set('@');
998         uric_no_slash.set('&');
999         uric_no_slash.set('=');
1000         uric_no_slash.set('+');
1001         uric_no_slash.set('$');
1002         uric_no_slash.set(',');
1003     }
1004     
1005 
1006     /***
1007      * URI bitset that combines uric_no_slash and uric.
1008      * <p><blockquote><pre>
1009      * opaque_part   = uric_no_slash *uric
1010      * </pre></blockquote><p>
1011      */
1012     protected static final BitSet opaque_part = new BitSet(256);
1013     // Static initializer for opaque_part
1014     static {
1015         // it's generous. because first character must not include a slash
1016         opaque_part.or(uric_no_slash);
1017         opaque_part.or(uric);
1018     }
1019     
1020 
1021     /***
1022      * URI bitset that combines absolute path and opaque part.
1023      * <p><blockquote><pre>
1024      * path          = [ abs_path | opaque_part ]
1025      * </pre></blockquote><p>
1026      */
1027     protected static final BitSet path = new BitSet(256);
1028     // Static initializer for path
1029     static {
1030         path.or(abs_path);
1031         path.or(opaque_part);
1032     }
1033 
1034 
1035     /***
1036      * Port, a logical alias for digit.
1037      */
1038     protected static final BitSet port = digit;
1039 
1040 
1041     /***
1042      * Bitset that combines digit and dot fo IPv$address.
1043      * <p><blockquote><pre>
1044      * IPv4address   = 1*digit "." 1*digit "." 1*digit "." 1*digit
1045      * </pre></blockquote><p>
1046      */
1047     protected static final BitSet IPv4address = new BitSet(256);
1048     // Static initializer for IPv4address
1049     static {
1050         IPv4address.or(digit);
1051         IPv4address.set('.');
1052     }
1053 
1054 
1055     /***
1056      * RFC 2373.
1057      * <p><blockquote><pre>
1058      * IPv6address = hexpart [ ":" IPv4address ]
1059      * </pre></blockquote><p>
1060      */
1061     protected static final BitSet IPv6address = new BitSet(256);
1062     // Static initializer for IPv6address reference
1063     static {
1064         IPv6address.or(hex); // hexpart
1065         IPv6address.set(':');
1066         IPv6address.or(IPv4address);
1067     }
1068 
1069 
1070     /***
1071      * RFC 2732, 2373.
1072      * <p><blockquote><pre>
1073      * IPv6reference   = "[" IPv6address "]"
1074      * </pre></blockquote><p>
1075      */
1076     protected static final BitSet IPv6reference = new BitSet(256);
1077     // Static initializer for IPv6reference
1078     static {
1079         IPv6reference.set('[');
1080         IPv6reference.or(IPv6address);
1081         IPv6reference.set(']');
1082     }
1083 
1084 
1085     /***
1086      * BitSet for toplabel.
1087      * <p><blockquote><pre>
1088      * toplabel      = alpha | alpha *( alphanum | "-" ) alphanum
1089      * </pre></blockquote><p>
1090      */
1091     protected static final BitSet toplabel = new BitSet(256);
1092     // Static initializer for toplabel
1093     static {
1094         toplabel.or(alphanum);
1095         toplabel.set('-');
1096     }
1097 
1098 
1099     /***
1100      * BitSet for domainlabel.
1101      * <p><blockquote><pre>
1102      * domainlabel   = alphanum | alphanum *( alphanum | "-" ) alphanum
1103      * </pre></blockquote><p>
1104      */
1105     protected static final BitSet domainlabel = toplabel;
1106 
1107 
1108     /***
1109      * BitSet for hostname.
1110      * <p><blockquote><pre>
1111      * hostname      = *( domainlabel "." ) toplabel [ "." ]
1112      * </pre></blockquote><p>
1113      */
1114     protected static final BitSet hostname = new BitSet(256);
1115     // Static initializer for hostname
1116     static {
1117         hostname.or(toplabel);
1118         // hostname.or(domainlabel);
1119         hostname.set('.');
1120     }
1121 
1122 
1123     /***
1124      * BitSet for host.
1125      * <p><blockquote><pre>
1126      * host          = hostname | IPv4address | IPv6reference
1127      * </pre></blockquote><p>
1128      */
1129     protected static final BitSet host = new BitSet(256);
1130     // Static initializer for host
1131     static {
1132         host.or(hostname);
1133         // host.or(IPv4address);
1134         host.or(IPv6reference); // IPv4address
1135     }
1136 
1137 
1138     /***
1139      * BitSet for hostport.
1140      * <p><blockquote><pre>
1141      * hostport      = host [ ":" port ]
1142      * </pre></blockquote><p>
1143      */
1144     protected static final BitSet hostport = new BitSet(256);
1145     // Static initializer for hostport
1146     static {
1147         hostport.or(host);
1148         hostport.set(':');
1149         hostport.or(port);
1150     }
1151 
1152 
1153     /***
1154      * Bitset for userinfo.
1155      * <p><blockquote><pre>
1156      * userinfo      = *( unreserved | escaped |
1157      *                    ";" | ":" | "&amp;" | "=" | "+" | "$" | "," )
1158      * </pre></blockquote><p>
1159      */
1160     protected static final BitSet userinfo = new BitSet(256);
1161     // Static initializer for userinfo
1162     static {
1163         userinfo.or(unreserved);
1164         userinfo.or(escaped);
1165         userinfo.set(';');
1166         userinfo.set(':');
1167         userinfo.set('&');
1168         userinfo.set('=');
1169         userinfo.set('+');
1170         userinfo.set('$');
1171         userinfo.set(',');
1172     }
1173 
1174 
1175     /***
1176      * BitSet for within the userinfo component like user and password.
1177      */
1178     public static final BitSet within_userinfo = new BitSet(256);
1179     // Static initializer for within_userinfo
1180     static {
1181         within_userinfo.or(userinfo);
1182         within_userinfo.clear(';'); // reserved within authority
1183         within_userinfo.clear(':');
1184         within_userinfo.clear('@');
1185         within_userinfo.clear('?');
1186         within_userinfo.clear('/');
1187     }
1188 
1189 
1190     /***
1191      * Bitset for server.
1192      * <p><blockquote><pre>
1193      * server        = [ [ userinfo "@" ] hostport ]
1194      * </pre></blockquote><p>
1195      */
1196     protected static final BitSet server = new BitSet(256);
1197     // Static initializer for server
1198     static {
1199         server.or(userinfo);
1200         server.set('@');
1201         server.or(hostport);
1202     }
1203 
1204 
1205     /***
1206      * BitSet for reg_name.
1207      * <p><blockquote><pre>
1208      * reg_name      = 1*( unreserved | escaped | "$" | "," |
1209      *                     ";" | ":" | "@" | "&amp;" | "=" | "+" )
1210      * </pre></blockquote><p>
1211      */
1212     protected static final BitSet reg_name = new BitSet(256);
1213     // Static initializer for reg_name
1214     static {
1215         reg_name.or(unreserved);
1216         reg_name.or(escaped);
1217         reg_name.set('$');
1218         reg_name.set(',');
1219         reg_name.set(';');
1220         reg_name.set(':');
1221         reg_name.set('@');
1222         reg_name.set('&');
1223         reg_name.set('=');
1224         reg_name.set('+');
1225     }
1226 
1227 
1228     /***
1229      * BitSet for authority.
1230      * <p><blockquote><pre>
1231      * authority     = server | reg_name
1232      * </pre></blockquote><p>
1233      */
1234     protected static final BitSet authority = new BitSet(256);
1235     // Static initializer for authority
1236     static {
1237         authority.or(server);
1238         authority.or(reg_name);
1239     }
1240 
1241 
1242     /***
1243      * BitSet for scheme.
1244      * <p><blockquote><pre>
1245      * scheme        = alpha *( alpha | digit | "+" | "-" | "." )
1246      * </pre></blockquote><p>
1247      */
1248     protected static final BitSet scheme = new BitSet(256);
1249     // Static initializer for scheme
1250     static {
1251         scheme.or(alpha);
1252         scheme.or(digit);
1253         scheme.set('+');
1254         scheme.set('-');
1255         scheme.set('.');
1256     }
1257 
1258 
1259     /***
1260      * BitSet for rel_segment.
1261      * <p><blockquote><pre>
1262      * rel_segment   = 1*( unreserved | escaped |
1263      *                     ";" | "@" | "&amp;" | "=" | "+" | "$" | "," )
1264      * </pre></blockquote><p>
1265      */
1266     protected static final BitSet rel_segment = new BitSet(256);
1267     // Static initializer for rel_segment
1268     static {
1269         rel_segment.or(unreserved);
1270         rel_segment.or(escaped);
1271         rel_segment.set(';');
1272         rel_segment.set('@');
1273         rel_segment.set('&');
1274         rel_segment.set('=');
1275         rel_segment.set('+');
1276         rel_segment.set('$');
1277         rel_segment.set(',');
1278     }
1279 
1280 
1281     /***
1282      * BitSet for rel_path.
1283      * <p><blockquote><pre>
1284      * rel_path      = rel_segment [ abs_path ]
1285      * </pre></blockquote><p>
1286      */
1287     protected static final BitSet rel_path = new BitSet(256);
1288     // Static initializer for rel_path
1289     static {
1290         rel_path.or(rel_segment);
1291         rel_path.or(abs_path);
1292     }
1293 
1294 
1295     /***
1296      * BitSet for net_path.
1297      * <p><blockquote><pre>
1298      * net_path      = "//" authority [ abs_path ]
1299      * </pre></blockquote><p>
1300      */
1301     protected static final BitSet net_path = new BitSet(256);
1302     // Static initializer for net_path
1303     static {
1304         net_path.set('/');
1305         net_path.or(authority);
1306         net_path.or(abs_path);
1307     }
1308     
1309 
1310     /***
1311      * BitSet for hier_part.
1312      * <p><blockquote><pre>
1313      * hier_part     = ( net_path | abs_path ) [ "?" query ]
1314      * </pre></blockquote><p>
1315      */
1316     protected static final BitSet hier_part = new BitSet(256);
1317     // Static initializer for hier_part
1318     static {
1319         hier_part.or(net_path);
1320         hier_part.or(abs_path);
1321         // hier_part.set('?'); aleady included
1322         hier_part.or(query);
1323     }
1324 
1325 
1326     /***
1327      * BitSet for relativeURI.
1328      * <p><blockquote><pre>
1329      * relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
1330      * </pre></blockquote><p>
1331      */
1332     protected static final BitSet relativeURI = new BitSet(256);
1333     // Static initializer for relativeURI
1334     static {
1335         relativeURI.or(net_path);
1336         relativeURI.or(abs_path);
1337         relativeURI.or(rel_path);
1338         // relativeURI.set('?'); aleady included
1339         relativeURI.or(query);
1340     }
1341 
1342 
1343     /***
1344      * BitSet for absoluteURI.
1345      * <p><blockquote><pre>
1346      * absoluteURI   = scheme ":" ( hier_part | opaque_part )
1347      * </pre></blockquote><p>
1348      */
1349     protected static final BitSet absoluteURI = new BitSet(256);
1350     // Static initializer for absoluteURI
1351     static {
1352         absoluteURI.or(scheme);
1353         absoluteURI.set(':');
1354         absoluteURI.or(hier_part);
1355         absoluteURI.or(opaque_part);
1356     }
1357 
1358 
1359     /***
1360      * BitSet for URI-reference.
1361      * <p><blockquote><pre>
1362      * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1363      * </pre></blockquote><p>
1364      */
1365     protected static final BitSet URI_reference = new BitSet(256);
1366     // Static initializer for URI_reference
1367     static {
1368         URI_reference.or(absoluteURI);
1369         URI_reference.or(relativeURI);
1370         URI_reference.set('#');
1371         URI_reference.or(fragment);
1372     }
1373 
1374     // ---------------------------- Characters disallowed within the URI syntax
1375     // Excluded US-ASCII Characters are like control, space, delims and unwise
1376 
1377     /***
1378      * BitSet for control.
1379      */
1380     public static final BitSet control = new BitSet(256);
1381     // Static initializer for control
1382     static {
1383         for (int i = 0; i <= 0x1F; i++) {
1384             control.set(i);
1385         }
1386         control.set(0x7F);
1387     }
1388 
1389     /***
1390      * BitSet for space.
1391      */
1392     public static final BitSet space = new BitSet(256);
1393     // Static initializer for space
1394     static {
1395         space.set(0x20);
1396     }
1397 
1398 
1399     /***
1400      * BitSet for delims.
1401      */
1402     public static final BitSet delims = new BitSet(256);
1403     // Static initializer for delims
1404     static {
1405         delims.set('<');
1406         delims.set('>');
1407         delims.set('#');
1408         delims.set('%');
1409         delims.set('"');
1410     }
1411 
1412 
1413     /***
1414      * BitSet for unwise.
1415      */
1416     public static final BitSet unwise = new BitSet(256);
1417     // Static initializer for unwise
1418     static {
1419         unwise.set('{');
1420         unwise.set('}');
1421         unwise.set('|');
1422         unwise.set('//');
1423         unwise.set('^');
1424         unwise.set('[');
1425         unwise.set(']');
1426         unwise.set('`');
1427     }
1428 
1429 
1430     /***
1431      * Disallowed rel_path before escaping.
1432      */
1433     public static final BitSet disallowed_rel_path = new BitSet(256);
1434     // Static initializer for disallowed_rel_path
1435     static {
1436         disallowed_rel_path.or(uric);
1437         disallowed_rel_path.andNot(rel_path);
1438     }
1439 
1440 
1441     /***
1442      * Disallowed opaque_part before escaping.
1443      */
1444     public static final BitSet disallowed_opaque_part = new BitSet(256);
1445     // Static initializer for disallowed_opaque_part
1446     static {
1447         disallowed_opaque_part.or(uric);
1448         disallowed_opaque_part.andNot(opaque_part);
1449     }
1450 
1451     // ----------------------- Characters allowed within and for each component
1452 
1453     /***
1454      * Those characters that are allowed for the authority component.
1455      */
1456     public static final BitSet allowed_authority = new BitSet(256);
1457     // Static initializer for allowed_authority
1458     static {
1459         allowed_authority.or(authority);
1460         allowed_authority.clear('%');
1461     }
1462 
1463 
1464     /***
1465      * Those characters that are allowed for the opaque_part.
1466      */
1467     public static final BitSet allowed_opaque_part = new BitSet(256);
1468     // Static initializer for allowed_opaque_part 
1469     static {
1470         allowed_opaque_part.or(opaque_part);
1471         allowed_opaque_part.clear('%');
1472     }
1473 
1474 
1475     /***
1476      * Those characters that are allowed for the reg_name.
1477      */
1478     public static final BitSet allowed_reg_name = new BitSet(256);
1479     // Static initializer for allowed_reg_name 
1480     static {
1481         allowed_reg_name.or(reg_name);
1482         // allowed_reg_name.andNot(percent);
1483         allowed_reg_name.clear('%');
1484     }
1485 
1486 
1487     /***
1488      * Those characters that are allowed for the userinfo component.
1489      */
1490     public static final BitSet allowed_userinfo = new BitSet(256);
1491     // Static initializer for allowed_userinfo
1492     static {
1493         allowed_userinfo.or(userinfo);
1494         // allowed_userinfo.andNot(percent);
1495         allowed_userinfo.clear('%');
1496     }
1497 
1498 
1499     /***
1500      * Those characters that are allowed for within the userinfo component.
1501      */
1502     public static final BitSet allowed_within_userinfo = new BitSet(256);
1503     // Static initializer for allowed_within_userinfo
1504     static {
1505         allowed_within_userinfo.or(within_userinfo);
1506         allowed_within_userinfo.clear('%');
1507     }
1508 
1509 
1510     /***
1511      * Those characters that are allowed for the IPv6reference component.
1512      * The characters '[', ']' in IPv6reference should be excluded.
1513      */
1514     public static final BitSet allowed_IPv6reference = new BitSet(256);
1515     // Static initializer for allowed_IPv6reference
1516     static {
1517         allowed_IPv6reference.or(IPv6reference);
1518         // allowed_IPv6reference.andNot(unwise);
1519         allowed_IPv6reference.clear('[');
1520         allowed_IPv6reference.clear(']');
1521     }
1522 
1523 
1524     /***
1525      * Those characters that are allowed for the host component.
1526      * The characters '[', ']' in IPv6reference should be excluded.
1527      */
1528     public static final BitSet allowed_host = new BitSet(256);
1529     // Static initializer for allowed_host
1530     static {
1531         allowed_host.or(hostname);
1532         allowed_host.or(allowed_IPv6reference);
1533     }
1534 
1535 
1536     /***
1537      * Those characters that are allowed for the authority component.
1538      */
1539     public static final BitSet allowed_within_authority = new BitSet(256);
1540     // Static initializer for allowed_within_authority
1541     static {
1542         allowed_within_authority.or(server);
1543         allowed_within_authority.or(reg_name);
1544         allowed_within_authority.clear(';');
1545         allowed_within_authority.clear(':');
1546         allowed_within_authority.clear('@');
1547         allowed_within_authority.clear('?');
1548         allowed_within_authority.clear('/');
1549     }
1550 
1551 
1552     /***
1553      * Those characters that are allowed for the abs_path.
1554      */
1555     public static final BitSet allowed_abs_path = new BitSet(256);
1556     // Static initializer for allowed_abs_path
1557     static {
1558         allowed_abs_path.or(abs_path);
1559         // allowed_abs_path.set('/');  // aleady included
1560         allowed_abs_path.andNot(percent);
1561         allowed_abs_path.clear('+');
1562     }
1563 
1564 
1565     /***
1566      * Those characters that are allowed for the rel_path.
1567      */
1568     public static final BitSet allowed_rel_path = new BitSet(256);
1569     // Static initializer for allowed_rel_path
1570     static {
1571         allowed_rel_path.or(rel_path);
1572         allowed_rel_path.clear('%');
1573         allowed_rel_path.clear('+');
1574     }
1575 
1576 
1577     /***
1578      * Those characters that are allowed within the path.
1579      */
1580     public static final BitSet allowed_within_path = new BitSet(256);
1581     // Static initializer for allowed_within_path
1582     static {
1583         allowed_within_path.or(abs_path);
1584         allowed_within_path.clear('/');
1585         allowed_within_path.clear(';');
1586         allowed_within_path.clear('=');
1587         allowed_within_path.clear('?');
1588     }
1589 
1590 
1591     /***
1592      * Those characters that are allowed for the query component.
1593      */
1594     public static final BitSet allowed_query = new BitSet(256);
1595     // Static initializer for allowed_query
1596     static {
1597         allowed_query.or(uric);
1598         allowed_query.clear('%');
1599     }
1600 
1601 
1602     /***
1603      * Those characters that are allowed within the query component.
1604      */
1605     public static final BitSet allowed_within_query = new BitSet(256);
1606     // Static initializer for allowed_within_query
1607     static {
1608         allowed_within_query.or(allowed_query);
1609         allowed_within_query.andNot(reserved); // excluded 'reserved'
1610     }
1611 
1612 
1613     /***
1614      * Those characters that are allowed for the fragment component.
1615      */
1616     public static final BitSet allowed_fragment = new BitSet(256);
1617     // Static initializer for allowed_fragment
1618     static {
1619         allowed_fragment.or(uric);
1620         allowed_fragment.clear('%');
1621     }
1622 
1623     // ------------------------------------------- Flags for this URI-reference
1624 
1625     // TODO: Figure out what all these variables are for and provide javadoc
1626 
1627     // URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
1628     // absoluteURI   = scheme ":" ( hier_part | opaque_part )
1629     protected boolean _is_hier_part;
1630     protected boolean _is_opaque_part;
1631     // relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ] 
1632     // hier_part     = ( net_path | abs_path ) [ "?" query ]
1633     protected boolean _is_net_path;
1634     protected boolean _is_abs_path;
1635     protected boolean _is_rel_path;
1636     // net_path      = "//" authority [ abs_path ] 
1637     // authority     = server | reg_name
1638     protected boolean _is_reg_name;
1639     protected boolean _is_server;  // = _has_server
1640     // server        = [ [ userinfo "@" ] hostport ]
1641     // host          = hostname | IPv4address | IPv6reference
1642     protected boolean _is_hostname;
1643     protected boolean _is_IPv4address;
1644     protected boolean _is_IPv6reference;
1645 
1646     // ------------------------------------------ Character and escape encoding
1647     
1648     /***
1649      * Encodes URI string.
1650      *
1651      * This is a two-step mapping, one from original characters to octets, and
1652      * subsequently a second from octets to URI characters:
1653      * <p><blockquote><pre>
1654      *   original character sequence->octet sequence->URI character sequence
1655      * </pre></blockquote><p>
1656      *
1657      * An escaped octet is encoded as a character triplet, consisting of the
1658      * percent character "%" followed by the two hexadecimal digits
1659      * representing the octet code. For example, "%20" is the escaped
1660      * encoding for the US-ASCII space character.
1661      * <p>
1662      * Conversion from the local filesystem character set to UTF-8 will
1663      * normally involve a two step process. First convert the local character
1664      * set to the UCS; then convert the UCS to UTF-8.
1665      * The first step in the process can be performed by maintaining a mapping
1666      * table that includes the local character set code and the corresponding
1667      * UCS code.
1668      * The next step is to convert the UCS character code to the UTF-8 encoding.
1669      * <p>
1670      * Mapping between vendor codepages can be done in a very similar manner
1671      * as described above.
1672      * <p>
1673      * The only time escape encodings can allowedly be made is when a URI is
1674      * being created from its component parts.  The escape and validate methods
1675      * are internally performed within this method.
1676      *
1677      * @param original the original character sequence
1678      * @param allowed those characters that are allowed within a component
1679      * @param charset the protocol charset
1680      * @return URI character sequence
1681      * @throws URIException unsupported character encoding (a null argument raises IllegalArgumentException instead)
1682      */
1683         
1684     protected static char[] encode(String original, BitSet allowed,
1685             String charset) throws URIException {
1686         if (original == null) {
1687             throw new IllegalArgumentException("Original string may not be null");
1688         }
1689         if (allowed == null) {
1690             throw new IllegalArgumentException("Allowed bitset may not be null");
1691         }
1692         byte[] rawdata = URLCodec.encodeUrl(allowed, EncodingUtil.getBytes(original, charset));
1693         return EncodingUtil.getAsciiString(rawdata).toCharArray();
1694     }
1695 
1696     /***
1697      * Decodes URI encoded string.
1698      *
1699      * This is a two-step mapping, one from URI characters to octets, and
1700      * subsequently a second from octets to original characters:
1701      * <p><blockquote><pre>
1702      *   URI character sequence->octet sequence->original character sequence
1703      * </pre></blockquote><p>
1704      *
1705      * A URI must be separated into its components before the escaped
1706      * characters within those components can be allowedly decoded.
1707      * <p>
1708      * Notice that there is a chance that URI characters that are non UTF-8
1709      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1710      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1711      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1712      * false reading.
1713      * <p>
1714      * The percent "%" character always has the reserved purpose of being
1715      * the escape indicator, it must be escaped as "%25" in order to be used
1716      * as data within a URI.
1717      * <p>
1718      * The unescape method is internally performed within this method.
1719      *
1720      * @param component the URI character sequence
1721      * @param charset the protocol charset
1722      * @return original character sequence
1723      * @throws URIException incomplete trailing escape pattern or unsupported
1724      * character encoding
1725      */
1726     protected static String decode(char[] component, String charset) 
1727         throws URIException {
1728         if (component == null) {
1729             throw new IllegalArgumentException("Component array of chars may not be null");
1730         }
1731         return decode(new String(component), charset);
1732     }
1733 
1734     /***
1735      * Decodes URI encoded string.
1736      *
1737      * This is a two-step mapping, one from URI characters to octets, and
1738      * subsequently a second from octets to original characters:
1739      * <p><blockquote><pre>
1740      *   URI character sequence->octet sequence->original character sequence
1741      * </pre></blockquote><p>
1742      *
1743      * A URI must be separated into its components before the escaped
1744      * characters within those components can be allowedly decoded.
1745      * <p>
1746      * Notice that there is a chance that URI characters that are non UTF-8
1747      * may be parsed as valid UTF-8.  A recent non-scientific analysis found
1748      * that EUC encoded Japanese words had a 2.7% false reading; SJIS had a
1749      * 0.0005% false reading; other encoding such as ASCII or KOI-8 have a 0%
1750      * false reading.
1751      * <p>
1752      * The percent "%" character always has the reserved purpose of being
1753      * the escape indicator, it must be escaped as "%25" in order to be used
1754      * as data within a URI.
1755      * <p>
1756      * The unescape method is internally performed within this method.
1757      *
1758      * @param component the URI character sequence
1759      * @param charset the protocol charset
1760      * @return original character sequence
1761      * @throws URIException incomplete trailing escape pattern or unsupported
1762      * character encoding
1763      * 
1764      * @since 3.0
1765      */
1766     protected static String decode(String component, String charset) 
1767         throws URIException {
1768         if (component == null) {
1769             throw new IllegalArgumentException("Component array of chars may not be null");
1770         }
1771         byte[] rawdata = null;
1772         try { 
1773             rawdata = URLCodec.decodeUrl(EncodingUtil.getAsciiBytes(component));
1774         } catch (DecoderException e) {
1775             throw new URIException(e.getMessage());
1776         }
1777         return EncodingUtil.getString(rawdata, charset);
1778     }
1779     /***
1780      * Pre-validate the unescaped URI string within a specific component.
1781      *
1782      * @param component the component string within the component
1783      * @param disallowed those characters disallowed within the component
1784      * @return if true, it doesn't have the disallowed characters
1785      * if false, the component is undefined or an incorrect one
1786      */
1787     protected boolean prevalidate(String component, BitSet disallowed) {
1788         // prevalidate the given component by disallowed characters
1789         if (component == null) {
1790             return false; // undefined
1791         }
1792         char[] target = component.toCharArray();
1793         for (int i = 0; i < target.length; i++) {
1794             if (disallowed.get(target[i])) {
1795                 return false;
1796             }
1797         }
1798         return true;
1799     }
1800 
1801 
1802     /***
1803      * Validate the URI characters within a specific component.
1804      * The component must be performed after escape encoding. Or it doesn't
1805      * include escaped characters.
1806      *
1807      * @param component the characters sequence within the component
1808      * @param generous those characters that are allowed within a component
1809      * @return if true, it's the correct URI character sequence
1810      */
1811     protected boolean validate(char[] component, BitSet generous) {
1812         // validate each component by generous characters
1813         return validate(component, 0, -1, generous);
1814     }
1815 
1816 
1817     /***
1818      * Validate the URI characters within a specific component.
1819      * The component must be performed after escape encoding. Or it doesn't
1820      * include escaped characters.
1821      * <p>
1822      * It's not that much strict, generous.  The strict validation might be 
1823      * performed before being called this method.
1824      *
1825      * @param component the characters sequence within the component
1826      * @param soffset the starting offset of the given component
1827      * @param eoffset the ending offset of the given component
1828      * if -1, it means the length of the component
1829      * @param generous those characters that are allowed within a component
1830      * @return if true, it's the correct URI character sequence
1831      */
1832     protected boolean validate(char[] component, int soffset, int eoffset,
1833             BitSet generous) {
1834         // validate each component by generous characters
1835         if (eoffset == -1) {
1836             eoffset = component.length - 1;
1837         }
1838         for (int i = soffset; i <= eoffset; i++) {
1839             if (!generous.get(component[i])) { 
1840                 return false;
1841             }
1842         }
1843         return true;
1844     }
1845 
1846 
1847     /***
1848      * In order to avoid any possilbity of conflict with non-ASCII characters,
1849      * Parse a URI reference as a <code>String</code> with the character
1850      * encoding of the local system or the document.
1851      * <p>
1852      * The following line is the regular expression for breaking-down a URI
1853      * reference into its components.
1854      * <p><blockquote><pre>
1855      *   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1856      *    12            3  4          5       6  7        8 9
1857      * </pre></blockquote><p>
1858      * For example, matching the above expression to
1859      *   http://jakarta.apache.org/ietf/uri/#Related
1860      * results in the following subexpression matches:
1861      * <p><blockquote><pre>
1862      *               $1 = http:
1863      *  scheme    =  $2 = http
1864      *               $3 = //jakarta.apache.org
1865      *  authority =  $4 = jakarta.apache.org
1866      *  path      =  $5 = /ietf/uri/
1867      *               $6 = <undefined>
1868      *  query     =  $7 = <undefined>
1869      *               $8 = #Related
1870      *  fragment  =  $9 = Related
1871      * </pre></blockquote><p>
1872      *
1873      * @param original the original character sequence
1874      * @param escaped <code>true</code> if <code>original</code> is escaped
1875      * @throws URIException If an error occurs.
1876      */
1877     protected void parseUriReference(String original, boolean escaped)
1878         throws URIException {
1879 
1880         // validate and contruct the URI character sequence
1881         if (original == null) {
1882             throw new URIException("URI-Reference required");
1883         }
1884 
1885         /* @
1886          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1887          */
1888         String tmp = original.trim();
1889         
1890         /*
1891          * The length of the string sequence of characters.
1892          * It may not be equal to the length of the byte array.
1893          */
1894         int length = tmp.length();
1895 
1896         /*
1897          * Remove the delimiters like angle brackets around an URI.
1898          */
1899         if (length > 0) {
1900             char[] firstDelimiter = { tmp.charAt(0) };
1901             if (validate(firstDelimiter, delims)) {
1902                 if (length >= 2) {
1903                     char[] lastDelimiter = { tmp.charAt(length - 1) };
1904                     if (validate(lastDelimiter, delims)) {
1905                         tmp = tmp.substring(1, length - 1);
1906                         length = length - 2;
1907                     }
1908                 }
1909             }
1910         }
1911 
1912         /*
1913          * The starting index
1914          */
1915         int from = 0;
1916 
1917         /*
1918          * The test flag whether the URI is started from the path component.
1919          */
1920         boolean isStartedFromPath = false;
1921         int atColon = tmp.indexOf(':');
1922         int atSlash = tmp.indexOf('/');
1923         if ((atColon <= 0 && !tmp.startsWith("//"))
1924             || (atSlash >= 0 && atSlash < atColon)) {
1925             isStartedFromPath = true;
1926         }
1927 
1928         /*
1929          * <p><blockquote><pre>
1930          *     @@@@@@@@
1931          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1932          * </pre></blockquote><p>
1933          */
1934         int at = indexFirstOf(tmp, isStartedFromPath ? "/?#" : ":/?#", from);
1935         if (at == -1) { 
1936             at = 0;
1937         }
1938 
1939         /*
1940          * Parse the scheme.
1941          * <p><blockquote><pre>
1942          *  scheme    =  $2 = http
1943          *              @
1944          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1945          * </pre></blockquote><p>
1946          */
1947         if (at > 0 && at < length && tmp.charAt(at) == ':') {
1948             char[] target = tmp.substring(0, at).toLowerCase().toCharArray();
1949             if (validate(target, scheme)) {
1950                 _scheme = target;
1951             } else {
1952                 throw new URIException("incorrect scheme");
1953             }
1954             from = ++at;
1955         }
1956 
1957         /*
1958          * Parse the authority component.
1959          * <p><blockquote><pre>
1960          *  authority =  $4 = jakarta.apache.org
1961          *                  @@
1962          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1963          * </pre></blockquote><p>
1964          */
1965         // Reset flags
1966         _is_net_path = _is_abs_path = _is_rel_path = _is_hier_part = false;
1967         if (0 <= at && at < length && tmp.charAt(at) == '/') {
1968             // Set flag
1969             _is_hier_part = true;
1970             if (at + 2 < length && tmp.charAt(at + 1) == '/' 
1971                 && !isStartedFromPath) {
1972                 // the temporary index to start the search from
1973                 int next = indexFirstOf(tmp, "/?#", at + 2);
1974                 if (next == -1) {
1975                     next = (tmp.substring(at + 2).length() == 0) ? at + 2 
1976                         : tmp.length();
1977                 }
1978                 parseAuthority(tmp.substring(at + 2, next), escaped);
1979                 from = at = next;
1980                 // Set flag
1981                 _is_net_path = true;
1982             }
1983             if (from == at) {
1984                 // Set flag
1985                 _is_abs_path = true;
1986             }
1987         }
1988 
1989         /*
1990          * Parse the path component.
1991          * <p><blockquote><pre>
1992          *  path      =  $5 = /ietf/uri/
1993          *                                @@@@@@
1994          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
1995          * </pre></blockquote><p>
1996          */
1997         if (from < length) {
1998             // rel_path = rel_segment [ abs_path ]
1999             int next = indexFirstOf(tmp, "?#", from);
2000             if (next == -1) {
2001                 next = tmp.length();
2002             }
2003             if (!_is_abs_path) {
2004                 if (!escaped 
2005                     && prevalidate(tmp.substring(from, next), disallowed_rel_path) 
2006                     || escaped 
2007                     && validate(tmp.substring(from, next).toCharArray(), rel_path)) {
2008                     // Set flag
2009                     _is_rel_path = true;
2010                 } else if (!escaped 
2011                     && prevalidate(tmp.substring(from, next), disallowed_opaque_part) 
2012                     || escaped 
2013                     && validate(tmp.substring(from, next).toCharArray(), opaque_part)) {
2014                     // Set flag
2015                     _is_opaque_part = true;
2016                 } else {
2017                     // the path component may be empty
2018                     _path = null;
2019                 }
2020             }
2021             String s = tmp.substring(from, next);
2022             if (escaped) {
2023                 setRawPath(s.toCharArray());
2024             } else {
2025                 setPath(s);
2026             }
2027             at = next;
2028         }
2029 
2030         // set the charset to do escape encoding
2031         String charset = getProtocolCharset();
2032 
2033         /*
2034          * Parse the query component.
2035          * <p><blockquote><pre>
2036          *  query     =  $7 = <undefined>
2037          *                                        @@@@@@@@@
2038          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2039          * </pre></blockquote><p>
2040          */
2041         if (0 <= at && at + 1 < length && tmp.charAt(at) == '?') {
2042             int next = tmp.indexOf('#', at + 1);
2043             if (next == -1) {
2044                 next = tmp.length();
2045             }
2046             if (escaped) {
2047                 _query = tmp.substring(at + 1, next).toCharArray();
2048                 if (!validate(_query, uric)) {
2049                     throw new URIException("Invalid query");
2050                 }
2051             } else {
2052                 _query = encode(tmp.substring(at + 1, next), allowed_query, charset);
2053             }
2054             at = next;
2055         }
2056 
2057         /*
2058          * Parse the fragment component.
2059          * <p><blockquote><pre>
2060          *  fragment  =  $9 = Related
2061          *                                                   @@@@@@@@
2062          *  ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2063          * </pre></blockquote><p>
2064          */
2065         if (0 <= at && at + 1 <= length && tmp.charAt(at) == '#') {
2066             if (at + 1 == length) { // empty fragment
2067                 _fragment = "".toCharArray();
2068             } else {
2069                 _fragment = (escaped) ? tmp.substring(at + 1).toCharArray() 
2070                     : encode(tmp.substring(at + 1), allowed_fragment, charset);
2071             }
2072         }
2073 
2074         // set this URI.
2075         setURI();
2076     }
2077 
2078 
2079     /***
2080      * Get the earlier index that to be searched for the first occurrance in
2081      * one of any of the given string.
2082      *
2083      * @param s the string to be indexed
2084      * @param delims the delimiters used to index
2085      * @return the earlier index if there are delimiters
2086      */
2087     protected int indexFirstOf(String s, String delims) {
2088         return indexFirstOf(s, delims, -1);
2089     }
2090 
2091 
2092     /***
2093      * Get the earlier index that to be searched for the first occurrance in
2094      * one of any of the given string.
2095      *
2096      * @param s the string to be indexed
2097      * @param delims the delimiters used to index
2098      * @param offset the from index
2099      * @return the earlier index if there are delimiters
2100      */
2101     protected int indexFirstOf(String s, String delims, int offset) {
2102         if (s == null || s.length() == 0) {
2103             return -1;
2104         }
2105         if (delims == null || delims.length() == 0) {
2106             return -1;
2107         }
2108         // check boundaries
2109         if (offset < 0) {
2110             offset = 0;
2111         } else if (offset > s.length()) {
2112             return -1;
2113         }
2114         // s is never null
2115         int min = s.length();
2116         char[] delim = delims.toCharArray();
2117         for (int i = 0; i < delim.length; i++) {
2118             int at = s.indexOf(delim[i], offset);
2119             if (at >= 0 && at < min) {
2120                 min = at;
2121             }
2122         }
2123         return (min == s.length()) ? -1 : min;
2124     }
2125 
2126 
2127     /***
2128      * Get the earlier index that to be searched for the first occurrance in
2129      * one of any of the given array.
2130      *
2131      * @param s the character array to be indexed
2132      * @param delim the delimiter used to index
2133      * @return the ealier index if there are a delimiter
2134      */
2135     protected int indexFirstOf(char[] s, char delim) {
2136         return indexFirstOf(s, delim, 0);
2137     }
2138 
2139 
2140     /***
2141      * Get the earlier index that to be searched for the first occurrance in
2142      * one of any of the given array.
2143      *
2144      * @param s the character array to be indexed
2145      * @param delim the delimiter used to index
2146      * @param offset The offset.
2147      * @return the ealier index if there is a delimiter
2148      */
2149     protected int indexFirstOf(char[] s, char delim, int offset) {
2150         if (s == null || s.length == 0) {
2151             return -1;
2152         }
2153         // check boundaries
2154         if (offset < 0) {
2155             offset = 0;
2156         } else if (offset > s.length) {
2157             return -1;
2158         }
2159         for (int i = offset; i < s.length; i++) {
2160             if (s[i] == delim) {
2161                 return i;
2162             }
2163         }
2164         return -1;
2165     }
2166 
2167 
2168     /***
2169      * Parse the authority component.
2170      *
2171      * @param original the original character sequence of authority component
2172      * @param escaped <code>true</code> if <code>original</code> is escaped
2173      * @throws URIException If an error occurs.
2174      */
2175     protected void parseAuthority(String original, boolean escaped)
2176         throws URIException {
2177 
2178         // Reset flags
2179         _is_reg_name = _is_server =
2180         _is_hostname = _is_IPv4address = _is_IPv6reference = false;
2181 
2182         // set the charset to do escape encoding
2183         String charset = getProtocolCharset();
2184 
2185         boolean hasPort = true;
2186         int from = 0;
2187         int next = original.indexOf('@');
2188         if (next != -1) { // neither -1 and 0
2189             // each protocol extented from URI supports the specific userinfo
2190             _userinfo = (escaped) ? original.substring(0, next).toCharArray() 
2191                 : encode(original.substring(0, next), allowed_userinfo,
2192                         charset);
2193             from = next + 1;
2194         }
2195         next = original.indexOf('[', from);
2196         if (next >= from) {
2197             next = original.indexOf(']', from);
2198             if (next == -1) {
2199                 throw new URIException(URIException.PARSING, "IPv6reference");
2200             } else {
2201                 next++;
2202             }
2203             // In IPv6reference, '[', ']' should be excluded
2204             _host = (escaped) ? original.substring(from, next).toCharArray() 
2205                 : encode(original.substring(from, next), allowed_IPv6reference,
2206                         charset);
2207             // Set flag
2208             _is_IPv6reference = true;
2209         } else { // only for !_is_IPv6reference
2210             next = original.indexOf(':', from);
2211             if (next == -1) {
2212                 next = original.length();
2213                 hasPort = false;
2214             }
2215             // REMINDME: it doesn't need the pre-validation
2216             _host = original.substring(from, next).toCharArray();
2217             if (validate(_host, IPv4address)) {
2218                 // Set flag
2219                 _is_IPv4address = true;
2220             } else if (validate(_host, hostname)) {
2221                 // Set flag
2222                 _is_hostname = true;
2223             } else {
2224                 // Set flag
2225                 _is_reg_name = true;
2226             }
2227         }
2228         if (_is_reg_name) {
2229             // Reset flags for a server-based naming authority
2230             _is_server = _is_hostname = _is_IPv4address =
2231             _is_IPv6reference = false;
2232             // set a registry-based naming authority
2233             if (escaped) {
2234                 _authority = original.toCharArray();
2235                 if (!validate(_authority, reg_name)) {
2236                     throw new URIException("Invalid authority");
2237                 }
2238             } else {
2239                 _authority = encode(original, allowed_reg_name, charset);
2240             }
2241         } else {
2242             if (original.length() - 1 > next && hasPort 
2243                 && original.charAt(next) == ':') { // not empty
2244                 from = next + 1;
2245                 try {
2246                     _port = Integer.parseInt(original.substring(from));
2247                 } catch (NumberFormatException error) {
2248                     throw new URIException(URIException.PARSING,
2249                             "invalid port number");
2250                 }
2251             }
2252             // set a server-based naming authority
2253             StringBuffer buf = new StringBuffer();
2254             if (_userinfo != null) { // has_userinfo
2255                 buf.append(_userinfo);
2256                 buf.append('@');
2257             }
2258             if (_host != null) {
2259                 buf.append(_host);
2260                 if (_port != -1) {
2261                     buf.append(':');
2262                     buf.append(_port);
2263                 }
2264             }
2265             _authority = buf.toString().toCharArray();
2266             // Set flag
2267             _is_server = true;
2268         }
2269     }
2270 
2271 
2272     /***
2273      * Once it's parsed successfully, set this URI.
2274      *
2275      * @see #getRawURI
2276      */
2277     protected void setURI() {
2278         // set _uri
2279         StringBuffer buf = new StringBuffer();
2280         // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
2281         if (_scheme != null) {
2282             buf.append(_scheme);
2283             buf.append(':');
2284         }
2285         if (_is_net_path) {
2286             buf.append("//");
2287             if (_authority != null) { // has_authority
2288                 buf.append(_authority);
2289             }
2290         }
2291         if (_opaque != null && _is_opaque_part) {
2292             buf.append(_opaque);
2293         } else if (_path != null) {
2294             // _is_hier_part or _is_relativeURI
2295             if (_path.length != 0) {
2296                 buf.append(_path);
2297             }
2298         }
2299         if (_query != null) { // has_query
2300             buf.append('?');
2301             buf.append(_query);
2302         }
2303         // ignore the fragment identifier
2304         _uri = buf.toString().toCharArray();
2305         hash = 0;
2306     }
2307 
2308     // ----------------------------------------------------------- Test methods
2309   
2310 
2311     /***
2312      * Tell whether or not this URI is absolute.
2313      *
2314      * @return true iif this URI is absoluteURI
2315      */
2316     public boolean isAbsoluteURI() {
2317         return (_scheme != null);
2318     }
2319   
2320 
2321     /***
2322      * Tell whether or not this URI is relative.
2323      *
2324      * @return true iif this URI is relativeURI
2325      */
2326     public boolean isRelativeURI() {
2327         return (_scheme == null);
2328     }
2329 
2330 
2331     /***
2332      * Tell whether or not the absoluteURI of this URI is hier_part.
2333      *
2334      * @return true iif the absoluteURI is hier_part
2335      */
2336     public boolean isHierPart() {
2337         return _is_hier_part;
2338     }
2339 
2340 
2341     /***
2342      * Tell whether or not the absoluteURI of this URI is opaque_part.
2343      *
2344      * @return true iif the absoluteURI is opaque_part
2345      */
2346     public boolean isOpaquePart() {
2347         return _is_opaque_part;
2348     }
2349 
2350 
2351     /***
2352      * Tell whether or not the relativeURI or heir_part of this URI is net_path.
2353      * It's the same function as the has_authority() method.
2354      *
2355      * @return true iif the relativeURI or heir_part is net_path
2356      * @see #hasAuthority
2357      */
2358     public boolean isNetPath() {
2359         return _is_net_path || (_authority != null);
2360     }
2361 
2362 
2363     /***
2364      * Tell whether or not the relativeURI or hier_part of this URI is abs_path.
2365      *
2366      * @return true iif the relativeURI or hier_part is abs_path
2367      */
2368     public boolean isAbsPath() {
2369         return _is_abs_path;
2370     }
2371 
2372 
2373     /***
2374      * Tell whether or not the relativeURI of this URI is rel_path.
2375      *
2376      * @return true iif the relativeURI is rel_path
2377      */
2378     public boolean isRelPath() {
2379         return _is_rel_path;
2380     }
2381 
2382 
2383     /***
2384      * Tell whether or not this URI has authority.
2385      * It's the same function as the is_net_path() method.
2386      *
2387      * @return true iif this URI has authority
2388      * @see #isNetPath
2389      */
2390     public boolean hasAuthority() {
2391         return (_authority != null) || _is_net_path;
2392     }
2393 
2394     /***
2395      * Tell whether or not the authority component of this URI is reg_name.
2396      *
2397      * @return true iif the authority component is reg_name
2398      */
2399     public boolean isRegName() {
2400         return _is_reg_name;
2401     }
2402   
2403 
2404     /***
2405      * Tell whether or not the authority component of this URI is server.
2406      *
2407      * @return true iif the authority component is server
2408      */
2409     public boolean isServer() {
2410         return _is_server;
2411     }
2412   
2413 
2414     /***
2415      * Tell whether or not this URI has userinfo.
2416      *
2417      * @return true iif this URI has userinfo
2418      */
2419     public boolean hasUserinfo() {
2420         return (_userinfo != null);
2421     }
2422   
2423 
2424     /***
2425      * Tell whether or not the host part of this URI is hostname.
2426      *
2427      * @return true iif the host part is hostname
2428      */
2429     public boolean isHostname() {
2430         return _is_hostname;
2431     }
2432 
2433 
2434     /***
2435      * Tell whether or not the host part of this URI is IPv4address.
2436      *
2437      * @return true iif the host part is IPv4address
2438      */
2439     public boolean isIPv4address() {
2440         return _is_IPv4address;
2441     }
2442 
2443 
2444     /***
2445      * Tell whether or not the host part of this URI is IPv6reference.
2446      *
2447      * @return true iif the host part is IPv6reference
2448      */
2449     public boolean isIPv6reference() {
2450         return _is_IPv6reference;
2451     }
2452 
2453 
2454     /***
2455      * Tell whether or not this URI has query.
2456      *
2457      * @return true iif this URI has query
2458      */
2459     public boolean hasQuery() {
2460         return (_query != null);
2461     }
2462    
2463 
2464     /***
2465      * Tell whether or not this URI has fragment.
2466      *
2467      * @return true iif this URI has fragment
2468      */
2469     public boolean hasFragment() {
2470         return (_fragment != null);
2471     }
2472    
2473    
2474     // ---------------------------------------------------------------- Charset
2475 
2476 
2477     /***
2478      * Set the default charset of the protocol.
2479      * <p>
2480      * The character set used to store files SHALL remain a local decision and
2481      * MAY depend on the capability of local operating systems. Prior to the
2482      * exchange of URIs they SHOULD be converted into a ISO/IEC 10646 format
2483      * and UTF-8 encoded. This approach, while allowing international exchange
2484      * of URIs, will still allow backward compatibility with older systems
2485      * because the code set positions for ASCII characters are identical to the
2486      * one byte sequence in UTF-8.
2487      * <p>
2488      * An individual URI scheme may require a single charset, define a default
2489      * charset, or provide a way to indicate the charset used.
2490      *
2491      * <p>
2492      * Always all the time, the setter method is always succeeded and throws
2493      * <code>DefaultCharsetChanged</code> exception.
2494      *
2495      * So API programmer must follow the following way:
2496      * <code><pre>
2497      *  import org.apache.util.URI$DefaultCharsetChanged;
2498      *      .
2499      *      .
2500      *      .
2501      *  try {
2502      *      URI.setDefaultProtocolCharset("UTF-8");
2503      *  } catch (DefaultCharsetChanged cc) {
2504      *      // CASE 1: the exception could be ignored, when it is set by user
2505      *      if (cc.getReasonCode() == DefaultCharsetChanged.PROTOCOL_CHARSET) {
2506      *      // CASE 2: let user know the default protocol charset changed
2507      *      } else {
2508      *      // CASE 2: let user know the default document charset changed
2509      *      }
2510      *  }
2511      *  </pre></code>
2512      *
2513      * The API programmer is responsible to set the correct charset.
2514      * And each application should remember its own charset to support.
2515      *
2516      * @param charset the default charset for each protocol
2517      * @throws DefaultCharsetChanged default charset changed
2518      */
2519     public static void setDefaultProtocolCharset(String charset) 
2520         throws DefaultCharsetChanged {
2521             
2522         defaultProtocolCharset = charset;
2523         throw new DefaultCharsetChanged(DefaultCharsetChanged.PROTOCOL_CHARSET,
2524                 "the default protocol charset changed");
2525     }
2526 
2527 
2528     /***
2529      * Get the default charset of the protocol.
2530      * <p>
2531      * An individual URI scheme may require a single charset, define a default
2532      * charset, or provide a way to indicate the charset used.
2533      * <p>
2534      * To work globally either requires support of a number of character sets
2535      * and to be able to convert between them, or the use of a single preferred
2536      * character set.
2537      * For support of global compatibility it is STRONGLY RECOMMENDED that
2538      * clients and servers use UTF-8 encoding when exchanging URIs.
2539      *
2540      * @return the default charset string
2541      */
2542     public static String getDefaultProtocolCharset() {
2543         return defaultProtocolCharset;
2544     }
2545 
2546 
2547     /***
2548      * Get the protocol charset used by this current URI instance.
2549      * It was set by the constructor for this instance. If it was not set by
2550      * contructor, it will return the default protocol charset.
2551      *
2552      * @return the protocol charset string
2553      * @see #getDefaultProtocolCharset
2554      */
2555     public String getProtocolCharset() {
2556         return (protocolCharset != null) 
2557             ? protocolCharset 
2558             : defaultProtocolCharset;
2559     }
2560 
2561 
2562     /***
2563      * Set the default charset of the document.
2564      * <p>
2565      * Notice that it will be possible to contain mixed characters (e.g.
2566      * ftp://host/KoreanNamespace/ChineseResource). To handle the Bi-directional
2567      * display of these character sets, the protocol charset could be simply
2568      * used again. Because it's not yet implemented that the insertion of BIDI
2569      * control characters at different points during composition is extracted.
2570      * <p>
2571      *
2572      * Always all the time, the setter method is always succeeded and throws
2573      * <code>DefaultCharsetChanged</code> exception.
2574      *
2575      * So API programmer must follow the following way:
2576      * <code><pre>
2577      *  import org.apache.util.URI$DefaultCharsetChanged;
2578      *      .
2579      *      .
2580      *      .
2581      *  try {
2582      *      URI.setDefaultDocumentCharset("EUC-KR");
2583      *  } catch (DefaultCharsetChanged cc) {
2584      *      // CASE 1: the exception could be ignored, when it is set by user
2585      *      if (cc.getReasonCode() == DefaultCharsetChanged.DOCUMENT_CHARSET) {
2586      *      // CASE 2: let user know the default document charset changed
2587      *      } else {
2588      *      // CASE 2: let user know the default protocol charset changed
2589      *      }
2590      *  }
2591      *  </pre></code>
2592      *
2593      * The API programmer is responsible to set the correct charset.
2594      * And each application should remember its own charset to support.
2595      *
2596      * @param charset the default charset for the document
2597      * @throws DefaultCharsetChanged default charset changed
2598      */
2599     public static void setDefaultDocumentCharset(String charset) 
2600         throws DefaultCharsetChanged {
2601             
2602         defaultDocumentCharset = charset;
2603         throw new DefaultCharsetChanged(DefaultCharsetChanged.DOCUMENT_CHARSET,
2604                 "the default document charset changed");
2605     }
2606 
2607 
2608     /***
2609      * Get the recommended default charset of the document.
2610      *
2611      * @return the default charset string
2612      */
2613     public static String getDefaultDocumentCharset() {
2614         return defaultDocumentCharset;
2615     }
2616 
2617 
2618     /***
2619      * Get the default charset of the document by locale.
2620      *
2621      * @return the default charset string by locale
2622      */
2623     public static String getDefaultDocumentCharsetByLocale() {
2624         return defaultDocumentCharsetByLocale;
2625     }
2626 
2627 
2628     /***
2629      * Get the default charset of the document by platform.
2630      *
2631      * @return the default charset string by platform
2632      */
2633     public static String getDefaultDocumentCharsetByPlatform() {
2634         return defaultDocumentCharsetByPlatform;
2635     }
2636 
2637     // ------------------------------------------------------------- The scheme
2638 
2639     /***
2640      * Get the scheme.
2641      *
2642      * @return the scheme
2643      */
2644     public char[] getRawScheme() {
2645         return _scheme;
2646     }
2647 
2648 
2649     /***
2650      * Get the scheme.
2651      *
2652      * @return the scheme
2653      * null if undefined scheme
2654      */
2655     public String getScheme() {
2656         return (_scheme == null) ? null : new String(_scheme);
2657     }
2658 
2659     // ---------------------------------------------------------- The authority
2660 
2661     /***
2662      * Set the authority.  It can be one type of server, hostport, hostname,
2663      * IPv4address, IPv6reference and reg_name.
2664      * <p><blockquote><pre>
2665      *   authority     = server | reg_name
2666      * </pre></blockquote><p>
2667      *
2668      * @param escapedAuthority the raw escaped authority
2669      * @throws URIException If {@link 
2670      * #parseAuthority(java.lang.String,boolean)} fails
2671      * @throws NullPointerException null authority
2672      */
2673     public void setRawAuthority(char[] escapedAuthority) 
2674         throws URIException, NullPointerException {
2675             
2676         parseAuthority(new String(escapedAuthority), true);
2677         setURI();
2678     }
2679 
2680 
2681     /***
2682      * Set the authority.  It can be one type of server, hostport, hostname,
2683      * IPv4address, IPv6reference and reg_name.
2684      * Note that there is no setAuthority method by the escape encoding reason.
2685      *
2686      * @param escapedAuthority the escaped authority string
2687      * @throws URIException If {@link 
2688      * #parseAuthority(java.lang.String,boolean)} fails
2689      */
2690     public void setEscapedAuthority(String escapedAuthority)
2691         throws URIException {
2692 
2693         parseAuthority(escapedAuthority, true);
2694         setURI();
2695     }
2696 
2697 
2698     /***
2699      * Get the raw-escaped authority.
2700      *
2701      * @return the raw-escaped authority
2702      */
2703     public char[] getRawAuthority() {
2704         return _authority;
2705     }
2706 
2707 
2708     /***
2709      * Get the escaped authority.
2710      *
2711      * @return the escaped authority
2712      */
2713     public String getEscapedAuthority() {
2714         return (_authority == null) ? null : new String(_authority);
2715     }
2716 
2717 
2718     /***
2719      * Get the authority.
2720      *
2721      * @return the authority
2722      * @throws URIException If {@link #decode} fails
2723      */
2724     public String getAuthority() throws URIException {
2725         return (_authority == null) ? null : decode(_authority,
2726                 getProtocolCharset());
2727     }
2728 
2729     // ----------------------------------------------------------- The userinfo
2730 
2731     /***
2732      * Get the raw-escaped userinfo.
2733      *
2734      * @return the raw-escaped userinfo
2735      * @see #getAuthority
2736      */
2737     public char[] getRawUserinfo() {
2738         return _userinfo;
2739     }
2740 
2741 
2742     /***
2743      * Get the escaped userinfo.
2744      *
2745      * @return the escaped userinfo
2746      * @see #getAuthority
2747      */
2748     public String getEscapedUserinfo() {
2749         return (_userinfo == null) ? null : new String(_userinfo);
2750     }
2751 
2752 
2753     /***
2754      * Get the userinfo.
2755      *
2756      * @return the userinfo
2757      * @throws URIException If {@link #decode} fails
2758      * @see #getAuthority
2759      */
2760     public String getUserinfo() throws URIException {
2761         return (_userinfo == null) ? null : decode(_userinfo,
2762                 getProtocolCharset());
2763     }
2764 
2765     // --------------------------------------------------------------- The host
2766 
2767     /***
2768      * Get the host.
2769      * <p><blockquote><pre>
2770      *   host          = hostname | IPv4address | IPv6reference
2771      * </pre></blockquote><p>
2772      *
2773      * @return the host
2774      * @see #getAuthority
2775      */
2776     public char[] getRawHost() {
2777         return _host;
2778     }
2779 
2780 
2781     /***
2782      * Get the host.
2783      * <p><blockquote><pre>
2784      *   host          = hostname | IPv4address | IPv6reference
2785      * </pre></blockquote><p>
2786      *
2787      * @return the host
2788      * @throws URIException If {@link #decode} fails
2789      * @see #getAuthority
2790      */
2791     public String getHost() throws URIException {
2792         if (_host != null) {
2793             return decode(_host, getProtocolCharset());
2794         } else {
2795             return null;
2796         }
2797     }
2798 
2799     // --------------------------------------------------------------- The port
2800 
2801     /***
2802      * Get the port.  In order to get the specific default port, the specific
2803      * protocol-supported class extended from the URI class should be used.
2804      * It has the server-based naming authority.
2805      *
2806      * @return the port
2807      * if -1, it has the default port for the scheme or the server-based
2808      * naming authority is not supported in the specific URI.
2809      */
2810     public int getPort() {
2811         return _port;
2812     }
2813 
2814     // --------------------------------------------------------------- The path
2815 
2816     /***
2817      * Set the raw-escaped path.
2818      *
2819      * @param escapedPath the path character sequence
2820      * @throws URIException encoding error or not proper for initial instance
2821      * @see #encode
2822      */
2823     public void setRawPath(char[] escapedPath) throws URIException {
2824         if (escapedPath == null || escapedPath.length == 0) {
2825             _path = _opaque = escapedPath;
2826             setURI();
2827             return;
2828         }
2829         // remove the fragment identifier
2830         escapedPath = removeFragmentIdentifier(escapedPath);
2831         if (_is_net_path || _is_abs_path) {
2832             if (escapedPath[0] != '/') {
2833                 throw new URIException(URIException.PARSING,
2834                         "not absolute path");
2835             }
2836             if (!validate(escapedPath, abs_path)) {
2837                 throw new URIException(URIException.ESCAPING,
2838                         "escaped absolute path not valid");
2839             }
2840             _path = escapedPath;
2841         } else if (_is_rel_path) {
2842             int at = indexFirstOf(escapedPath, '/');
2843             if (at == 0) {
2844                 throw new URIException(URIException.PARSING, "incorrect path");
2845             }
2846             if (at > 0 && !validate(escapedPath, 0, at - 1, rel_segment) 
2847                 && !validate(escapedPath, at, -1, abs_path) 
2848                 || at < 0 && !validate(escapedPath, 0, -1, rel_segment)) {
2849             
2850                 throw new URIException(URIException.ESCAPING,
2851                         "escaped relative path not valid");
2852             }
2853             _path = escapedPath;
2854         } else if (_is_opaque_part) {
2855             if (!uric_no_slash.get(escapedPath[0]) 
2856                 && !validate(escapedPath, 1, -1, uric)) {
2857                 throw new URIException(URIException.ESCAPING,
2858                     "escaped opaque part not valid");
2859             }
2860             _opaque = escapedPath;
2861         } else {
2862             throw new URIException(URIException.PARSING, "incorrect path");
2863         }
2864         setURI();
2865     }
2866 
2867 
2868     /***
2869      * Set the escaped path.
2870      *
2871      * @param escapedPath the escaped path string
2872      * @throws URIException encoding error or not proper for initial instance
2873      * @see #encode
2874      */
2875     public void setEscapedPath(String escapedPath) throws URIException {
2876         if (escapedPath == null) {
2877             _path = _opaque = null;
2878             setURI();
2879             return;
2880         }
2881         setRawPath(escapedPath.toCharArray());
2882     }
2883 
2884 
2885     /***
2886      * Set the path.
2887      *
2888      * @param path the path string
2889      * @throws URIException set incorrectly or fragment only
2890      * @see #encode
2891      */
2892     public void setPath(String path) throws URIException {
2893 
2894         if (path == null || path.length() == 0) {
2895             _path = _opaque = (path == null) ? null : path.toCharArray();
2896             setURI();
2897             return;
2898         }
2899         // set the charset to do escape encoding
2900         String charset = getProtocolCharset();
2901 
2902         if (_is_net_path || _is_abs_path) {
2903             _path = encode(path, allowed_abs_path, charset);
2904         } else if (_is_rel_path) {
2905             StringBuffer buff = new StringBuffer(path.length());
2906             int at = path.indexOf('/');
2907             if (at == 0) { // never 0
2908                 throw new URIException(URIException.PARSING,
2909                         "incorrect relative path");
2910             }
2911             if (at > 0) {
2912                 buff.append(encode(path.substring(0, at), allowed_rel_path,
2913                             charset));
2914                 buff.append(encode(path.substring(at), allowed_abs_path,
2915                             charset));
2916             } else {
2917                 buff.append(encode(path, allowed_rel_path, charset));
2918             }
2919             _path = buff.toString().toCharArray();
2920         } else if (_is_opaque_part) {
2921             StringBuffer buf = new StringBuffer();
2922             buf.insert(0, encode(path.substring(0, 1), uric_no_slash, charset));
2923             buf.insert(1, encode(path.substring(1), uric, charset));
2924             _opaque = buf.toString().toCharArray();
2925         } else {
2926             throw new URIException(URIException.PARSING, "incorrect path");
2927         }
2928         setURI();
2929     }
2930 
2931 
2932     /***
2933      * Resolve the base and relative path.
2934      *
2935      * @param basePath a character array of the basePath
2936      * @param relPath a character array of the relPath
2937      * @return the resolved path
2938      * @throws URIException no more higher path level to be resolved
2939      */
2940     protected char[] resolvePath(char[] basePath, char[] relPath)
2941         throws URIException {
2942 
2943         // REMINDME: paths are never null
2944         String base = (basePath == null) ? "" : new String(basePath);
2945 
2946         // _path could be empty
2947         if (relPath == null || relPath.length == 0) {
2948             return normalize(basePath);
2949         } else if (relPath[0] == '/') {
2950             return normalize(relPath);
2951         } else {
2952             int at = base.lastIndexOf('/');
2953             if (at != -1) {
2954                 basePath = base.substring(0, at + 1).toCharArray();
2955             }
2956             StringBuffer buff = new StringBuffer(base.length() 
2957                 + relPath.length);
2958             buff.append((at != -1) ? base.substring(0, at + 1) : "/");
2959             buff.append(relPath);
2960             return normalize(buff.toString().toCharArray());
2961         }
2962     }
2963 
2964 
2965     /***
2966      * Get the raw-escaped current hierarchy level in the given path.
2967      * If the last namespace is a collection, the path string should end
2968      * with the slash mark ('/').
2969      *
2970      * @param path the path
2971      * @return the current hierarchy level
2972      * @throws URIException no hierarchy level
2973      */
2974     protected char[] getRawCurrentHierPath(char[] path) throws URIException {
2975 
2976         if (_is_opaque_part) {
2977             throw new URIException(URIException.PARSING, "no hierarchy level");
2978         }
2979         if (path == null) {
2980             throw new URIException(URIException.PARSING, "empty path");
2981         }
2982         String buff = new String(path);
2983         int first = buff.indexOf('/');
2984         int last = buff.lastIndexOf('/');
2985         if (last == 0) {
2986             return rootPath;
2987         } else if (first != last && last != -1) {
2988             return buff.substring(0, last).toCharArray();
2989         }
2990         // FIXME: it could be a document on the server side
2991         return path;
2992     }
2993 
2994 
2995     /***
2996      * Get the raw-escaped current hierarchy level.
2997      *
2998      * @return the raw-escaped current hierarchy level
2999      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3000      */
3001     public char[] getRawCurrentHierPath() throws URIException {
3002         return (_path == null) ? null : getRawCurrentHierPath(_path);
3003     }
3004  
3005 
3006     /***
3007      * Get the escaped current hierarchy level.
3008      *
3009      * @return the escaped current hierarchy level
3010      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3011      */
3012     public String getEscapedCurrentHierPath() throws URIException {
3013         char[] path = getRawCurrentHierPath();
3014         return (path == null) ? null : new String(path);
3015     }
3016  
3017 
3018     /***
3019      * Get the current hierarchy level.
3020      *
3021      * @return the current hierarchy level
3022      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3023      * @see #decode
3024      */
3025     public String getCurrentHierPath() throws URIException {
3026         char[] path = getRawCurrentHierPath();
3027         return (path == null) ? null : decode(path, getProtocolCharset());
3028     }
3029 
3030 
3031     /***
3032      * Get the level above this hierarchy level.
3033      *
3034      * @return the raw above hierarchy level
3035      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3036      */
3037     public char[] getRawAboveHierPath() throws URIException {
3038         char[] path = getRawCurrentHierPath();
3039         return (path == null) ? null : getRawCurrentHierPath(path);
3040     }
3041 
3042 
3043     /***
3044      * Get the level above this hierarchy level.
3045      *
3046      * @return the escaped above hierarchy level
3047      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3048      */
3049     public String getEscapedAboveHierPath() throws URIException {
3050         char[] path = getRawAboveHierPath();
3051         return (path == null) ? null : new String(path);
3052     }
3053 
3054 
3055     /***
3056      * Get the level above this hierarchy level.
3057      *
3058      * @return the above hierarchy level
3059      * @throws URIException If {@link #getRawCurrentHierPath(char[])} fails.
3060      * @see #decode
3061      */
3062     public String getAboveHierPath() throws URIException {
3063         char[] path = getRawAboveHierPath();
3064         return (path == null) ? null : decode(path, getProtocolCharset());
3065     }
3066 
3067 
3068     /***
3069      * Get the raw-escaped path.
3070      * <p><blockquote><pre>
3071      *   path          = [ abs_path | opaque_part ]
3072      * </pre></blockquote><p>
3073      *
3074      * @return the raw-escaped path
3075      */
3076     public char[] getRawPath() {
3077         return _is_opaque_part ? _opaque : _path;
3078     }
3079 
3080 
3081     /***
3082      * Get the escaped path.
3083      * <p><blockquote><pre>
3084      *   path          = [ abs_path | opaque_part ]
3085      *   abs_path      = "/"  path_segments 
3086      *   opaque_part   = uric_no_slash *uric
3087      * </pre></blockquote><p>
3088      *
3089      * @return the escaped path string
3090      */
3091     public String getEscapedPath() {
3092         char[] path = getRawPath();
3093         return (path == null) ? null : new String(path);
3094     }
3095 
3096 
3097     /***
3098      * Get the path.
3099      * <p><blockquote><pre>
3100      *   path          = [ abs_path | opaque_part ]
3101      * </pre></blockquote><p>
3102      * @return the path string
3103      * @throws URIException If {@link #decode} fails.
3104      * @see #decode
3105      */
3106     public String getPath() throws URIException { 
3107         char[] path =  getRawPath();
3108         return (path == null) ? null : decode(path, getProtocolCharset());
3109     }
3110 
3111 
3112     /***
3113      * Get the raw-escaped basename of the path.
3114      *
3115      * @return the raw-escaped basename
3116      */
3117     public char[] getRawName() {
3118         if (_path == null) { 
3119             return null;
3120         }
3121 
3122         int at = 0;
3123         for (int i = _path.length - 1; i >= 0; i--) {
3124             if (_path[i] == '/') {
3125                 at = i + 1;
3126                 break;
3127             }
3128         }
3129         int len = _path.length - at;
3130         char[] basename =  new char[len];
3131         System.arraycopy(_path, at, basename, 0, len);
3132         return basename;
3133     }
3134 
3135 
3136     /***
3137      * Get the escaped basename of the path.
3138      *
3139      * @return the escaped basename string
3140      */
3141     public String getEscapedName() {
3142         char[] basename = getRawName();
3143         return (basename == null) ? null : new String(basename);
3144     }
3145 
3146 
3147     /***
3148      * Get the basename of the path.
3149      *
3150      * @return the basename string
3151      * @throws URIException incomplete trailing escape pattern or unsupported
3152      * character encoding
3153      * @see #decode
3154      */
3155     public String getName() throws URIException {
3156         char[] basename = getRawName();
3157         return (basename == null) ? null : decode(getRawName(),
3158                 getProtocolCharset());
3159     }
3160 
3161     // ----------------------------------------------------- The path and query 
3162 
3163     /***
3164      * Get the raw-escaped path and query.
3165      *
3166      * @return the raw-escaped path and query
3167      */
3168     public char[] getRawPathQuery() {
3169 
3170         if (_path == null && _query == null) {
3171             return null;
3172         }
3173         StringBuffer buff = new StringBuffer();
3174         if (_path != null) {
3175             buff.append(_path);
3176         }
3177         if (_query != null) {
3178             buff.append('?');
3179             buff.append(_query);
3180         }
3181         return buff.toString().toCharArray();
3182     }
3183 
3184 
3185     /***
3186      * Get the escaped path and query.
3187      *
3188      * @return the escaped path and query string
3189      */
3190     public String getEscapedPathQuery() {
3191         char[] rawPathQuery = getRawPathQuery();
3192         return (rawPathQuery == null) ? null : new String(rawPathQuery);
3193     }
3194 
3195 
3196     /***
3197      * Get the path and query.
3198      *
3199      * @return the path and query string.
3200      * @throws URIException incomplete trailing escape pattern or unsupported
3201      * character encoding
3202      * @see #decode
3203      */
3204     public String getPathQuery() throws URIException {
3205         char[] rawPathQuery = getRawPathQuery();
3206         return (rawPathQuery == null) ? null : decode(rawPathQuery,
3207                 getProtocolCharset());
3208     }
3209 
3210     // -------------------------------------------------------------- The query 
3211 
3212     /***
3213      * Set the raw-escaped query.
3214      *
3215      * @param escapedQuery the raw-escaped query
3216      * @throws URIException escaped query not valid
3217      */
3218     public void setRawQuery(char[] escapedQuery) throws URIException {
3219         if (escapedQuery == null || escapedQuery.length == 0) {
3220             _query = escapedQuery;
3221             setURI();
3222             return;
3223         }
3224         // remove the fragment identifier
3225         escapedQuery = removeFragmentIdentifier(escapedQuery);
3226         if (!validate(escapedQuery, query)) {
3227             throw new URIException(URIException.ESCAPING,
3228                     "escaped query not valid");
3229         }
3230         _query = escapedQuery;
3231         setURI();
3232     }
3233 
3234 
3235     /***
3236      * Set the escaped query string.
3237      *
3238      * @param escapedQuery the escaped query string
3239      * @throws URIException escaped query not valid
3240      */
3241     public void setEscapedQuery(String escapedQuery) throws URIException {
3242         if (escapedQuery == null) {
3243             _query = null;
3244             setURI();
3245             return;
3246         }
3247         setRawQuery(escapedQuery.toCharArray());
3248     }
3249 
3250 
3251     /***
3252      * Set the query.
3253      * <p>
3254      * When the reserved special characters ("&amp;", "=", "+", ",", and
3255      * "$") within a query component cannot be misinterpreted, it is
3256      * recommended to encode the whole query with this method.
3257      * <p>
3258      * The additional APIs for the special purpose using by the reserved
3259      * special characters used in each protocol are implemented in each protocol
3260      * classes inherited from <code>URI</code>.  So refer to the same-named APIs
3261      * implemented in each specific protocol instance.
3262      *
3263      * @param query the query string.
3264      * @throws URIException incomplete trailing escape pattern or unsupported
3265      * character encoding
3266      * @see #encode
3267      */
3268     public void setQuery(String query) throws URIException {
3269         if (query == null || query.length() == 0) {
3270             _query = (query == null) ? null : query.toCharArray();
3271             setURI();
3272             return;
3273         }
3274         setRawQuery(encode(query, allowed_query, getProtocolCharset()));
3275     }
3276 
3277 
3278     /***
3279      * Get the raw-escaped query.
3280      *
3281      * @return the raw-escaped query
3282      */
3283     public char[] getRawQuery() {
3284         return _query;
3285     }
3286 
3287 
3288     /***
3289      * Get the escaped query.
3290      *
3291      * @return the escaped query string
3292      */
3293     public String getEscapedQuery() {
3294         return (_query == null) ? null : new String(_query);
3295     }
3296 
3297 
3298     /***
3299      * Get the query.
3300      *
3301      * @return the query string.
3302      * @throws URIException incomplete trailing escape pattern or unsupported
3303      * character encoding
3304      * @see #decode
3305      */
3306     public String getQuery() throws URIException {
3307         return (_query == null) ? null : decode(_query, getProtocolCharset());
3308     }
3309 
3310     // ----------------------------------------------------------- The fragment 
3311 
3312     /***
3313      * Set the raw-escaped fragment.
3314      *
3315      * @param escapedFragment the raw-escaped fragment
3316      * @throws URIException escaped fragment not valid
3317      */
3318     public void setRawFragment(char[] escapedFragment) throws URIException {
3319         if (escapedFragment == null || escapedFragment.length == 0) {
3320             _fragment = escapedFragment;
3321             hash = 0;
3322             return;
3323         }
3324         if (!validate(escapedFragment, fragment)) {
3325             throw new URIException(URIException.ESCAPING,
3326                     "escaped fragment not valid");
3327         }
3328         _fragment = escapedFragment;
3329         hash = 0;
3330     }
3331 
3332 
3333     /***
3334      * Set the escaped fragment string.
3335      *
3336      * @param escapedFragment the escaped fragment string
3337      * @throws URIException escaped fragment not valid
3338      */
3339     public void setEscapedFragment(String escapedFragment) throws URIException {
3340         if (escapedFragment == null) {
3341             _fragment = null;
3342             hash = 0;
3343             return;
3344         }
3345         setRawFragment(escapedFragment.toCharArray());
3346     }
3347 
3348 
3349     /***
3350      * Set the fragment.
3351      *
3352      * @param fragment the fragment string.
3353      * @throws URIException If an error occurs.
3354      */
3355     public void setFragment(String fragment) throws URIException {
3356         if (fragment == null || fragment.length() == 0) {
3357             _fragment = (fragment == null) ? null : fragment.toCharArray();
3358             hash = 0;
3359             return;
3360         }
3361         _fragment = encode(fragment, allowed_fragment, getProtocolCharset());
3362         hash = 0;
3363     }
3364 
3365 
3366     /***
3367      * Get the raw-escaped fragment.
3368      * <p>
3369      * The optional fragment identifier is not part of a URI, but is often used
3370      * in conjunction with a URI.
3371      * <p>
3372      * The format and interpretation of fragment identifiers is dependent on
3373      * the media type [RFC2046] of the retrieval result.
3374      * <p>
3375      * A fragment identifier is only meaningful when a URI reference is
3376      * intended for retrieval and the result of that retrieval is a document
3377      * for which the identified fragment is consistently defined.
3378      *
3379      * @return the raw-escaped fragment
3380      */
3381     public char[] getRawFragment() {
3382         return _fragment;
3383     }
3384 
3385 
3386     /***
3387      * Get the escaped fragment.
3388      *
3389      * @return the escaped fragment string
3390      */
3391     public String getEscapedFragment() {
3392         return (_fragment == null) ? null : new String(_fragment);
3393     }
3394 
3395 
3396     /***
3397      * Get the fragment.
3398      *
3399      * @return the fragment string
3400      * @throws URIException incomplete trailing escape pattern or unsupported
3401      * character encoding
3402      * @see #decode
3403      */
3404     public String getFragment() throws URIException {
3405         return (_fragment == null) ? null : decode(_fragment,
3406                 getProtocolCharset());
3407     }
3408 
3409     // ------------------------------------------------------------- Utilities 
3410 
3411     /***
3412      * Remove the fragment identifier of the given component.
3413      *
3414      * @param component the component that a fragment may be included
3415      * @return the component that the fragment identifier is removed
3416      */
3417     protected char[] removeFragmentIdentifier(char[] component) {
3418         if (component == null) { 
3419             return null;
3420         }
3421         int lastIndex = new String(component).indexOf('#');
3422         if (lastIndex != -1) {
3423             component = new String(component).substring(0,
3424                     lastIndex).toCharArray();
3425         }
3426         return component;
3427     }
3428 
3429 
3430     /***
3431      * Normalize the given hier path part.
3432      * 
3433      * <p>Algorithm taken from URI reference parser at 
3434      * http://www.apache.org/~fielding/uri/rev-2002/issues.html.
3435      *
3436      * @param path the path to normalize
3437      * @return the normalized path
3438      * @throws URIException no more higher path level to be normalized
3439      */
3440     protected char[] normalize(char[] path) throws URIException {
3441 
3442         if (path == null) { 
3443             return null;
3444         }
3445 
3446         String normalized = new String(path);
3447 
3448         // If the buffer begins with "./" or "../", the "." or ".." is removed.
3449         if (normalized.startsWith("./")) {
3450             normalized = normalized.substring(1);
3451         } else if (normalized.startsWith("../")) {
3452             normalized = normalized.substring(2);
3453         } else if (normalized.startsWith("..")) {
3454             normalized = normalized.substring(2);
3455         }
3456 
3457         // All occurrences of "/./" in the buffer are replaced with "/"
3458         int index = -1;
3459         while ((index = normalized.indexOf("/./")) != -1) {
3460             normalized = normalized.substring(0, index) + normalized.substring(index + 2);
3461         }
3462 
3463         // If the buffer ends with "/.", the "." is removed.
3464         if (normalized.endsWith("/.")) {
3465             normalized = normalized.substring(0, normalized.length() - 1);
3466         }
3467 
3468         int startIndex = 0;
3469 
3470         // All occurrences of "/<segment>/../" in the buffer, where ".."
3471         // and <segment> are complete path segments, are iteratively replaced
3472         // with "/" in order from left to right until no matching pattern remains.
3473         // If the buffer ends with "/<segment>/..", that is also replaced
3474         // with "/".  Note that <segment> may be empty.
3475         while ((index = normalized.indexOf("/../", startIndex)) != -1) {
3476             int slashIndex = normalized.lastIndexOf('/', index - 1);
3477             if (slashIndex >= 0) {
3478                 normalized = normalized.substring(0, slashIndex) + normalized.substring(index + 3);
3479             } else {
3480                 startIndex = index + 3;   
3481             }
3482         }
3483         if (normalized.endsWith("/..")) {
3484             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3485             if (slashIndex >= 0) {
3486                 normalized = normalized.substring(0, slashIndex + 1);
3487             }
3488         }
3489 
3490         // All prefixes of "<segment>/../" in the buffer, where ".."
3491         // and <segment> are complete path segments, are iteratively replaced
3492         // with "/" in order from left to right until no matching pattern remains.
3493         // If the buffer ends with "<segment>/..", that is also replaced
3494         // with "/".  Note that <segment> may be empty.
3495         while ((index = normalized.indexOf("/../")) != -1) {
3496             int slashIndex = normalized.lastIndexOf('/', index - 1);
3497             if (slashIndex >= 0) {
3498                 break;
3499             } else {
3500                 normalized = normalized.substring(index + 3);
3501             }
3502         }
3503         if (normalized.endsWith("/..")) {
3504             int slashIndex = normalized.lastIndexOf('/', normalized.length() - 4);
3505             if (slashIndex < 0) {
3506                 normalized = "/";
3507             }
3508         }
3509 
3510         return normalized.toCharArray();
3511     }
3512 
3513 
3514     /***
3515      * Normalizes the path part of this URI.  Normalization is only meant to be performed on 
3516      * URIs with an absolute path.  Calling this method on a relative path URI will have no
3517      * effect.
3518      *
3519      * @throws URIException no more higher path level to be normalized
3520      * 
3521      * @see #isAbsPath()
3522      */
3523     public void normalize() throws URIException {
3524         if (isAbsPath()) {
3525             _path = normalize(_path);
3526             setURI();
3527         }
3528     }
3529 
3530 
3531     /***
3532      * Test if the first array is equal to the second array.
3533      *
3534      * @param first the first character array
3535      * @param second the second character array
3536      * @return true if they're equal
3537      */
3538     protected boolean equals(char[] first, char[] second) {
3539 
3540         if (first == null && second == null) {
3541             return true;
3542         }
3543         if (first == null || second == null) {
3544             return false;
3545         }
3546         if (first.length != second.length) {
3547             return false;
3548         }
3549         for (int i = 0; i < first.length; i++) {
3550             if (first[i] != second[i]) {
3551                 return false;
3552             }
3553         }
3554         return true;
3555     }
3556 
3557 
3558     /***
3559      * Test an object if this URI is equal to another.
3560      *
3561      * @param obj an object to compare
3562      * @return true if two URI objects are equal
3563      */
3564     public boolean equals(Object obj) {
3565 
3566         // normalize and test each components
3567         if (obj == this) {
3568             return true;
3569         }
3570         if (!(obj instanceof URI)) {
3571             return false;
3572         }
3573         URI another = (URI) obj;
3574         // scheme
3575         if (!equals(_scheme, another._scheme)) {
3576             return false;
3577         }
3578         // is_opaque_part or is_hier_part?  and opaque
3579         if (!equals(_opaque, another._opaque)) {
3580             return false;
3581         }
3582         // is_hier_part
3583         // has_authority
3584         if (!equals(_authority, another._authority)) {
3585             return false;
3586         }
3587         // path
3588         if (!equals(_path, another._path)) {
3589             return false;
3590         }
3591         // has_query
3592         if (!equals(_query, another._query)) {
3593             return false;
3594         }
3595         // has_fragment?  should be careful of the only fragment case.
3596         if (!equals(_fragment, another._fragment)) {
3597             return false;
3598         }
3599         return true;
3600     }
3601 
3602     // ---------------------------------------------------------- Serialization
3603 
3604     /***
3605      * Write the content of this URI.
3606      *
3607      * @param oos the object-output stream
3608      * @throws IOException If an IO problem occurs.
3609      */
3610     private void writeObject(ObjectOutputStream oos)
3611         throws IOException {
3612 
3613         oos.defaultWriteObject();
3614     }
3615 
3616 
3617     /***
3618      * Read a URI.
3619      *
3620      * @param ois the object-input stream
3621      * @throws ClassNotFoundException If one of the classes specified in the
3622      * input stream cannot be found.
3623      * @throws IOException If an IO problem occurs.
3624      */
3625     private void readObject(ObjectInputStream ois)
3626         throws ClassNotFoundException, IOException {
3627 
3628         ois.defaultReadObject();
3629     }
3630 
3631     // -------------------------------------------------------------- Hash code
3632 
3633     /***
3634      * Return a hash code for this URI.
3635      *
3636      * @return a has code value for this URI
3637      */
3638     public int hashCode() {
3639         if (hash == 0) {
3640             char[] c = _uri;
3641             if (c != null) {
3642                 for (int i = 0, len = c.length; i < len; i++) {
3643                     hash = 31 * hash + c[i];
3644                 }
3645             }
3646             c = _fragment;
3647             if (c != null) {
3648                 for (int i = 0, len = c.length; i < len; i++) {
3649                     hash = 31 * hash + c[i];
3650                 }
3651             }
3652         }
3653         return hash;
3654     }
3655 
3656     // ------------------------------------------------------------- Comparison 
3657 
3658     /***
3659      * Compare this URI to another object. 
3660      *
3661      * @param obj the object to be compared.
3662      * @return 0, if it's same,
3663      * -1, if failed, first being compared with in the authority component
3664      * @throws ClassCastException not URI argument
3665      */
3666     public int compareTo(Object obj) throws ClassCastException {
3667 
3668         URI another = (URI) obj;
3669         if (!equals(_authority, another.getRawAuthority())) { 
3670             return -1;
3671         }
3672         return toString().compareTo(another.toString());
3673     }
3674 
3675     // ------------------------------------------------------------------ Clone
3676 
3677     /***
3678      * Create and return a copy of this object, the URI-reference containing
3679      * the userinfo component.  Notice that the whole URI-reference including
3680      * the userinfo component counld not be gotten as a <code>String</code>.
3681      * <p>
3682      * To copy the identical <code>URI</code> object including the userinfo
3683      * component, it should be used.
3684      *
3685      * @return a clone of this instance
3686      */
3687     public synchronized Object clone() throws CloneNotSupportedException {
3688 
3689         URI instance = (URI) super.clone();
3690 
3691         instance._uri = _uri;
3692         instance._scheme = _scheme;
3693         instance._opaque = _opaque;
3694         instance._authority = _authority;
3695         instance._userinfo = _userinfo;
3696         instance._host = _host;
3697         instance._port = _port;
3698         instance._path = _path;
3699         instance._query = _query;
3700         instance._fragment = _fragment;
3701         // the charset to do escape encoding for this instance
3702         instance.protocolCharset = protocolCharset;
3703         // flags
3704         instance._is_hier_part = _is_hier_part;
3705         instance._is_opaque_part = _is_opaque_part;
3706         instance._is_net_path = _is_net_path;
3707         instance._is_abs_path = _is_abs_path;
3708         instance._is_rel_path = _is_rel_path;
3709         instance._is_reg_name = _is_reg_name;
3710         instance._is_server = _is_server;
3711         instance._is_hostname = _is_hostname;
3712         instance._is_IPv4address = _is_IPv4address;
3713         instance._is_IPv6reference = _is_IPv6reference;
3714 
3715         return instance;
3716     }
3717 
3718     // ------------------------------------------------------------ Get the URI
3719 
3720     /***
3721      * It can be gotten the URI character sequence. It's raw-escaped.
3722      * For the purpose of the protocol to be transported, it will be useful.
3723      * <p>
3724      * It is clearly unwise to use a URL that contains a password which is
3725      * intended to be secret. In particular, the use of a password within
3726      * the 'userinfo' component of a URL is strongly disrecommended except
3727      * in those rare cases where the 'password' parameter is intended to be
3728      * public.
3729      * <p>
3730      * When you want to get each part of the userinfo, you need to use the
3731      * specific methods in the specific URL. It depends on the specific URL.
3732      *
3733      * @return the URI character sequence
3734      */
3735     public char[] getRawURI() {
3736         return _uri;
3737     }
3738 
3739 
3740     /***
3741      * It can be gotten the URI character sequence. It's escaped.
3742      * For the purpose of the protocol to be transported, it will be useful.
3743      *
3744      * @return the escaped URI string
3745      */
3746     public String getEscapedURI() {
3747         return (_uri == null) ? null : new String(_uri);
3748     }
3749     
3750 
3751     /***
3752      * It can be gotten the URI character sequence.
3753      *
3754      * @return the original URI string
3755      * @throws URIException incomplete trailing escape pattern or unsupported
3756      * character encoding
3757      * @see #decode
3758      */
3759     public String getURI() throws URIException {
3760         return (_uri == null) ? null : decode(_uri, getProtocolCharset());
3761     }
3762 
3763 
3764     /***
3765      * Get the URI reference character sequence.
3766      *
3767      * @return the URI reference character sequence
3768      */
3769     public char[] getRawURIReference() {
3770         if (_fragment == null) { 
3771             return _uri;
3772         }
3773         if (_uri == null) { 
3774             return _fragment;
3775         }
3776         // if _uri != null &&  _fragment != null
3777         String uriReference = new String(_uri) + "#" + new String(_fragment);
3778         return uriReference.toCharArray();
3779     }
3780 
3781 
3782     /***
3783      * Get the escaped URI reference string.
3784      *
3785      * @return the escaped URI reference string
3786      */
3787     public String getEscapedURIReference() {
3788         char[] uriReference = getRawURIReference();
3789         return (uriReference == null) ? null : new String(uriReference);
3790     }
3791 
3792 
3793     /***
3794      * Get the original URI reference string.
3795      *
3796      * @return the original URI reference string
3797      * @throws URIException If {@link #decode} fails.
3798      */
3799     public String getURIReference() throws URIException {
3800         char[] uriReference = getRawURIReference();
3801         return (uriReference == null) ? null : decode(uriReference,
3802                 getProtocolCharset());
3803     }
3804 
3805 
3806     /***
3807      * Get the escaped URI string.
3808      * <p>
3809      * On the document, the URI-reference form is only used without the userinfo
3810      * component like http://jakarta.apache.org/ by the security reason.
3811      * But the URI-reference form with the userinfo component could be parsed.
3812      * <p>
3813      * In other words, this URI and any its subclasses must not expose the
3814      * URI-reference expression with the userinfo component like
3815      * http://user:password@hostport/restricted_zone.<br>
3816      * It means that the API client programmer should extract each user and
3817      * password to access manually.  Probably it will be supported in the each
3818      * subclass, however, not a whole URI-reference expression.
3819      *
3820      * @return the escaped URI string
3821      * @see #clone()
3822      */
3823     public String toString() {
3824         return getEscapedURI();
3825     }
3826 
3827 
3828     // ------------------------------------------------------------ Inner class
3829 
3830     /*** 
3831      * The charset-changed normal operation to represent to be required to
3832      * alert to user the fact the default charset is changed.
3833      */
3834     public static class DefaultCharsetChanged extends RuntimeException {
3835 
3836         // ------------------------------------------------------- constructors
3837 
3838         /***
3839          * The constructor with a reason string and its code arguments.
3840          *
3841          * @param reasonCode the reason code
3842          * @param reason the reason
3843          */
3844         public DefaultCharsetChanged(int reasonCode, String reason) {
3845             super(reason);
3846             this.reason = reason;
3847             this.reasonCode = reasonCode;
3848         }
3849 
3850         // ---------------------------------------------------------- constants
3851 
3852         /*** No specified reason code. */
3853         public static final int UNKNOWN = 0;
3854 
3855         /*** Protocol charset changed. */
3856         public static final int PROTOCOL_CHARSET = 1;
3857 
3858         /*** Document charset changed. */
3859         public static final int DOCUMENT_CHARSET = 2;
3860 
3861         // ------------------------------------------------- instance variables
3862 
3863         /*** The reason code. */
3864         private int reasonCode;
3865 
3866         /*** The reason message. */
3867         private String reason;
3868 
3869         // ------------------------------------------------------------ methods
3870 
3871         /***
3872          * Get the reason code.
3873          *
3874          * @return the reason code
3875          */
3876         public int getReasonCode() {
3877             return reasonCode;
3878         }
3879 
3880         /***
3881          * Get the reason message.
3882          *
3883          * @return the reason message
3884          */
3885         public String getReason() {
3886             return reason;
3887         }
3888 
3889     }
3890 
3891 
3892     /*** 
3893      * A mapping to determine the (somewhat arbitrarily) preferred charset for a
3894      * given locale.  Supports all locales recognized in JDK 1.1.
3895      * <p>
3896      * The distribution of this class is Servlets.com.    It was originally
3897      * written by Jason Hunter [jhunter at acm.org] and used by with permission.
3898      */
3899     public static class LocaleToCharsetMap {
3900 
3901         /*** A mapping of language code to charset */
3902         private static final Hashtable LOCALE_TO_CHARSET_MAP;
3903         static {
3904             LOCALE_TO_CHARSET_MAP = new Hashtable();
3905             LOCALE_TO_CHARSET_MAP.put("ar", "ISO-8859-6");
3906             LOCALE_TO_CHARSET_MAP.put("be", "ISO-8859-5");
3907             LOCALE_TO_CHARSET_MAP.put("bg", "ISO-8859-5");
3908             LOCALE_TO_CHARSET_MAP.put("ca", "ISO-8859-1");
3909             LOCALE_TO_CHARSET_MAP.put("cs", "ISO-8859-2");
3910             LOCALE_TO_CHARSET_MAP.put("da", "ISO-8859-1");
3911             LOCALE_TO_CHARSET_MAP.put("de", "ISO-8859-1");
3912             LOCALE_TO_CHARSET_MAP.put("el", "ISO-8859-7");
3913             LOCALE_TO_CHARSET_MAP.put("en", "ISO-8859-1");
3914             LOCALE_TO_CHARSET_MAP.put("es", "ISO-8859-1");
3915             LOCALE_TO_CHARSET_MAP.put("et", "ISO-8859-1");
3916             LOCALE_TO_CHARSET_MAP.put("fi", "ISO-8859-1");
3917             LOCALE_TO_CHARSET_MAP.put("fr", "ISO-8859-1");
3918             LOCALE_TO_CHARSET_MAP.put("hr", "ISO-8859-2");
3919             LOCALE_TO_CHARSET_MAP.put("hu", "ISO-8859-2");
3920             LOCALE_TO_CHARSET_MAP.put("is", "ISO-8859-1");
3921             LOCALE_TO_CHARSET_MAP.put("it", "ISO-8859-1");
3922             LOCALE_TO_CHARSET_MAP.put("iw", "ISO-8859-8");
3923             LOCALE_TO_CHARSET_MAP.put("ja", "Shift_JIS");
3924             LOCALE_TO_CHARSET_MAP.put("ko", "EUC-KR");
3925             LOCALE_TO_CHARSET_MAP.put("lt", "ISO-8859-2");
3926             LOCALE_TO_CHARSET_MAP.put("lv", "ISO-8859-2");
3927             LOCALE_TO_CHARSET_MAP.put("mk", "ISO-8859-5");
3928             LOCALE_TO_CHARSET_MAP.put("nl", "ISO-8859-1");
3929             LOCALE_TO_CHARSET_MAP.put("no", "ISO-8859-1");
3930             LOCALE_TO_CHARSET_MAP.put("pl", "ISO-8859-2");
3931             LOCALE_TO_CHARSET_MAP.put("pt", "ISO-8859-1");
3932             LOCALE_TO_CHARSET_MAP.put("ro", "ISO-8859-2");
3933             LOCALE_TO_CHARSET_MAP.put("ru", "ISO-8859-5");
3934             LOCALE_TO_CHARSET_MAP.put("sh", "ISO-8859-5");
3935             LOCALE_TO_CHARSET_MAP.put("sk", "ISO-8859-2");
3936             LOCALE_TO_CHARSET_MAP.put("sl", "ISO-8859-2");
3937             LOCALE_TO_CHARSET_MAP.put("sq", "ISO-8859-2");
3938             LOCALE_TO_CHARSET_MAP.put("sr", "ISO-8859-5");
3939             LOCALE_TO_CHARSET_MAP.put("sv", "ISO-8859-1");
3940             LOCALE_TO_CHARSET_MAP.put("tr", "ISO-8859-9");
3941             LOCALE_TO_CHARSET_MAP.put("uk", "ISO-8859-5");
3942             LOCALE_TO_CHARSET_MAP.put("zh", "GB2312");
3943             LOCALE_TO_CHARSET_MAP.put("zh_TW", "Big5");
3944         }
3945        
3946         /***
3947          * Get the preferred charset for the given locale.
3948          *
3949          * @param locale the locale
3950          * @return the preferred charset or null if the locale is not
3951          * recognized.
3952          */
3953         public static String getCharset(Locale locale) {
3954             // try for an full name match (may include country)
3955             String charset =
3956                 (String) LOCALE_TO_CHARSET_MAP.get(locale.toString());
3957             if (charset != null) { 
3958                 return charset;
3959             }
3960            
3961             // if a full name didn't match, try just the language
3962             charset = (String) LOCALE_TO_CHARSET_MAP.get(locale.getLanguage());
3963             return charset;  // may be null
3964         }
3965 
3966     }
3967 
3968 }
3969