Coverage Report - org.apache.myfaces.shared.renderkit.html.util.HTMLEncoder
 
Classes in this File Line Coverage Branch Coverage Complexity
HTMLEncoder
79%
272/341
63%
291/461
14.15
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one
 3  
  * or more contributor license agreements.  See the NOTICE file
 4  
  * distributed with this work for additional information
 5  
  * regarding copyright ownership.  The ASF licenses this file
 6  
  * to you under the Apache License, Version 2.0 (the
 7  
  * "License"); you may not use this file except in compliance
 8  
  * with the License.  You may obtain a copy of the License at
 9  
  *
 10  
  *   http://www.apache.org/licenses/LICENSE-2.0
 11  
  *
 12  
  * Unless required by applicable law or agreed to in writing,
 13  
  * software distributed under the License is distributed on an
 14  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15  
  * KIND, either express or implied.  See the License for the
 16  
  * specific language governing permissions and limitations
 17  
  * under the License.
 18  
  */
 19  
 package org.apache.myfaces.shared.renderkit.html.util;
 20  
 
 21  
 import java.io.ByteArrayOutputStream;
 22  
 import java.io.IOException;
 23  
 import java.io.OutputStreamWriter;
 24  
 import java.io.Writer;
 25  
 
 26  
 /**
 27  
  * Converts Strings so that they can be used within HTML-Code.
 28  
  */
 29  0
 public abstract class HTMLEncoder
 30  
 {
 31  
     /**
 32  
      * Variant of {@link #encode} where encodeNewline is false and encodeNbsp is true.
 33  
      */
 34  
     public static String encode (String string)
 35  
     {
 36  5
         return encode(string, false, true);
 37  
     }
 38  
 
 39  
     /**
 40  
      * Variant of {@link #encode} where encodeNbsp is true.
 41  
      */
 42  
     public static String encode (String string, boolean encodeNewline)
 43  
     {
 44  2
         return encode(string, encodeNewline, true);
 45  
     }
 46  
 
 47  
     /**
 48  
      * Variant of {@link #encode} where encodeNbsp and encodeNonLatin are true 
 49  
      */
 50  
     public static String encode (String string, boolean encodeNewline, boolean encodeSubsequentBlanksToNbsp)
 51  
     {
 52  7
         return encode(string, encodeNewline, encodeSubsequentBlanksToNbsp, true);
 53  
     }
 54  
 
 55  
     /**
 56  
      * Encodes the given string, so that it can be used within a html page.
 57  
      * @param string the string to convert
 58  
      * @param encodeNewline if true newline characters are converted to <br>'s
 59  
      * @param encodeSubsequentBlanksToNbsp if true subsequent blanks are converted to  's
 60  
      * @param encodeNonLatin if true encode non-latin characters as numeric character references
 61  
      */
 62  
     public static String encode (String string,
 63  
                                  boolean encodeNewline,
 64  
                                  boolean encodeSubsequentBlanksToNbsp,
 65  
                                  boolean encodeNonLatin)
 66  
     {
 67  7
         if (string == null)
 68  
         {
 69  1
             return "";
 70  
         }
 71  
 
 72  6
         StringBuilder sb = null;    //create later on demand
 73  
         String app;
 74  6
         char c = ' ';
 75  
         char prevC;
 76  6
         int length = string.length();
 77  172
         for (int i = 0; i < length; ++i)
 78  
         {
 79  166
             app = null;
 80  166
             prevC = c;
 81  166
             c = string.charAt(i);
 82  
             
 83  
             // All characters before letters
 84  166
             if ((int)c < 0x41)
 85  
             {
 86  36
                 switch (c)
 87  
                 {
 88  2
                     case '"': app = "&quot;"; break;    //"
 89  1
                     case '&': app = "&amp;"; break;     //&
 90  1
                     case '<': app = "&lt;"; break;      //<
 91  4
                     case '>': app = "&gt;"; break;      //>
 92  
                     case ' ':
 93  19
                         if (encodeSubsequentBlanksToNbsp &&
 94  
                                 prevC == ' ')
 95  
                         {
 96  
                             //Space at beginning or after another space
 97  0
                             app = "&#160;";
 98  
                         }
 99  
                         break;
 100  
                     case '\n':
 101  2
                         if (encodeNewline)
 102  
                         {
 103  1
                             app = "<br/>";
 104  
                         }
 105  
                         break;
 106  
                     default:
 107  
                         break;
 108  
                 }
 109  
                 // http://www.w3.org/MarkUp/html3/specialchars.html
 110  
                 // From C0 extension U+0000-U+001F only U+0009, U+000A and
 111  
                 // U+000D are valid control characters
 112  36
                 if (c <= 0x1F && c != 0x09 && c != 0x0A && c != 0x0D)
 113  
                 {
 114  
                     // Ignore escape character
 115  0
                     app = "";
 116  
                 }
 117  
             }
 118  130
             else if (encodeNonLatin && (int)c > 0x80)
 119  
             {
 120  2
                  switch(c)
 121  
                  {
 122  
                     //german umlauts
 123  0
                     case '\u00E4' : app = "&auml;";  break;
 124  0
                     case '\u00C4' : app = "&Auml;";  break;
 125  2
                     case '\u00F6' : app = "&ouml;";  break;
 126  0
                     case '\u00D6' : app = "&Ouml;";  break;
 127  0
                     case '\u00FC' : app = "&uuml;";  break;
 128  0
                     case '\u00DC' : app = "&Uuml;";  break;
 129  0
                     case '\u00DF' : app = "&szlig;"; break;
 130  
 
 131  
                     //misc
 132  
                     //case 0x80: app = "&euro;"; break;  sometimes euro symbol is ascii 128, should we suport it?
 133  0
                     case '\u20AC': app = "&euro;";  break;
 134  0
                     case '\u00AB': app = "&laquo;"; break;
 135  0
                     case '\u00BB': app = "&raquo;"; break;
 136  0
                     case '\u00A0': app = "&#160;"; break;
 137  
 
 138  
                     default :
 139  
                         //encode all non basic latin characters
 140  0
                         app = "&#" + ((int)c) + ";";
 141  
                     break;
 142  
                 }
 143  
             }
 144  166
             if (app != null)
 145  
             {
 146  11
                 if (sb == null)
 147  
                 {
 148  4
                     sb = new StringBuilder(string.substring(0, i));
 149  
                 }
 150  11
                 sb.append(app);
 151  
             }
 152  
             else
 153  
             {
 154  155
                 if (sb != null)
 155  
                 {
 156  109
                     sb.append(c);
 157  
                 }
 158  
             }
 159  
         }
 160  
 
 161  6
         if (sb == null)
 162  
         {
 163  2
             return string;
 164  
         }
 165  
         else
 166  
         {
 167  4
             return sb.toString();
 168  
         }
 169  
     }
 170  
     
 171  
     /**
 172  
      * Variant of {@link #encode} where encodeNewline is false and encodeNbsp is true.
 173  
      */
 174  
     public static void encode (Writer writer, String string) throws IOException
 175  
     {
 176  5
         encode(writer, string, false, true);
 177  5
     }
 178  
 
 179  
     /**
 180  
      * Variant of {@link #encode} where encodeNbsp is true.
 181  
      */
 182  
     public static void encode (Writer writer, String string, boolean encodeNewline) throws IOException
 183  
     {
 184  2
         encode(writer, string, encodeNewline, true);
 185  2
     }
 186  
 
 187  
     /**
 188  
      * Variant of {@link #encode} where encodeNbsp and encodeNonLatin are true 
 189  
      */
 190  
     public static void encode (Writer writer, String string, 
 191  
             boolean encodeNewline, boolean encodeSubsequentBlanksToNbsp) throws IOException
 192  
     {
 193  7
         encode(writer, string, encodeNewline, encodeSubsequentBlanksToNbsp, true);
 194  7
     }
 195  
     
 196  
     public static void encode (Writer writer, String string,
 197  
                                  boolean encodeNewline,
 198  
                                  boolean encodeSubsequentBlanksToNbsp,
 199  
                                  boolean encodeNonLatin) throws IOException
 200  
     {
 201  11
         if (string == null)
 202  
         {
 203  1
             return;
 204  
         }
 205  
 
 206  10
         int start = 0;
 207  
         String app;
 208  10
         char c = ' ';
 209  
         char prevC;
 210  10
         int length = string.length();
 211  196
         for (int i = 0; i < length; ++i)
 212  
         {
 213  186
             app = null;
 214  186
             prevC = c;
 215  186
             c = string.charAt(i);
 216  
             
 217  
             // All characters before letters
 218  186
             if ((int)c < 0x41)
 219  
             {
 220  36
                 switch (c)
 221  
                 {
 222  2
                     case '"': app = "&quot;"; break;    //"
 223  1
                     case '&': app = "&amp;"; break;     //&
 224  1
                     case '<': app = "&lt;"; break;      //<
 225  4
                     case '>': app = "&gt;"; break;      //>
 226  
                     case ' ':
 227  19
                         if (encodeSubsequentBlanksToNbsp &&
 228  
                                 prevC == ' ')
 229  
                         {
 230  
                             //Space at beginning or after another space
 231  0
                             app = "&#160;";
 232  
                         }
 233  
                         break;
 234  
                     case '\n':
 235  2
                         if (encodeNewline)
 236  
                         {
 237  1
                             app = "<br/>";
 238  
                         }
 239  
                         break;
 240  
                     default:
 241  
                         break;
 242  
                 }
 243  
                 // http://www.w3.org/MarkUp/html3/specialchars.html
 244  
                 // From C0 extension U+0000-U+001F only U+0009, U+000A and
 245  
                 // U+000D are valid control characters
 246  36
                 if (c <= 0x1F && c != 0x09 && c != 0x0A && c != 0x0D)
 247  
                 {
 248  
                     // Ignore escape character
 249  0
                     app = "";
 250  
                 }
 251  
             }
 252  150
             else if (encodeNonLatin && (int)c > 0x80)
 253  
             {
 254  2
                  switch(c)
 255  
                  {
 256  
                     //german umlauts
 257  0
                     case '\u00E4' : app = "&auml;";  break;
 258  0
                     case '\u00C4' : app = "&Auml;";  break;
 259  2
                     case '\u00F6' : app = "&ouml;";  break;
 260  0
                     case '\u00D6' : app = "&Ouml;";  break;
 261  0
                     case '\u00FC' : app = "&uuml;";  break;
 262  0
                     case '\u00DC' : app = "&Uuml;";  break;
 263  0
                     case '\u00DF' : app = "&szlig;"; break;
 264  
 
 265  
                     //misc
 266  
                     //case 0x80: app = "&euro;"; break;  sometimes euro symbol is ascii 128, should we suport it?
 267  0
                     case '\u20AC': app = "&euro;";  break;
 268  0
                     case '\u00AB': app = "&laquo;"; break;
 269  0
                     case '\u00BB': app = "&raquo;"; break;
 270  0
                     case '\u00A0': app = "&#160;"; break;
 271  
 
 272  
                     default :
 273  
                         //encode all non basic latin characters
 274  0
                         app = "&#" + ((int)c) + ";";
 275  
                     break;
 276  
                 }
 277  
             }
 278  186
             if (app != null)
 279  
             {
 280  
                 //if (sb == null)
 281  
                 //{
 282  
                 //    sb = new StringBuilder(string.substring(0, i));
 283  
                 //}
 284  
                 //sb.append(app);
 285  11
                 if (start < i)
 286  
                 {
 287  8
                     writer.write(string, start, i-start);
 288  
                 }
 289  11
                 start = i+1;
 290  11
                 writer.write(app);
 291  
             }
 292  
             //else
 293  
             //{
 294  
             //    if (sb != null)
 295  
             //    {
 296  
             //        sb.append(c);
 297  
             //    }
 298  
             //}
 299  
         }
 300  
 
 301  
         //if (sb == null)
 302  
         //{
 303  
         //    return string;
 304  
         //}
 305  
         //else
 306  
         //{
 307  
         //    return sb.toString();
 308  
         //}
 309  10
         if (start == 0)
 310  
         {
 311  6
             writer.write(string);
 312  
         }
 313  4
         else if (start < length)
 314  
         {
 315  0
             writer.write(string,start,length-start);
 316  
         }
 317  10
     }
 318  
 
 319  
 
 320  
     /**
 321  
      * Variant of {@link #encode} where encodeNewline is false and encodeNbsp is true.
 322  
      */
 323  
     public static void encode (char[] string, int offset, int length, Writer writer) throws IOException
 324  
     {
 325  20
         encode(string, offset, length, false, true, writer);
 326  20
     }
 327  
 
 328  
     /**
 329  
      * Variant of {@link #encode} where encodeNbsp is true.
 330  
      */
 331  
     public static void encode (char[] string, int offset, int length, boolean encodeNewline, Writer writer)
 332  
         throws IOException
 333  
     {
 334  6
         encode(string, offset, length, encodeNewline, true, writer);
 335  6
     }
 336  
 
 337  
     /**
 338  
      * Variant of {@link #encode} where encodeNbsp and encodeNonLatin are true 
 339  
      */
 340  
     public static void encode (char[] string, int offset, int length, boolean encodeNewline, 
 341  
             boolean encodeSubsequentBlanksToNbsp, Writer writer) throws IOException
 342  
     {
 343  26
         encode(string, offset, length, encodeNewline, encodeSubsequentBlanksToNbsp, true, writer);
 344  26
     }
 345  
 
 346  
 
 347  
     /**
 348  
      * Encodes the given string, so that it can be used within a html page.
 349  
      * @param string the string to convert
 350  
      * @param encodeNewline if true newline characters are converted to &lt;br&gt;'s
 351  
      * @param encodeSubsequentBlanksToNbsp if true subsequent blanks are converted to &amp;nbsp;'s
 352  
      * @param encodeNonLatin if true encode non-latin characters as numeric character references
 353  
      */
 354  
     public static void encode (char[] string, int offset, int length,
 355  
                                  boolean encodeNewline,
 356  
                                  boolean encodeSubsequentBlanksToNbsp,
 357  
                                  boolean encodeNonLatin, Writer writer) throws IOException
 358  
     {
 359  26
         if (string == null || length < 0 || offset >= string.length)
 360  
         {
 361  8
             return;
 362  
         }
 363  18
         offset = Math.max(0, offset);
 364  18
         int realLength = Math.min(length, string.length - offset);
 365  
 
 366  
         //StringBuilder sb = null;    //create later on demand
 367  
         String app;
 368  18
         char c = ' ';
 369  
         char prevC;
 370  18
         int start = offset;
 371  
         
 372  590
         for (int i = offset; i < offset + realLength; ++i)
 373  
         {
 374  572
             app = null;
 375  572
             prevC = c;
 376  572
             c = string[i];
 377  
 
 378  
             // All characters before letters
 379  572
             if ((int)c < 0x41)
 380  
             {
 381  120
                 switch (c)
 382  
                 {
 383  4
                     case '"': app = "&quot;"; break;    //"
 384  4
                     case '&': app = "&amp;"; break;     //&
 385  2
                     case '<': app = "&lt;"; break;      //<
 386  12
                     case '>': app = "&gt;"; break;      //>
 387  
                     case ' ':
 388  68
                         if (encodeSubsequentBlanksToNbsp &&
 389  
                                 prevC == ' ')
 390  
                         {
 391  
                             //Space at beginning or after another space
 392  0
                             app = "&#160;";
 393  
                         }
 394  
                         break;
 395  
                     case '\n':
 396  8
                         if (encodeNewline)
 397  
                         {
 398  2
                             app = "<br/>";
 399  
                         }
 400  
                         break;
 401  
                     default:
 402  
                         break;
 403  
                 }
 404  
                 // http://www.w3.org/MarkUp/html3/specialchars.html
 405  
                 // From C0 extension U+0000-U+001F only U+0009, U+000A and
 406  
                 // U+000D are valid control characters
 407  120
                 if (c <= 0x1F && c != 0x09 && c != 0x0A && c != 0x0D)
 408  
                 {
 409  
                     // Ignore escape character
 410  0
                     app = "";
 411  
                 }
 412  
             }
 413  452
             else if (encodeNonLatin && (int)c > 0x80)
 414  
             {
 415  8
                  switch(c)
 416  
                  {
 417  
                     //german umlauts
 418  0
                     case '\u00E4' : app = "&auml;";  break;
 419  0
                     case '\u00C4' : app = "&Auml;";  break;
 420  8
                     case '\u00F6' : app = "&ouml;";  break;
 421  0
                     case '\u00D6' : app = "&Ouml;";  break;
 422  0
                     case '\u00FC' : app = "&uuml;";  break;
 423  0
                     case '\u00DC' : app = "&Uuml;";  break;
 424  0
                     case '\u00DF' : app = "&szlig;"; break;
 425  
 
 426  
                     //misc
 427  
                     //case 0x80: app = "&euro;"; break;  sometimes euro symbol is ascii 128, should we suport it?
 428  0
                     case '\u20AC': app = "&euro;";  break;
 429  0
                     case '\u00AB': app = "&laquo;"; break;
 430  0
                     case '\u00BB': app = "&raquo;"; break;
 431  0
                     case '\u00A0': app = "&#160;"; break;
 432  
 
 433  
                     default :
 434  
                         //encode all non basic latin characters
 435  0
                         app = "&#" + ((int)c) + ";";
 436  
                     break;
 437  
                 }
 438  
             }
 439  572
             if (app != null)
 440  
             {
 441  
                 //if (sb == null)
 442  
                 //{
 443  
                 //    sb = new StringBuilder(realLength*2);
 444  
                 //    sb.append(string, offset, i - offset);
 445  
                 //}
 446  
                 //sb.append(app);
 447  32
                 if (start < i)
 448  
                 {
 449  24
                     writer.write(string, start, i-start);
 450  
                 }
 451  32
                 start = i+1;
 452  32
                 writer.write(app);
 453  
             }
 454  
             /*
 455  
             else
 456  
             {
 457  
                 if (sb != null)
 458  
                 {
 459  
                     sb.append(c);
 460  
                 }
 461  
             }*/
 462  
         }
 463  
 
 464  
         //if (sb == null)
 465  
         //{
 466  
         //    writer.write(string, offset, realLength);
 467  
         //}
 468  
         //else
 469  
         //{
 470  
         //    writer.write(sb.toString());
 471  
         //}
 472  18
         if (start == offset)
 473  
         {
 474  4
             writer.write(string, offset, realLength);
 475  
         }
 476  14
         else if (start < offset+realLength)
 477  
         {
 478  2
             writer.write(string,start,offset+realLength-start);
 479  
         }
 480  18
     }
 481  
     
 482  
     /** Upper-case hexadecimal digits; indexed by a nibble value (0-15) to build a percent-encoded octet. */
     private static final String HEX_CHARSET = "0123456789ABCDEF";
 483  
     
 484  
     /** Charset name handed to percentEncode when URI characters are percent-encoded. */
     private static final String UTF8 = "UTF-8";
 485  
     
 486  
     /**
 487  
      * Encode an URI, escaping or percent-encoding all required characters and
 488  
      * following the rules mentioned on RFC 3986.  
 489  
      * 
 490  
      * @param string
 491  
      * @param encodeNonLatin
 492  
      * @return
 493  
      * @throws IOException
 494  
      */
 495  
     public static String encodeURIAttribute(final String string, final String characterEncoding)
 496  
         throws IOException
 497  
     {
 498  13
         StringBuilder sb = null;    //create later on demand
 499  
         String app;
 500  
         char c;
 501  13
         boolean endLoop = false;
 502  13
         int length = string.length();
 503  174
         for (int i = 0; i < length; ++i)
 504  
         {
 505  168
             app = null;
 506  168
             c = string.charAt(i);
 507  
             
 508  
             // This are the guidelines to be taken into account by this algorithm to encode:
 509  
             
 510  
             // RFC 2396 Section 2.4.3 Excluded US-ASCII Characters
 511  
             //
 512  
             // control     = <US-ASCII coded characters 00-1F and 7F hexadecimal>
 513  
             // space       = <US-ASCII coded character 20 hexadecimal>
 514  
             // delims      = "<" | ">" | "#" | "%" | <">
 515  
             //               %3C   %3E   %23   %25   %22
 516  
             // unwise      = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 517  
             //               %7D   %7B   %7C   %5C   %5E   %5B   %5D   %60
 518  
             //
 519  
             // ".... Data corresponding to excluded characters must be escaped in order to
 520  
             // be properly represented within a URI....."
 521  
             
 522  
             // RFC 3986 Section 3.  Syntax Components
 523  
             //
 524  
             // "... The generic URI syntax consists of a hierarchical sequence of
 525  
             // components referred to as the scheme, authority, path, query, and
 526  
             // fragment.
 527  
             //
 528  
             //   URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 529  
             //
 530  
             //   hier-part   = "//" authority path-abempty
 531  
             //               / path-absolute
 532  
             //               / path-rootless
 533  
             //               / path-empty
 534  
             // ...."
 535  
             
 536  
             // RFC 3986 Section 2.2:
 537  
             // Reserved characters (should not be percent-encoded)
 538  
             // reserved    = gen-delims / sub-delims
 539  
             // gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 540  
             //               %3A   %2F   %3F   %23   %5B   %5D   %40
 541  
             // sub-delims  = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 542  
             //               %21   %24   %26   %27   %28   %29   %2A   %2B   %2C   %3B   %3D
 543  
             
 544  
             // Note than chars "[" and "]" are mentioned as they should be escaped on RFC 2396,
 545  
             // but on the part D. Changes from RFC 2396 says about this chars (used on IPv6) 
 546  
             // "...those rules were redefined to directly specify the characters allowed...."
 547  
             // There is also other characters moved from excluded list to reserved:
 548  
             // "[" / "]" / "#"  
 549  
             
 550  
             // RFC 3986 Section 2.3:
 551  
             // "... for consistency, percent-encoded octets in the ranges of ALPHA
 552  
             // (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
 553  
             // underscore (%5F), or tilde (%7E) should not be created by URI
 554  
             // producers...."
 555  
             
 556  
             // RFC 3986 Section  3.2.2.  Host
 557  
 
 558  
             // host = IP-literal / IPv4address / reg-name
 559  
 
 560  
             // The reg-name syntax allows percent-encoded octets in order to
 561  
             // represent non-ASCII registered names in a uniform way that is
 562  
             // independent of the underlying name resolution technology.  Non-ASCII
 563  
             // characters must first be encoded according to UTF-8 [STD63], and then
 564  
             // each octet of the corresponding UTF-8 sequence must be percent-
 565  
             // encoded to be represented as URI characters.  URI producing
 566  
             // applications must not use percent-encoding in host unless it is used
 567  
             // to represent a UTF-8 character sequence.
 568  
             
 569  
             // RFC 3986 Section 3.4 Query 
 570  
             //         query       = *( pchar / "/" / "?" )
 571  
             //
 572  
             // "...  However, as query components are often used to carry identifying information 
 573  
             // in the form of "key=value" pairs and one frequently used value is a reference to
 574  
             // another URI, it is sometimes better for usability to avoid percent-encoding those characters....."
 575  
             //
 576  
             // RFC 3986 Section 2.5 Identifying Data (Apply to query section)
 577  
             //
 578  
             // When a new URI scheme defines a component that represents textual
 579  
             // data consisting of characters from the Universal Character Set [UCS],
 580  
             // the data should first be encoded as octets according to the UTF-8
 581  
             // character encoding [STD63]; then only those octets that do not
 582  
             // correspond to characters in the unreserved set should be percent-
 583  
             // encoded.  For example, the character A would be represented as "A",
 584  
             // the character LATIN CAPITAL LETTER A WITH GRAVE would be represented
 585  
             // as "%C3%80", and the character KATAKANA LETTER A would be represented
 586  
             // as "%E3%82%A2".
 587  
             //
 588  
             // RFC 3986 Section 3.5 Fragment
 589  
             //         fragment    = *( pchar / "/" / "?" )
 590  
             //
 591  
             // Note that follows the same as query
 592  
             
 593  
             // Based on the extracts the strategy to apply on this method is:
 594  
             // 
 595  
             // On scheme ":" hier-part
 596  
             //
 597  
             // Escape or percent encode chars inside :
 598  
             // 
 599  
             // - From %00 to %20, 
 600  
             // - <"> %22, "%" %25 (If there is encode of "%", there is a risk of 
 601  
             //                     duplicate encoding, encode it when we are sure 
 602  
             //                     that there are not encoded twice)
 603  
             // - "<" %3C, ">" %3E
 604  
             // - "\" %5C, "^" %5E, "`" %60 
 605  
             // - "{" %7B, "|" %7C, "}" %7D
 606  
             // - From %7F ad infinitum (characters from %100 to infinitum should not be used in this
 607  
             //   part of an URI, but it is preferred to encode it that omit it).
 608  
             //
 609  
             // The remaining characters must not be encoded
 610  
             //
 611  
             // Characters after ? or # should be percent encoding but only the necessary ones:
 612  
             //
 613  
             // - From %00 to %20 (' ' %20 could encode as +, but %20 also works, so we keep %20)
 614  
             // - <"> %22, "%" %25 (If there is encode of "%", there is a risk of 
 615  
             //                     duplicate encoding, encode it when we are sure 
 616  
             //                     that there are not encoded twice)
 617  
             // - "<" %3C, ">" %3E,
 618  
             // - "\" %5C, "^" %5E, "`" %60 
 619  
             // - "{" %7B, "|" %7C, "}" %7D
 620  
             // - From %7F ad infinitum (each character as many bytes as necessary but take into account
 621  
             //   that a single char should contain 2,3 or more bytes!. This data should be encoded 
 622  
             //   translating from the document character encoding to percent encoding, because this values
 623  
             //   could be retrieved from httpRequest.getParameter() and it uses the current character encoding
 624  
             //   for decode values)
 625  
             //
 626  
             // "&" should be encoded as "&amp;" because this link is inside an html page, and 
 627  
             // put only & is invalid in this context.
 628  
 
 629  168
             if (   (c <= (char)0x20) || (c >= (char)0x7F) || 
 630  
                     c == '"' || c == '<' ||
 631  
                     c == '>' || c == '\\' || c == '^' || c == '`' ||
 632  
                     c == '{' || c == '|' || c == '}')
 633  
             {
 634  
                 // The percent encoding on this part should be done using UTF-8 charset
 635  
                 // as RFC 3986 Section 3.2.2 says.
 636  
                 // Also there is a reference on 
 637  
                 // http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
 638  
                 // that recommend use of UTF-8 instead the document character encoding.
 639  
                 // Jetty set by default UTF-8 (see http://jira.codehaus.org/browse/JETTY-113)
 640  42
                 app = percentEncode(c, "UTF-8");
 641  
             }
 642  126
             else if (c == '%')
 643  
             {
 644  2
                 if (i + 2 < length)
 645  
                 {
 646  2
                     char c1 = string.charAt(i+1);
 647  2
                     char c2 = string.charAt(i+2);
 648  2
                     if ((( c1 >= '0' && c1 <='9') || (c1 >='A' && c1 <='Z') || (c1 >='a' && c1 <='z')) &&
 649  
                         (( c2 >= '0' && c2 <='9') || (c2 >='A' && c2 <='Z') || (c2 >='a' && c2 <='z')))
 650  
                     {
 651  
                         // do not percent encode, because it could be already encoded
 652  
                         // and we don't want encode it twice
 653  
                     }
 654  
                     else
 655  
                     {
 656  2
                         app = percentEncode(c, UTF8);
 657  
                     }
 658  2
                 }
 659  
                 else
 660  
                 {
 661  0
                     app = percentEncode(c, UTF8);
 662  
                 }
 663  
             }
 664  124
             else if (c == '?' || c == '#')
 665  
             {
 666  7
                 if (i+1 < length)
 667  
                 {
 668  
                     // The remaining part of the URI are data that should be encoded
 669  
                     // using the document character encoding.
 670  7
                     app = c + encodeURIQuery(string.substring(i+1), characterEncoding);
 671  7
                     endLoop = true;
 672  
                 }
 673  
             }
 674  
             else
 675  
             {
 676  
                 //No encoding, just do nothing, char will be added later.
 677  
             }
 678  
                         
 679  168
             if (app != null)
 680  
             {
 681  51
                 if (sb == null)
 682  
                 {
 683  11
                     sb = new StringBuilder(string.substring(0, i));
 684  
                 }
 685  51
                 sb.append(app);
 686  
             }
 687  
             else
 688  
             {
 689  117
                 if (sb != null)
 690  
                 {
 691  0
                     sb.append(c);
 692  
                 }
 693  
             }
 694  168
             if (endLoop)
 695  
             {
 696  7
                 break;
 697  
             }
 698  
         }
 699  13
         if (sb == null)
 700  
         {
 701  2
             return string;
 702  
         }
 703  
         else
 704  
         {
 705  11
             return sb.toString();
 706  
         }
 707  
     }
 708  
     
 709  
     /**
 710  
      * Encode a unicode char value in percentEncode, decoding its bytes using a specified 
 711  
      * characterEncoding.
 712  
      * 
 713  
      * @param c
 714  
      * @param characterEncoding
 715  
      * @return
 716  
      */
 717  
     private static String percentEncode(char c, String characterEncoding)
 718  
     {
 719  87
         String app = null;
 720  87
         if (c > (char)((short)0x007F))
 721  
         {
 722  
             //percent encode in the proper encoding to be consistent
 723  39
             app = percentEncodeNonUsAsciiCharacter(c, characterEncoding);
 724  
         }
 725  
         else
 726  
         {
 727  
             //percent encode US-ASCII char (0x00-0x7F range)
 728  48
             app = "%" + HEX_CHARSET.charAt( ((c >> 0x4) % 0x10)) +HEX_CHARSET.charAt(c % 0x10);
 729  
         }
 730  87
         return app;
 731  
     }
 732  
     
 733  
     private static String percentEncodeNonUsAsciiCharacter(char c, String characterEncoding)
 734  
     {
 735  39
         ByteArrayOutputStream baos = new ByteArrayOutputStream(10);
 736  39
         StringBuilder builder = new StringBuilder();
 737  
         try
 738  
         {
 739  39
             OutputStreamWriter writer = new OutputStreamWriter(baos,characterEncoding);
 740  39
             writer.write(c);
 741  39
             writer.flush();
 742  
         }
 743  0
         catch(IOException e)
 744  
         {
 745  0
             baos.reset();
 746  0
             return null;
 747  39
         }
 748  
         
 749  39
         byte [] byteArray =  baos.toByteArray();
 750  117
         for (int i=0; i < byteArray.length; i++)
 751  
         {
 752  78
             builder.append('%');
 753  78
             builder.append(HEX_CHARSET.charAt( (( ((short) byteArray[i] & 0xFF ) >> 0x4) % 0x10)) );
 754  78
             builder.append(HEX_CHARSET.charAt( ((short) byteArray[i] & 0xFF ) % 0x10));
 755  
         }
 756  
         
 757  39
         return builder.toString();
 758  
     }
 759  
 
 760  
     /**
 761  
      * Encode the query part using the document charset encoding provided.
 762  
      * 
 763  
      * 
 764  
      * @param string
 765  
      * @param characterEncoding
 766  
      * @return
 767  
      */
 768  
     private static String encodeURIQuery(final String string, final String characterEncoding)
 769  
     {
 770  7
         StringBuilder sb = null;    //create later on demand
 771  
         String app;
 772  
         char c;
 773  7
         boolean endLoop = false;
 774  7
         int length = string.length();
 775  251
         for (int i = 0; i < length; ++i)
 776  
         {
 777  244
             app = null;
 778  244
             c = string.charAt(i);
 779  
             
 780  
             // - From %00 to %20 (' ' %20 could encode as +, but %20 also works, so we keep %20)
 781  
             // - <"> %22 (If there is encode of "%", there is a risk of duplicate encoding, so 
 782  
             //            we make easier and omit this one)
 783  
             // - "<" %3C, ">" %3E,
 784  
             // - "\" %5C, "^" %5E, "`" %60 
 785  
             // - "{" %7B, "|" %7C, "}" %7D
 786  
             // - From %7F ad infinitum (each character as many bytes as necessary but take into account
 787  
             //   that a single char should contain 2,3 or more bytes!. This data should be encoded 
 788  
             //   translating from the document character encoding to percent encoding)
 789  
             //
 790  
             // "&" should be encoded as "&amp;" because this link is inside an html page, and 
 791  
             // put & is invalid in this context   
 792  
             
 793  244
             if (   (c <= (char)0x20) || (c >= (char)0x7F) || 
 794  
                     c == '"' || c == '<' ||
 795  
                     c == '>' || c == '\\' || c == '^' || c == '`' ||
 796  
                     c == '{' || c == '|' || c == '}')
 797  
             {
 798  
                 // The percent encoding on this part should be done using UTF-8 charset
 799  
                 // as RFC 3986 Section 3.2.2 says
 800  41
                 app = percentEncode(c, characterEncoding);
 801  
             }
 802  203
             else if (c == '%')
 803  
             {
 804  2
                 if (i + 2 < length)
 805  
                 {
 806  2
                     char c1 = string.charAt(i+1);
 807  2
                     char c2 = string.charAt(i+2);
 808  2
                     if ((( c1 >= '0' && c1 <='9') || (c1 >='A' && c1 <='Z') || (c1 >='a' && c1 <='z')) &&
 809  
                         (( c2 >= '0' && c2 <='9') || (c2 >='A' && c2 <='Z') || (c2 >='a' && c2 <='z')))
 810  
                     {
 811  
                         // do not percent encode, because it could be already encoded
 812  
                     }
 813  
                     else
 814  
                     {
 815  2
                         app = percentEncode(c, characterEncoding);
 816  
                     }
 817  2
                 }
 818  
                 else
 819  
                 {
 820  0
                     app = percentEncode(c, characterEncoding);
 821  
                 }
 822  
             }
 823  201
             else if (c == '&')
 824  
             {
 825  1
                 if (i+4 < length )
 826  
                 {
 827  1
                     if ('a' == string.charAt(i+1) &&
 828  
                         'm' == string.charAt(i+2) &&
 829  
                         'p' == string.charAt(i+3) &&
 830  
                         ';' == string.charAt(i+4))
 831  
                     {
 832  
                         //Skip
 833  
                     }
 834  
                     else
 835  
                     {
 836  1
                         app = "&amp;";
 837  
                     }
 838  
                 }
 839  
                 else
 840  
                 {
 841  0
                     app = "&amp;";
 842  
                 }
 843  
             }
 844  
             else
 845  
             {
 846  
                 //No encoding, just do nothing, char will be added later.
 847  
             }
 848  
                         
 849  244
             if (app != null)
 850  
             {
 851  44
                 if (sb == null)
 852  
                 {
 853  4
                     sb = new StringBuilder(string.substring(0, i));
 854  
                 }
 855  44
                 sb.append(app);
 856  
             }
 857  
             else
 858  
             {
 859  200
                 if (sb != null)
 860  
                 {
 861  12
                     sb.append(c);
 862  
                 }
 863  
             }
 864  244
             if (endLoop)
 865  
             {
 866  0
                 break;
 867  
             }
 868  
         }
 869  7
         if (sb == null)
 870  
         {
 871  3
             return string;
 872  
         }
 873  
         else
 874  
         {
 875  4
             return sb.toString();
 876  
         }
 877  
     }
 878  
 
 879  
     /**
 880  
      * Encode an URI, escaping or percent-encoding all required characters and
 881  
      * following the rules mentioned on RFC 3986.  
 882  
      * 
 883  
      * @param string
 884  
      * @param encodeNonLatin
 885  
      * @return
 886  
      * @throws IOException
 887  
      */
 888  
     public static void encodeURIAttribute(Writer writer, final String string, final String characterEncoding)
 889  
         throws IOException
 890  
     {
 891  
         //StringBuilder sb = null;    //create later on demand
 892  11
         int start = 0;
 893  
         String app;
 894  
         char c;
 895  11
         boolean endLoop = false;
 896  11
         int length = string.length();
 897  160
         for (int i = 0; i < length; ++i)
 898  
         {
 899  155
             app = null;
 900  155
             c = string.charAt(i);
 901  
             
 902  
             // This are the guidelines to be taken into account by this algorithm to encode:
 903  
             
 904  
             // RFC 2396 Section 2.4.3 Excluded US-ASCII Characters
 905  
             //
 906  
             // control     = <US-ASCII coded characters 00-1F and 7F hexadecimal>
 907  
             // space       = <US-ASCII coded character 20 hexadecimal>
 908  
             // delims      = "<" | ">" | "#" | "%" | <">
 909  
             //               %3C   %3E   %23   %25   %22
 910  
             // unwise      = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 911  
             //               %7D   %7B   %7C   %5C   %5E   %5B   %5D   %60
 912  
             //
 913  
             // ".... Data corresponding to excluded characters must be escaped in order to
 914  
             // be properly represented within a URI....."
 915  
             
 916  
             // RFC 3986 Section 3.  Syntax Components
 917  
             //
 918  
             // "... The generic URI syntax consists of a hierarchical sequence of
 919  
             // components referred to as the scheme, authority, path, query, and
 920  
             // fragment.
 921  
             //
 922  
             //   URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 923  
             //
 924  
             //   hier-part   = "//" authority path-abempty
 925  
             //               / path-absolute
 926  
             //               / path-rootless
 927  
             //               / path-empty
 928  
             // ...."
 929  
             
 930  
             // RFC 3986 Section 2.2:
 931  
             // Reserved characters (should not be percent-encoded)
 932  
             // reserved    = gen-delims / sub-delims
 933  
             // gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 934  
             //               %3A   %2F   %3F   %23   %5B   %5D   %40
 935  
             // sub-delims  = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 936  
             //               %21   %24   %26   %27   %28   %29   %2A   %2B   %2C   %3B   %3D
 937  
             
 938  
             // Note than chars "[" and "]" are mentioned as they should be escaped on RFC 2396,
 939  
             // but on the part D. Changes from RFC 2396 says about this chars (used on IPv6) 
 940  
             // "...those rules were redefined to directly specify the characters allowed...."
 941  
             // There is also other characters moved from excluded list to reserved:
 942  
             // "[" / "]" / "#"  
 943  
             
 944  
             // RFC 3986 Section 2.3:
 945  
             // "... for consistency, percent-encoded octets in the ranges of ALPHA
 946  
             // (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
 947  
             // underscore (%5F), or tilde (%7E) should not be created by URI
 948  
             // producers...."
 949  
             
 950  
             // RFC 3986 Section  3.2.2.  Host
 951  
 
 952  
             // host = IP-literal / IPv4address / reg-name
 953  
 
 954  
             // The reg-name syntax allows percent-encoded octets in order to
 955  
             // represent non-ASCII registered names in a uniform way that is
 956  
             // independent of the underlying name resolution technology.  Non-ASCII
 957  
             // characters must first be encoded according to UTF-8 [STD63], and then
 958  
             // each octet of the corresponding UTF-8 sequence must be percent-
 959  
             // encoded to be represented as URI characters.  URI producing
 960  
             // applications must not use percent-encoding in host unless it is used
 961  
             // to represent a UTF-8 character sequence.
 962  
             
 963  
             // RFC 3986 Section 3.4 Query 
 964  
             //         query       = *( pchar / "/" / "?" )
 965  
             //
 966  
             // "...  However, as query components are often used to carry identifying information 
 967  
             // in the form of "key=value" pairs and one frequently used value is a reference to
 968  
             // another URI, it is sometimes better for usability to avoid percent-encoding those characters....."
 969  
             //
 970  
             // RFC 3986 Section 2.5 Identifying Data (Apply to query section)
 971  
             //
 972  
             // When a new URI scheme defines a component that represents textual
 973  
             // data consisting of characters from the Universal Character Set [UCS],
 974  
             // the data should first be encoded as octets according to the UTF-8
 975  
             // character encoding [STD63]; then only those octets that do not
 976  
             // correspond to characters in the unreserved set should be percent-
 977  
             // encoded.  For example, the character A would be represented as "A",
 978  
             // the character LATIN CAPITAL LETTER A WITH GRAVE would be represented
 979  
             // as "%C3%80", and the character KATAKANA LETTER A would be represented
 980  
             // as "%E3%82%A2".
 981  
             //
 982  
             // RFC 3986 Section 3.5 Fragment
 983  
             //         fragment    = *( pchar / "/" / "?" )
 984  
             //
 985  
             // Note that follows the same as query
 986  
             
 987  
             // Based on the extracts the strategy to apply on this method is:
 988  
             // 
 989  
             // On scheme ":" hier-part
 990  
             //
 991  
             // Escape or percent encode chars inside :
 992  
             // 
 993  
             // - From %00 to %20, 
 994  
             // - <"> %22, "%" %25 (If there is encode of "%", there is a risk of 
 995  
             //                     duplicate encoding, encode it when we are sure 
 996  
             //                     that there are not encoded twice)
 997  
             // - "<" %3C, ">" %3E
 998  
             // - "\" %5C, "^" %5E, "`" %60 
 999  
             // - "{" %7B, "|" %7C, "}" %7D
 1000  
             // - From %7F ad infinitum (characters from %100 to infinitum should not be used in this
 1001  
             //   part of an URI, but it is preferred to encode it that omit it).
 1002  
             //
 1003  
             // The remaining characters must not be encoded
 1004  
             //
 1005  
             // Characters after ? or # should be percent encoding but only the necessary ones:
 1006  
             //
 1007  
             // - From %00 to %20 (' ' %20 could encode as +, but %20 also works, so we keep %20)
 1008  
             // - <"> %22, "%" %25 (If there is encode of "%", there is a risk of 
 1009  
             //                     duplicate encoding, encode it when we are sure 
 1010  
             //                     that there are not encoded twice)
 1011  
             // - "<" %3C, ">" %3E,
 1012  
             // - "\" %5C, "^" %5E, "`" %60 
 1013  
             // - "{" %7B, "|" %7C, "}" %7D
 1014  
             // - From %7F ad infinitum (each character as many bytes as necessary but take into account
 1015  
             //   that a single char should contain 2,3 or more bytes!. This data should be encoded 
 1016  
             //   translating from the document character encoding to percent encoding, because this values
 1017  
             //   could be retrieved from httpRequest.getParameter() and it uses the current character encoding
 1018  
             //   for decode values)
 1019  
             //
 1020  
             // "&" should be encoded as "&amp;" because this link is inside an html page, and 
 1021  
             // put only & is invalid in this context.
 1022  
 
 1023  155
             if (   (c <= (char)0x20) || (c >= (char)0x7F) || 
 1024  
                     c == '"' || c == '<' ||
 1025  
                     c == '>' || c == '\\' || c == '^' || c == '`' ||
 1026  
                     c == '{' || c == '|' || c == '}')
 1027  
             {
 1028  
                 // The percent encoding on this part should be done using UTF-8 charset
 1029  
                 // as RFC 3986 Section 3.2.2 says.
 1030  
                 // Also there is a reference on 
 1031  
                 // http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
 1032  
                 // that recommend use of UTF-8 instead the document character encoding.
 1033  
                 // Jetty set by default UTF-8 (see http://jira.codehaus.org/browse/JETTY-113)
 1034  
                 //app = percentEncode(c, "UTF-8");
 1035  31
                 if (start < i)
 1036  
                 {
 1037  0
                     writer.write(string, start, i-start);
 1038  
                 }
 1039  31
                 start = i+1;
 1040  31
                 percentEncode(writer, c, "UTF-8");
 1041  
             }
 1042  124
             else if (c == '%')
 1043  
             {
 1044  1
                 if (i + 2 < length)
 1045  
                 {
 1046  1
                     char c1 = string.charAt(i+1);
 1047  1
                     char c2 = string.charAt(i+2);
 1048  1
                     if ((( c1 >= '0' && c1 <='9') || (c1 >='A' && c1 <='Z') || (c1 >='a' && c1 <='z')) &&
 1049  
                         (( c2 >= '0' && c2 <='9') || (c2 >='A' && c2 <='Z') || (c2 >='a' && c2 <='z')))
 1050  
                     {
 1051  
                         // do not percent encode, because it could be already encoded
 1052  
                         // and we don't want encode it twice
 1053  
                     }
 1054  
                     else
 1055  
                     {
 1056  
                         //app = percentEncode(c, UTF8);
 1057  1
                         if (start < i)
 1058  
                         {
 1059  0
                             writer.write(string, start, i-start);
 1060  
                         }
 1061  1
                         start = i+1;
 1062  1
                         percentEncode(writer, c, UTF8);
 1063  
                     }
 1064  1
                 }
 1065  
                 else
 1066  
                 {
 1067  
                     //app = percentEncode(c, UTF8);
 1068  0
                     if (start < i)
 1069  
                     {
 1070  0
                         writer.write(string, start, i-start);
 1071  
                     }
 1072  0
                     start = i+1;
 1073  0
                     percentEncode(writer, c, UTF8);
 1074  
                 }
 1075  
             }
 1076  123
             else if (c == '?' || c == '#')
 1077  
             {
 1078  6
                 if (i+1 < length)
 1079  
                 {
 1080  
                     // The remaining part of the URI are data that should be encoded
 1081  
                     // using the document character encoding.
 1082  
                     //app = c + encodeURIQuery(string.substring(i+1), characterEncoding);
 1083  6
                     if (start < i)
 1084  
                     {
 1085  1
                         writer.write(string, start, i-start);
 1086  
                     }
 1087  6
                     start = i+1;
 1088  6
                     writer.write(c);
 1089  
                     //encodeURIQuery(writer, string.substring(i+1), characterEncoding);
 1090  6
                     encodeURIQuery(writer, string, i+1, characterEncoding);
 1091  6
                     endLoop = true;
 1092  
                 }
 1093  
             }
 1094  
             else
 1095  
             {
 1096  
                 //No encoding, just do nothing, char will be added later.
 1097  
             }
 1098  
                         
 1099  155
             if (app != null)
 1100  
             {
 1101  
                 //if (sb == null)
 1102  
                 //{
 1103  
                 //    sb = new StringBuilder(string.substring(0, i));
 1104  
                 //}
 1105  
                 //sb.append(app);
 1106  0
                 if (start < i)
 1107  
                 {
 1108  0
                     writer.write(string, start, i-start);
 1109  
                 }
 1110  0
                 start = i+1;
 1111  0
                 writer.write(app);
 1112  
             }
 1113  
             //else
 1114  
             //{
 1115  
             //    if (sb != null)
 1116  
             //    {
 1117  
             //        sb.append(c);
 1118  
             //    }
 1119  
             //}
 1120  155
             if (endLoop)
 1121  
             {
 1122  6
                 start = length;
 1123  6
                 break;
 1124  
             }
 1125  
         }
 1126  
         //if (sb == null)
 1127  
         //{
 1128  
         //    return string;
 1129  
         //}
 1130  
         //else
 1131  
         //{
 1132  
         //    return sb.toString();
 1133  
         //}
 1134  11
         if (start == 0)
 1135  
         {
 1136  2
             writer.write(string);
 1137  
         }
 1138  9
         else if (start < length)
 1139  
         {
 1140  0
             writer.write(string,start,length-start);
 1141  
         }
 1142  11
     }
 1143  
 
 1144  
     /**
 1145  
      * Encode a unicode char value in percentEncode, decoding its bytes using a specified 
 1146  
      * characterEncoding.
 1147  
      * 
 1148  
      * @param c
 1149  
      * @param characterEncoding
 1150  
      * @return
 1151  
      */
 1152  
     private static void percentEncode(Writer writer, char c, String characterEncoding) throws IOException
 1153  
     {
 1154  63
         String app = null;
 1155  63
         if (c > (char)((short)0x007F))
 1156  
         {
 1157  
             //percent encode in the proper encoding to be consistent
 1158  
             //app = percentEncodeNonUsAsciiCharacter(writer c, characterEncoding);
 1159  39
             percentEncodeNonUsAsciiCharacter(writer, c, characterEncoding);
 1160  
         }
 1161  
         else
 1162  
         {
 1163  
             //percent encode US-ASCII char (0x00-0x7F range)
 1164  
             //app = "%" + HEX_CHARSET.charAt( ((c >> 0x4) % 0x10)) +HEX_CHARSET.charAt(c % 0x10);
 1165  24
             writer.write('%');
 1166  24
             writer.write(HEX_CHARSET.charAt( ((c >> 0x4) % 0x10)));
 1167  24
             writer.write(HEX_CHARSET.charAt(c % 0x10));
 1168  
         }
 1169  
         //return app;
 1170  63
     }
 1171  
     
 1172  
     private static void percentEncodeNonUsAsciiCharacter(Writer currentWriter, char c, String characterEncoding) 
 1173  
         throws IOException
 1174  
     {
 1175  39
         ByteArrayOutputStream baos = new ByteArrayOutputStream(10);
 1176  
 
 1177  
         try
 1178  
         {
 1179  39
             OutputStreamWriter writer = new OutputStreamWriter(baos,characterEncoding);
 1180  39
             writer.write(c);
 1181  39
             writer.flush();
 1182  
         }
 1183  0
         catch(IOException e)
 1184  
         {
 1185  0
             baos.reset();
 1186  0
             return;
 1187  39
         }
 1188  
         
 1189  39
         byte [] byteArray =  baos.toByteArray();
 1190  117
         for (int i=0; i < byteArray.length; i++)
 1191  
         {
 1192  
             //builder.append('%');
 1193  
             //builder.append(HEX_CHARSET.charAt( (( ((short) byteArray[i] & 0xFF ) >> 0x4) % 0x10)) );
 1194  
             //builder.append(HEX_CHARSET.charAt( ((short) byteArray[i] & 0xFF ) % 0x10));
 1195  78
             currentWriter.write('%');
 1196  78
             currentWriter.write(HEX_CHARSET.charAt( (( ((short) byteArray[i] & 0xFF ) >> 0x4) % 0x10)) );
 1197  78
             currentWriter.write(HEX_CHARSET.charAt( ((short) byteArray[i] & 0xFF ) % 0x10));
 1198  
         }
 1199  
         
 1200  
         //return builder.toString();
 1201  39
     }
 1202  
     
 1203  
     /**
 1204  
      * Encode the query part using the document charset encoding provided.
 1205  
      * 
 1206  
      * 
 1207  
      * @param string
 1208  
      * @param characterEncoding
 1209  
      * @return
 1210  
      */
 1211  
     private static void encodeURIQuery(Writer writer, final String string, int offset, final String characterEncoding)
 1212  
             throws IOException
 1213  
     {
 1214  
         //StringBuilder sb = null;    //create later on demand
 1215  6
         int start = offset;
 1216  6
         int length = string.length();
 1217  6
         int realLength = length-offset;
 1218  
         String app;
 1219  
         char c;
 1220  
         //boolean endLoop = false;
 1221  234
         for (int i = offset; i < length; ++i)
 1222  
         {
 1223  228
             app = null;
 1224  228
             c = string.charAt(i);
 1225  
             
 1226  
             // - From %00 to %20 (' ' %20 could encode as +, but %20 also works, so we keep %20)
 1227  
             // - <"> %22 (If there is encode of "%", there is a risk of duplicate encoding, so 
 1228  
             //            we make easier and omit this one)
 1229  
             // - "<" %3C, ">" %3E,
 1230  
             // - "\" %5C, "^" %5E, "`" %60 
 1231  
             // - "{" %7B, "|" %7C, "}" %7D
 1232  
             // - From %7F ad infinitum (each character as many bytes as necessary but take into account
 1233  
             //   that a single char should contain 2,3 or more bytes!. This data should be encoded 
 1234  
             //   translating from the document character encoding to percent encoding)
 1235  
             //
 1236  
             // "&" should be encoded as "&amp;" because this link is inside an html page, and 
 1237  
             // put & is invalid in this context   
 1238  
             
 1239  228
             if (   (c <= (char)0x20) || (c >= (char)0x7F) || 
 1240  
                     c == '"' || c == '<' ||
 1241  
                     c == '>' || c == '\\' || c == '^' || c == '`' ||
 1242  
                     c == '{' || c == '|' || c == '}')
 1243  
             {
 1244  
                 // The percent encoding on this part should be done using UTF-8 charset
 1245  
                 // as RFC 3986 Section 3.2.2 says
 1246  
                 //app = percentEncode(c, characterEncoding);
 1247  30
                 if (start < i)
 1248  
                 {
 1249  2
                     writer.write(string, start, i-start);
 1250  
                 }
 1251  30
                 start = i+1;
 1252  30
                 percentEncode(writer, c, characterEncoding);
 1253  
             }
 1254  198
             else if (c == '%')
 1255  
             {
 1256  1
                 if (i + 2 < length)
 1257  
                 {
 1258  1
                     char c1 = string.charAt(i+1);
 1259  1
                     char c2 = string.charAt(i+2);
 1260  1
                     if ((( c1 >= '0' && c1 <='9') || (c1 >='A' && c1 <='Z') || (c1 >='a' && c1 <='z')) &&
 1261  
                         (( c2 >= '0' && c2 <='9') || (c2 >='A' && c2 <='Z') || (c2 >='a' && c2 <='z')))
 1262  
                     {
 1263  
                         // do not percent encode, because it could be already encoded
 1264  
                     }
 1265  
                     else
 1266  
                     {
 1267  
                         //app = percentEncode(c, characterEncoding);
 1268  1
                         if (start < i)
 1269  
                         {
 1270  0
                             writer.write(string, start, i-start);
 1271  
                         }
 1272  1
                         start = i+1;
 1273  1
                         percentEncode(writer, c, characterEncoding);
 1274  
                     }
 1275  1
                 }
 1276  
                 else
 1277  
                 {
 1278  
                     //app = percentEncode(c, characterEncoding);
 1279  0
                     if (start < i)
 1280  
                     {
 1281  0
                         writer.write(string, start, i-start);
 1282  
                     }
 1283  0
                     start = i+1;
 1284  0
                     percentEncode(writer, c, characterEncoding);
 1285  
                 }
 1286  
             }
 1287  197
             else if (c == '&')
 1288  
             {
 1289  1
                 if (i+4 < length )
 1290  
                 {
 1291  1
                     if ('a' == string.charAt(i+1) &&
 1292  
                         'm' == string.charAt(i+2) &&
 1293  
                         'p' == string.charAt(i+3) &&
 1294  
                         ';' == string.charAt(i+4))
 1295  
                     {
 1296  
                         //Skip
 1297  
                     }
 1298  
                     else
 1299  
                     {
 1300  1
                         app = "&amp;";
 1301  
                     }
 1302  
                 }
 1303  
                 else
 1304  
                 {
 1305  0
                     app = "&amp;";
 1306  
                 }
 1307  
             }
 1308  
             else
 1309  
             {
 1310  
                 //No encoding, just do nothing, char will be added later.
 1311  
             }
 1312  
                         
 1313  228
             if (app != null)
 1314  
             {
 1315  
                 //if (sb == null)
 1316  
                 //{
 1317  
                 //    sb = new StringBuilder(string.substring(0, i));
 1318  
                 //}
 1319  
                 //sb.append(app);
 1320  1
                 if (start < i)
 1321  
                 {
 1322  1
                     writer.write(string, start, i-start);
 1323  
                 }
 1324  1
                 start = i+1;
 1325  1
                 writer.write(app);
 1326  
             }
 1327  
             //else
 1328  
             //{
 1329  
             //    if (sb != null)
 1330  
             //    {
 1331  
             //        sb.append(c);
 1332  
             //    }
 1333  
             //}
 1334  
             //if (endLoop)
 1335  
             //{
 1336  
             //    break;
 1337  
             //}
 1338  
         }
 1339  
         
 1340  
         //if (sb == null)
 1341  
         //{
 1342  
         //    return string;
 1343  
         //}
 1344  
         //else
 1345  
         //{
 1346  
         //    return sb.toString();
 1347  
         //}
 1348  6
         if (start == offset)
 1349  
         {
 1350  3
             writer.write(string, offset, realLength);
 1351  
         }
 1352  3
         else if (start < length)
 1353  
         {
 1354  1
             writer.write(string,start,length-start);
 1355  
         }
 1356  6
     }
 1357  
 }