Coverage Report - org.apache.myfaces.shared_impl.renderkit.html.util.HTMLEncoder
 
Classes in this File Line Coverage Branch Coverage Complexity
HTMLEncoder
0%
0/175
0%
0/228
8.333
 
 1  
 /*
 2  
  *  Licensed to the Apache Software Foundation (ASF) under one
 3  
  *  or more contributor license agreements.  See the NOTICE file
 4  
  *  distributed with this work for additional information
 5  
  *  regarding copyright ownership.  The ASF licenses this file
 6  
  *  to you under the Apache License, Version 2.0 (the
 7  
  *  "License"); you may not use this file except in compliance
 8  
  *  with the License.  You may obtain a copy of the License at
 9  
  * 
 10  
  *  http://www.apache.org/licenses/LICENSE-2.0
 11  
  * 
 12  
  *  Unless required by applicable law or agreed to in writing,
 13  
  *  software distributed under the License is distributed on an
 14  
  *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 15  
  *  KIND, either express or implied.  See the License for the
 16  
  *  specific language governing permissions and limitations
 17  
  *  under the License.
 18  
  */
 19  
 package org.apache.myfaces.shared_impl.renderkit.html.util;
 20  
 
 21  
 import java.io.ByteArrayOutputStream;
 22  
 import java.io.IOException;
 23  
 import java.io.OutputStreamWriter;
 24  
 import java.io.Writer;
 25  
 
 26  
 /**
 27  
  * Converts Strings so that they can be used within HTML-Code.
 28  
  */
 29  0
 public abstract class HTMLEncoder
 30  
 {
 31  
     /**
 32  
      * Variant of {@link #encode} where encodeNewline is false and encodeNbsp is true.
 33  
      */
 34  
     public static String encode (String string)
 35  
     {
 36  0
         return encode(string, false, true);
 37  
     }
 38  
 
 39  
     /**
 40  
      * Variant of {@link #encode} where encodeNbsp is true.
 41  
      */
 42  
     public static String encode (String string, boolean encodeNewline)
 43  
     {
 44  0
         return encode(string, encodeNewline, true);
 45  
     }
 46  
 
 47  
     /**
 48  
      * Variant of {@link #encode} where encodeNbsp and encodeNonLatin are true 
 49  
      */
 50  
     public static String encode (String string, boolean encodeNewline, boolean encodeSubsequentBlanksToNbsp)
 51  
     {
 52  0
         return encode(string, encodeNewline, encodeSubsequentBlanksToNbsp, true);
 53  
     }
 54  
 
 55  
     /**
 56  
      * Encodes the given string, so that it can be used within a html page.
 57  
      * @param string the string to convert
 58  
      * @param encodeNewline if true newline characters are converted to <br>'s
 59  
      * @param encodeSubsequentBlanksToNbsp if true subsequent blanks are converted to  's
 60  
      * @param encodeNonLatin if true encode non-latin characters as numeric character references
 61  
      */
 62  
     public static String encode (String string,
 63  
                                  boolean encodeNewline,
 64  
                                  boolean encodeSubsequentBlanksToNbsp,
 65  
                                  boolean encodeNonLatin)
 66  
     {
 67  0
         if (string == null)
 68  
         {
 69  0
             return "";
 70  
         }
 71  
 
 72  0
         StringBuilder sb = null;    //create later on demand
 73  
         String app;
 74  
         char c;
 75  0
         for (int i = 0; i < string.length (); ++i)
 76  
         {
 77  0
             app = null;
 78  0
             c = string.charAt(i);
 79  
             
 80  
             // All characters before letters
 81  0
             if ((int)c < 0x41)
 82  
             {
 83  0
                 switch (c)
 84  
                 {
 85  0
                     case '"': app = "&quot;"; break;    //"
 86  0
                     case '&': app = "&amp;"; break;     //&
 87  0
                     case '<': app = "&lt;"; break;      //<
 88  0
                     case '>': app = "&gt;"; break;      //>
 89  
                     case ' ':
 90  0
                         if (encodeSubsequentBlanksToNbsp &&
 91  
                                 (i == 0 || (i - 1 >= 0 && string.charAt(i - 1) == ' ')))
 92  
                         {
 93  
                             //Space at beginning or after another space
 94  0
                             app = "&#160;";
 95  
                         }
 96  
                         break;
 97  
                     case '\n':
 98  0
                         if (encodeNewline)
 99  
                         {
 100  0
                             app = "<br/>";
 101  
                         }
 102  0
                         break;
 103  
                 }
 104  0
             } else if (encodeNonLatin && (int)c > 0x80) {
 105  0
                  switch(c) {
 106  
                     //german umlauts
 107  0
                     case '\u00E4' : app = "&auml;";  break;
 108  0
                     case '\u00C4' : app = "&Auml;";  break;
 109  0
                     case '\u00F6' : app = "&ouml;";  break;
 110  0
                     case '\u00D6' : app = "&Ouml;";  break;
 111  0
                     case '\u00FC' : app = "&uuml;";  break;
 112  0
                     case '\u00DC' : app = "&Uuml;";  break;
 113  0
                     case '\u00DF' : app = "&szlig;"; break;
 114  
 
 115  
                     //misc
 116  
                     //case 0x80: app = "&euro;"; break;  sometimes euro symbol is ascii 128, should we suport it?
 117  0
                     case '\u20AC': app = "&euro;";  break;
 118  0
                     case '\u00AB': app = "&laquo;"; break;
 119  0
                     case '\u00BB': app = "&raquo;"; break;
 120  0
                     case '\u00A0': app = "&#160;"; break;
 121  
 
 122  
                     default :
 123  
                         //encode all non basic latin characters
 124  0
                         app = "&#" + ((int)c) + ";";
 125  
                     break;
 126  
                 }
 127  
             }
 128  0
             if (app != null)
 129  
             {
 130  0
                 if (sb == null)
 131  
                 {
 132  0
                     sb = new StringBuilder(string.substring(0, i));
 133  
                 }
 134  0
                 sb.append(app);
 135  
             } else {
 136  0
                 if (sb != null)
 137  
                 {
 138  0
                     sb.append(c);
 139  
                 }
 140  
             }
 141  
         }
 142  
 
 143  0
         if (sb == null)
 144  
         {
 145  0
             return string;
 146  
         }
 147  
         else
 148  
         {
 149  0
             return sb.toString();
 150  
         }
 151  
     }
 152  
 
 153  
     /**
 154  
      * Variant of {@link #encode} where encodeNewline is false and encodeNbsp is true.
 155  
      */
 156  
     public static void encode (char[] string, int offset, int length, Writer writer) throws IOException
 157  
     {
 158  0
         encode(string, offset, length, false, true, writer);
 159  0
     }
 160  
 
 161  
     /**
 162  
      * Variant of {@link #encode} where encodeNbsp is true.
 163  
      */
 164  
     public static void encode (char[] string, int offset, int length, boolean encodeNewline, Writer writer) throws IOException
 165  
     {
 166  0
         encode(string, offset, length, encodeNewline, true, writer);
 167  0
     }
 168  
 
 169  
     /**
 170  
      * Variant of {@link #encode} where encodeNbsp and encodeNonLatin are true 
 171  
      */
 172  
     public static void encode (char[] string, int offset, int length, boolean encodeNewline, boolean encodeSubsequentBlanksToNbsp, Writer writer) throws IOException
 173  
     {
 174  0
         encode(string, offset, length, encodeNewline, encodeSubsequentBlanksToNbsp, true, writer);
 175  0
     }
 176  
 
 177  
 
 178  
     /**
 179  
      * Encodes the given string, so that it can be used within a html page.
 180  
      * @param string the string to convert
 181  
      * @param encodeNewline if true newline characters are converted to &lt;br&gt;'s
 182  
      * @param encodeSubsequentBlanksToNbsp if true subsequent blanks are converted to &amp;nbsp;'s
 183  
      * @param encodeNonLatin if true encode non-latin characters as numeric character references
 184  
      */
 185  
     public static void encode (char[] string, int offset, int length,
 186  
                                  boolean encodeNewline,
 187  
                                  boolean encodeSubsequentBlanksToNbsp,
 188  
                                  boolean encodeNonLatin, Writer writer) throws IOException
 189  
     {
 190  0
         if (string == null || length < 0 || offset >= string.length)
 191  
         {
 192  0
             return;
 193  
         }
 194  0
         offset = Math.max(0, offset);
 195  0
         int realLength = Math.min(length, string.length - offset);
 196  
 
 197  0
         StringBuilder sb = null;    //create later on demand
 198  
         String app;
 199  
         char c;
 200  
         
 201  0
         for (int i = offset; i < offset + realLength; ++i)
 202  
         {
 203  0
             app = null;
 204  0
             c = string[i];
 205  
 
 206  
             // All characters before letters
 207  0
             if ((int)c < 0x41)
 208  
             {
 209  0
                 switch (c)
 210  
                 {
 211  0
                     case '"': app = "&quot;"; break;    //"
 212  0
                     case '&': app = "&amp;"; break;     //&
 213  0
                     case '<': app = "&lt;"; break;      //<
 214  0
                     case '>': app = "&gt;"; break;      //>
 215  
                     case ' ':
 216  0
                         if (encodeSubsequentBlanksToNbsp &&
 217  
                                 (i == 0 || (i - 1 >= 0 && string[i - 1] == ' ')))
 218  
                         {
 219  
                             //Space at beginning or after another space
 220  0
                             app = "&#160;";
 221  
                         }
 222  
                         break;
 223  
                     case '\n':
 224  0
                         if (encodeNewline)
 225  
                         {
 226  0
                             app = "<br/>";
 227  
                         }
 228  0
                         break;
 229  
                 }
 230  0
             } else if (encodeNonLatin && (int)c > 0x80) {
 231  0
                  switch(c) {
 232  
                     //german umlauts
 233  0
                     case '\u00E4' : app = "&auml;";  break;
 234  0
                     case '\u00C4' : app = "&Auml;";  break;
 235  0
                     case '\u00F6' : app = "&ouml;";  break;
 236  0
                     case '\u00D6' : app = "&Ouml;";  break;
 237  0
                     case '\u00FC' : app = "&uuml;";  break;
 238  0
                     case '\u00DC' : app = "&Uuml;";  break;
 239  0
                     case '\u00DF' : app = "&szlig;"; break;
 240  
 
 241  
                     //misc
 242  
                     //case 0x80: app = "&euro;"; break;  sometimes euro symbol is ascii 128, should we suport it?
 243  0
                     case '\u20AC': app = "&euro;";  break;
 244  0
                     case '\u00AB': app = "&laquo;"; break;
 245  0
                     case '\u00BB': app = "&raquo;"; break;
 246  0
                     case '\u00A0': app = "&#160;"; break;
 247  
 
 248  
                     default :
 249  
                         //encode all non basic latin characters
 250  0
                         app = "&#" + ((int)c) + ";";
 251  
                     break;
 252  
                 }
 253  
             }
 254  0
             if (app != null)
 255  
             {
 256  0
                 if (sb == null)
 257  
                 {
 258  0
                     sb = new StringBuilder(realLength*2);
 259  0
                     sb.append(string, offset, i - offset);
 260  
                 }
 261  0
                 sb.append(app);
 262  
             } else {
 263  0
                 if (sb != null)
 264  
                 {
 265  0
                     sb.append(c);
 266  
                 }
 267  
             }
 268  
         }
 269  
 
 270  0
         if (sb == null)
 271  
         {
 272  0
             writer.write(string, offset, realLength);
 273  
         }
 274  
         else
 275  
         {
 276  0
             writer.write(sb.toString());
 277  
         }
 278  0
     }
 279  
     
 280  
     private static final String HEX_CHARSET = "0123456789ABCDEF";
 281  
     
 282  
     private static final String UTF8 = "UTF-8";
 283  
     
 284  
     /**
 285  
      * Encode an URI, escaping or percent-encoding all required characters and
 286  
      * following the rules mentioned on RFC 3986.  
 287  
      * 
 288  
      * @param string
 289  
      * @param encodeNonLatin
 290  
      * @return
 291  
      * @throws IOException
 292  
      */
 293  
     public static String encodeURIAtributte(final String string, final String characterEncoding)
 294  
         throws IOException
 295  
     {
 296  0
         StringBuilder sb = null;    //create later on demand
 297  
         String app;
 298  
         char c;
 299  0
         boolean endLoop = false;
 300  0
         for (int i = 0; i < string.length (); ++i)
 301  
         {
 302  0
             app = null;
 303  0
             c = string.charAt(i);
 304  
             
 305  
             // This are the guidelines to be taken into account by this algorithm to encode:
 306  
             
 307  
             // RFC 2396 Section 2.4.3 Excluded US-ASCII Characters
 308  
             //
 309  
             // control     = <US-ASCII coded characters 00-1F and 7F hexadecimal>
 310  
             // space       = <US-ASCII coded character 20 hexadecimal>
 311  
             // delims      = "<" | ">" | "#" | "%" | <">
 312  
             //               %3C   %3E   %23   %25   %22
 313  
             // unwise      = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 314  
             //               %7D   %7B   %7C   %5C   %5E   %5B   %5D   %60
 315  
             //
 316  
             // ".... Data corresponding to excluded characters must be escaped in order to
 317  
             // be properly represented within a URI....."
 318  
             
 319  
             // RFC 3986 Section 3.  Syntax Components
 320  
             //
 321  
             // "... The generic URI syntax consists of a hierarchical sequence of
 322  
             // components referred to as the scheme, authority, path, query, and
 323  
             // fragment.
 324  
             //
 325  
             //   URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 326  
             //
 327  
             //   hier-part   = "//" authority path-abempty
 328  
             //               / path-absolute
 329  
             //               / path-rootless
 330  
             //               / path-empty
 331  
             // ...."
 332  
             
 333  
             // RFC 3986 Section 2.2:
 334  
             // Reserved characters (should not be percent-encoded)
 335  
             // reserved    = gen-delims / sub-delims
 336  
             // gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
 337  
             //               %3A   %2F   %3F   %23   %5B   %5D   %40
 338  
             // sub-delims  = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 339  
             //               %21   %24   %26   %27   %28   %29   %2A   %2B   %2C   %3B   %3D
 340  
             
 341  
             // Note than chars "[" and "]" are mentioned as they should be escaped on RFC 2396,
 342  
             // but on the part D. Changes from RFC 2396 says about this chars (used on IPv6) 
 343  
             // "...those rules were redefined to directly specify the characters allowed...."
 344  
             // There is also other characters moved from excluded list to reserved:
 345  
             // "[" / "]" / "#"  
 346  
             
 347  
             // RFC 3986 Section 2.3:
 348  
             // "... for consistency, percent-encoded octets in the ranges of ALPHA
 349  
             // (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
 350  
             // underscore (%5F), or tilde (%7E) should not be created by URI
 351  
             // producers...."
 352  
             
 353  
             // RFC 3986 Section  3.2.2.  Host
 354  
 
 355  
             // host = IP-literal / IPv4address / reg-name
 356  
 
 357  
             // The reg-name syntax allows percent-encoded octets in order to
 358  
             // represent non-ASCII registered names in a uniform way that is
 359  
             // independent of the underlying name resolution technology.  Non-ASCII
 360  
             // characters must first be encoded according to UTF-8 [STD63], and then
 361  
             // each octet of the corresponding UTF-8 sequence must be percent-
 362  
             // encoded to be represented as URI characters.  URI producing
 363  
             // applications must not use percent-encoding in host unless it is used
 364  
             // to represent a UTF-8 character sequence.
 365  
             
 366  
             // RFC 3986 Section 3.4 Query 
 367  
             //         query       = *( pchar / "/" / "?" )
 368  
             //
 369  
             // "...  However, as query components are often used to carry identifying information 
 370  
             // in the form of "key=value" pairs and one frequently used value is a reference to
 371  
             // another URI, it is sometimes better for usability to avoid percent-encoding those characters....."
 372  
             //
 373  
             // RFC 3986 Section 2.5 Identifying Data (Apply to query section)
 374  
             //
 375  
             // When a new URI scheme defines a component that represents textual
 376  
             // data consisting of characters from the Universal Character Set [UCS],
 377  
             // the data should first be encoded as octets according to the UTF-8
 378  
             // character encoding [STD63]; then only those octets that do not
 379  
             // correspond to characters in the unreserved set should be percent-
 380  
             // encoded.  For example, the character A would be represented as "A",
 381  
             // the character LATIN CAPITAL LETTER A WITH GRAVE would be represented
 382  
             // as "%C3%80", and the character KATAKANA LETTER A would be represented
 383  
             // as "%E3%82%A2".
 384  
             //
 385  
             // RFC 3986 Section 3.5 Fragment
 386  
             //         fragment    = *( pchar / "/" / "?" )
 387  
             //
 388  
             // Note that follows the same as query
 389  
             
 390  
             // Based on the extracts the strategy to apply on this method is:
 391  
             // 
 392  
             // On scheme ":" hier-part
 393  
             //
 394  
             // Escape or percent encode chars inside :
 395  
             // 
 396  
             // - From %00 to %20, 
 397  
             // - <"> %22, "%" %25 (If there is encode of "%", there is a risk of 
 398  
             //                     duplicate encoding, encode it when we are sure 
 399  
             //                     that there are not encoded twice)
 400  
             // - "<" %3C, ">" %3E
 401  
             // - "\" %5C, "^" %5E, "`" %60 
 402  
             // - "{" %7B, "|" %7C, "}" %7D
 403  
             // - From %7F ad infinitum (characters from %100 to infinitum should not be used in this
 404  
             //   part of an URI, but it is preferred to encode it that omit it).
 405  
             //
 406  
             // The remaining characters must not be encoded
 407  
             //
 408  
             // Characters after ? or # should be percent encoding but only the necessary ones:
 409  
             //
 410  
             // - From %00 to %20 (' ' %20 could encode as +, but %20 also works, so we keep %20)
 411  
             // - <"> %22, "%" %25 (If there is encode of "%", there is a risk of 
 412  
             //                     duplicate encoding, encode it when we are sure 
 413  
             //                     that there are not encoded twice)
 414  
             // - "<" %3C, ">" %3E,
 415  
             // - "\" %5C, "^" %5E, "`" %60 
 416  
             // - "{" %7B, "|" %7C, "}" %7D
 417  
             // - From %7F ad infinitum (each character as many bytes as necessary but take into account
 418  
             //   that a single char should contain 2,3 or more bytes!. This data should be encoded 
 419  
             //   translating from the document character encoding to percent encoding, because this values
 420  
             //   could be retrieved from httpRequest.getParameter() and it uses the current character encoding
 421  
             //   for decode values)
 422  
             //
 423  
             // "&" should be encoded as "&amp;" because this link is inside an html page, and 
 424  
             // put only & is invalid in this context.
 425  
 
 426  0
             if (   (c <= (char)0x20) || (c >= (char)0x7F) || 
 427  
                     c == '"' || c == '<' ||
 428  
                     c == '>' || c == '\\' || c == '^' || c == '`' ||
 429  
                     c == '{' || c == '|' || c == '}')
 430  
             {
 431  
                 // The percent encoding on this part should be done using UTF-8 charset
 432  
                 // as RFC 3986 Section 3.2.2 says.
 433  
                 // Also there is a reference on 
 434  
                 // http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
 435  
                 // that recommend use of UTF-8 instead the document character encoding.
 436  
                 // Jetty set by default UTF-8 (see http://jira.codehaus.org/browse/JETTY-113)
 437  0
                 app = percentEncode(c, "UTF-8");
 438  
             }
 439  0
             else if (c == '%')
 440  
             {
 441  0
                 if (i + 2 < string.length())
 442  
                 {
 443  0
                     char c1 = string.charAt(i+1);
 444  0
                     char c2 = string.charAt(i+2);
 445  0
                     if ((( c1 >= '0' && c1 <='9') || (c1 >='A' && c1 <='Z')) &&
 446  
                         (( c2 >= '0' && c2 <='9') || (c2 >='A' && c2 <='Z')))
 447  
                     {
 448  
                         // do not percent encode, because it could be already encoded
 449  
                         // and we don't want encode it twice
 450  
                     }
 451  
                     else
 452  
                     {
 453  0
                         app = percentEncode(c, UTF8);
 454  
                     }
 455  0
                 }
 456  
                 else
 457  
                 {
 458  0
                     app = percentEncode(c, UTF8);
 459  
                 }
 460  
             }
 461  0
             else if (c == '?' || c == '#')
 462  
             {
 463  0
                 if (i+1 < string.length())
 464  
                 {
 465  
                     // The remaining part of the URI are data that should be encoded
 466  
                     // using the document character encoding.
 467  0
                     app = c + encodeURIQuery(string.substring(i+1), characterEncoding);
 468  0
                     endLoop = true;
 469  
                 }
 470  
             }
 471  
             else
 472  
             {
 473  
                 //No encoding, just do nothing, char will be added later.
 474  
             }
 475  
                         
 476  0
             if (app != null)
 477  
             {
 478  0
                 if (sb == null)
 479  
                 {
 480  0
                     sb = new StringBuilder(string.substring(0, i));
 481  
                 }
 482  0
                 sb.append(app);
 483  
             } else {
 484  0
                 if (sb != null)
 485  
                 {
 486  0
                     sb.append(c);
 487  
                 }
 488  
             }
 489  0
             if (endLoop)
 490  
             {
 491  0
                 break;
 492  
             }
 493  
         }
 494  0
         if (sb == null)
 495  
         {
 496  0
             return string;
 497  
         }
 498  
         else
 499  
         {
 500  0
             return sb.toString();
 501  
         }
 502  
     }
 503  
     
 504  
     /**
 505  
      * Encode a unicode char value in percentEncode, decoding its bytes using a specified 
 506  
      * characterEncoding.
 507  
      * 
 508  
      * @param c
 509  
      * @param characterEncoding
 510  
      * @return
 511  
      */
 512  
     private static String percentEncode(char c, String characterEncoding)
 513  
     {
 514  0
         String app = null;
 515  0
         if (c > (char)((short)0x007F))
 516  
         {
 517  
             //percent encode in the proper encoding to be consistent
 518  0
             app = percentEncodeNonUsAsciiCharacter(c, characterEncoding);
 519  
         }
 520  
         else
 521  
         {
 522  
             //percent encode US-ASCII char (0x00-0x7F range)
 523  0
             app = "%" + HEX_CHARSET.charAt( ((c >> 0x4) % 0x10)) +HEX_CHARSET.charAt(c % 0x10);
 524  
         }
 525  0
         return app;
 526  
     }
 527  
     
 528  
     private static String percentEncodeNonUsAsciiCharacter(char c, String characterEncoding)
 529  
     {
 530  0
         ByteArrayOutputStream baos = new ByteArrayOutputStream(10);
 531  0
         StringBuffer builder = new StringBuffer();
 532  
         try
 533  
         {
 534  0
             OutputStreamWriter writer = new OutputStreamWriter(baos,characterEncoding);
 535  0
             writer.write(c);
 536  0
             writer.flush();
 537  
         }
 538  0
         catch(IOException e)
 539  
         {
 540  0
             baos.reset();
 541  0
             return null;
 542  0
         }
 543  
         
 544  0
         byte [] byteArray =  baos.toByteArray();
 545  0
         for (int i=0; i < byteArray.length; i++)
 546  
         {
 547  0
             builder.append('%');
 548  0
             builder.append(HEX_CHARSET.charAt( (( ((short) byteArray[i] & 0xFF ) >> 0x4) % 0x10)) );
 549  0
             builder.append(HEX_CHARSET.charAt( ((short) byteArray[i] & 0xFF ) % 0x10));
 550  
         }
 551  
         
 552  0
         return builder.toString();
 553  
     }
 554  
 
 555  
     /**
 556  
      * Encode the query part using the document charset encoding provided.
 557  
      * 
 558  
      * 
 559  
      * @param string
 560  
      * @param characterEncoding
 561  
      * @return
 562  
      */
 563  
     private static String encodeURIQuery(final String string, final String characterEncoding)
 564  
     {
 565  0
         StringBuilder sb = null;    //create later on demand
 566  
         String app;
 567  
         char c;
 568  0
         boolean endLoop = false;
 569  0
         for (int i = 0; i < string.length (); ++i)
 570  
         {
 571  0
             app = null;
 572  0
             c = string.charAt(i);
 573  
             
 574  
             // - From %00 to %20 (' ' %20 could encode as +, but %20 also works, so we keep %20)
 575  
             // - <"> %22 (If there is encode of "%", there is a risk of duplicate encoding, so we make easier and omit this one)
 576  
             // - "<" %3C, ">" %3E,
 577  
             // - "\" %5C, "^" %5E, "`" %60 
 578  
             // - "{" %7B, "|" %7C, "}" %7D
 579  
             // - From %7F ad infinitum (each character as many bytes as necessary but take into account
 580  
             //   that a single char should contain 2,3 or more bytes!. This data should be encoded translating from the document
 581  
             //   character encoding to percent encoding)
 582  
             //
 583  
             // "&" should be encoded as "&amp;" because this link is inside an html page, and 
 584  
             // put & is invalid in this context   
 585  
             
 586  0
             if (   (c <= (char)0x20) || (c >= (char)0x7F) || 
 587  
                     c == '"' || c == '<' ||
 588  
                     c == '>' || c == '\\' || c == '^' || c == '`' ||
 589  
                     c == '{' || c == '|' || c == '}')
 590  
             {
 591  
                 // The percent encoding on this part should be done using UTF-8 charset
 592  
                 // as RFC 3986 Section 3.2.2 says
 593  0
                 app = percentEncode(c, characterEncoding);
 594  
             }
 595  0
             else if (c == '%')
 596  
             {
 597  0
                 if (i + 2 < string.length())
 598  
                 {
 599  0
                     char c1 = string.charAt(i+1);
 600  0
                     char c2 = string.charAt(i+2);
 601  0
                     if ((( c1 >= '0' && c1 <='9') || (c1 >='A' && c1 <='Z')) &&
 602  
                         (( c2 >= '0' && c2 <='9') || (c2 >='A' && c2 <='Z')))
 603  
                     {
 604  
                         // do not percent encode, because it could be already encoded
 605  
                     }
 606  
                     else
 607  
                     {
 608  0
                         app = percentEncode(c, characterEncoding);
 609  
                     }
 610  0
                 }
 611  
                 else
 612  
                 {
 613  0
                     app = percentEncode(c, characterEncoding);
 614  
                 }
 615  
             }
 616  0
             else if (c == '&')
 617  
             {
 618  0
                 if (i+4 < string.length() )
 619  
                 {
 620  0
                     if ('a' == string.charAt(i+1) &&
 621  
                         'm' == string.charAt(i+2) &&
 622  
                         'p' == string.charAt(i+3) &&
 623  
                         ';' == string.charAt(i+4))
 624  
                     {
 625  
                         //Skip
 626  
                     }
 627  
                     else
 628  
                     {
 629  0
                         app = "&amp;";
 630  
                     }
 631  
                 }
 632  
                 else
 633  
                 {
 634  0
                     app = "&amp;";
 635  
                 }
 636  
             }
 637  
             else
 638  
             {
 639  
                 //No encoding, just do nothing, char will be added later.
 640  
             }
 641  
                         
 642  0
             if (app != null)
 643  
             {
 644  0
                 if (sb == null)
 645  
                 {
 646  0
                     sb = new StringBuilder(string.substring(0, i));
 647  
                 }
 648  0
                 sb.append(app);
 649  
             } else {
 650  0
                 if (sb != null)
 651  
                 {
 652  0
                     sb.append(c);
 653  
                 }
 654  
             }
 655  0
             if (endLoop)
 656  
             {
 657  0
                 break;
 658  
             }
 659  
         }
 660  0
         if (sb == null)
 661  
         {
 662  0
             return string;
 663  
         }
 664  
         else
 665  
         {
 666  0
             return sb.toString();
 667  
         }
 668  
     }
 669  
 }