Coverage Report - org.apache.commons.codec.net.URLCodec
 
Classes in this File Line Coverage Branch Coverage Complexity
URLCodec
98%
86/87
100%
40/40
4
 
 1  
 /*
 2  
  * Licensed to the Apache Software Foundation (ASF) under one or more
 3  
  * contributor license agreements.  See the NOTICE file distributed with
 4  
  * this work for additional information regarding copyright ownership.
 5  
  * The ASF licenses this file to You under the Apache License, Version 2.0
 6  
  * (the "License"); you may not use this file except in compliance with
 7  
  * the License.  You may obtain a copy of the License at
 8  
  *
 9  
  *      http://www.apache.org/licenses/LICENSE-2.0
 10  
  *
 11  
  * Unless required by applicable law or agreed to in writing, software
 12  
  * distributed under the License is distributed on an "AS IS" BASIS,
 13  
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14  
  * See the License for the specific language governing permissions and
 15  
  * limitations under the License.
 16  
  */
 17  
 
 18  
 package org.apache.commons.codec.net;
 19  
 
 20  
 import java.io.ByteArrayOutputStream;
 21  
 import java.io.UnsupportedEncodingException;
 22  
 import java.util.BitSet;
 23  
 
 24  
 import org.apache.commons.codec.BinaryDecoder;
 25  
 import org.apache.commons.codec.BinaryEncoder;
 26  
 import org.apache.commons.codec.CharEncoding;
 27  
 import org.apache.commons.codec.DecoderException;
 28  
 import org.apache.commons.codec.EncoderException;
 29  
 import org.apache.commons.codec.StringDecoder;
 30  
 import org.apache.commons.codec.StringEncoder;
 31  
 import org.apache.commons.codec.binary.StringUtils;
 32  
 
 33  
 /**
 34  
  * Implements the 'www-form-urlencoded' encoding scheme, also misleadingly known as URL encoding.
 35  
  * <p>
 36  
  * This codec is meant to be a replacement for standard Java classes {@link java.net.URLEncoder} and
 37  
  * {@link java.net.URLDecoder} on older Java platforms, as these classes in Java versions below
 38  
  * 1.4 rely on the platform's default charset encoding.
 39  
  * <p>
 40  
  * This class is immutable and thread-safe.
 41  
  *
 42  
  * @see <a href="http://www.w3.org/TR/html4/interact/forms.html#h-17.13.4.1">Chapter 17.13.4 Form content types</a>
 43  
  *           of the <a href="http://www.w3.org/TR/html4/">HTML 4.01 Specification</a>
 44  
  *
 45  
  * @since 1.2
 46  
  * @version $Id$
 47  
  */
 48  
 public class URLCodec implements BinaryEncoder, BinaryDecoder, StringEncoder, StringDecoder {
 49  
 
 50  
     /**
 51  
      * Radix used in encoding and decoding.
 52  
      */
 53  
     static final int RADIX = 16;
 54  
 
 55  
     /**
 56  
      * The default charset used for string decoding and encoding.
 57  
      *
 58  
      * @deprecated TODO: This field will be changed to a private final Charset in 2.0.
 59  
      */
 60  
     @Deprecated
 61  
     protected String charset;
 62  
 
 63  
     /**
 64  
      * Release 1.5 made this field final.
 65  
      */
 66  
     protected static final byte ESCAPE_CHAR = '%';
 67  
     /**
 68  
      * BitSet of www-form-url safe characters.
 69  
      */
 70  1
     protected static final BitSet WWW_FORM_URL = new BitSet(256);
 71  
 
 72  
     // Static initializer for www_form_url
 73  
     static {
 74  
         // alpha characters
 75  27
         for (int i = 'a'; i <= 'z'; i++) {
 76  26
             WWW_FORM_URL.set(i);
 77  
         }
 78  27
         for (int i = 'A'; i <= 'Z'; i++) {
 79  26
             WWW_FORM_URL.set(i);
 80  
         }
 81  
         // numeric characters
 82  11
         for (int i = '0'; i <= '9'; i++) {
 83  10
             WWW_FORM_URL.set(i);
 84  
         }
 85  
         // special chars
 86  1
         WWW_FORM_URL.set('-');
 87  1
         WWW_FORM_URL.set('_');
 88  1
         WWW_FORM_URL.set('.');
 89  1
         WWW_FORM_URL.set('*');
 90  
         // blank to be replaced with +
 91  1
         WWW_FORM_URL.set(' ');
 92  1
     }
 93  
 
 94  
 
 95  
     /**
 96  
      * Default constructor.
 97  
      */
 98  
     public URLCodec() {
 99  13
         this(CharEncoding.UTF_8);
 100  13
     }
 101  
 
 102  
     /**
 103  
      * Constructor which allows for the selection of a default charset.
 104  
      *
 105  
      * @param charset the default string charset to use.
 106  
      */
 107  
     public URLCodec(final String charset) {
 108  15
         super();
 109  15
         this.charset = charset;
 110  15
     }
 111  
 
 112  
     /**
 113  
      * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
 114  
      *
 115  
      * @param urlsafe
 116  
      *            bitset of characters deemed URL safe
 117  
      * @param bytes
 118  
      *            array of bytes to convert to URL safe characters
 119  
      * @return array of bytes containing URL safe characters
 120  
      */
 121  
     public static final byte[] encodeUrl(BitSet urlsafe, final byte[] bytes) {
 122  14
         if (bytes == null) {
 123  1
             return null;
 124  
         }
 125  13
         if (urlsafe == null) {
 126  1
             urlsafe = WWW_FORM_URL;
 127  
         }
 128  
 
 129  13
         final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
 130  241
         for (final byte c : bytes) {
 131  228
             int b = c;
 132  228
             if (b < 0) {
 133  58
                 b = 256 + b;
 134  
             }
 135  228
             if (urlsafe.get(b)) {
 136  105
                 if (b == ' ') {
 137  7
                     b = '+';
 138  
                 }
 139  105
                 buffer.write(b);
 140  
             } else {
 141  123
                 buffer.write(ESCAPE_CHAR);
 142  123
                 final char hex1 = Character.toUpperCase(Character.forDigit((b >> 4) & 0xF, RADIX));
 143  123
                 final char hex2 = Character.toUpperCase(Character.forDigit(b & 0xF, RADIX));
 144  123
                 buffer.write(hex1);
 145  123
                 buffer.write(hex2);
 146  
             }
 147  
         }
 148  13
         return buffer.toByteArray();
 149  
     }
 150  
 
 151  
     /**
 152  
      * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
 153  
      * back to their original representation.
 154  
      *
 155  
      * @param bytes
 156  
      *            array of URL safe characters
 157  
      * @return array of original bytes
 158  
      * @throws DecoderException
 159  
      *             Thrown if URL decoding is unsuccessful
 160  
      */
 161  
     public static final byte[] decodeUrl(final byte[] bytes) throws DecoderException {
 162  15
         if (bytes == null) {
 163  1
             return null;
 164  
         }
 165  14
         final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
 166  152
         for (int i = 0; i < bytes.length; i++) {
 167  142
             final int b = bytes[i];
 168  142
             if (b == '+') {
 169  4
                 buffer.write(' ');
 170  138
             } else if (b == ESCAPE_CHAR) {
 171  
                 try {
 172  56
                     final int u = Utils.digit16(bytes[++i]);
 173  54
                     final int l = Utils.digit16(bytes[++i]);
 174  52
                     buffer.write((char) ((u << 4) + l));
 175  2
                 } catch (final ArrayIndexOutOfBoundsException e) {
 176  2
                     throw new DecoderException("Invalid URL encoding: ", e);
 177  52
                 }
 178  
             } else {
 179  82
                 buffer.write(b);
 180  
             }
 181  
         }
 182  10
         return buffer.toByteArray();
 183  
     }
 184  
 
 185  
     /**
 186  
      * Encodes an array of bytes into an array of URL safe 7-bit characters. Unsafe characters are escaped.
 187  
      *
 188  
      * @param bytes
 189  
      *            array of bytes to convert to URL safe characters
 190  
      * @return array of bytes containing URL safe characters
 191  
      */
 192  
     @Override
 193  
     public byte[] encode(final byte[] bytes) {
 194  13
         return encodeUrl(WWW_FORM_URL, bytes);
 195  
     }
 196  
 
 197  
 
 198  
     /**
 199  
      * Decodes an array of URL safe 7-bit characters into an array of original bytes. Escaped characters are converted
 200  
      * back to their original representation.
 201  
      *
 202  
      * @param bytes
 203  
      *            array of URL safe characters
 204  
      * @return array of original bytes
 205  
      * @throws DecoderException
 206  
      *             Thrown if URL decoding is unsuccessful
 207  
      */
 208  
     @Override
 209  
     public byte[] decode(final byte[] bytes) throws DecoderException {
 210  14
         return decodeUrl(bytes);
 211  
     }
 212  
 
 213  
     /**
 214  
      * Encodes a string into its URL safe form using the specified string charset. Unsafe characters are escaped.
 215  
      *
 216  
      * @param str
 217  
      *            string to convert to a URL safe form
 218  
      * @param charset
 219  
      *            the charset for str
 220  
      * @return URL safe string
 221  
      * @throws UnsupportedEncodingException
 222  
      *             Thrown if charset is not supported
 223  
      */
 224  
     public String encode(final String str, final String charset) throws UnsupportedEncodingException {
 225  13
         if (str == null) {
 226  1
             return null;
 227  
         }
 228  12
         return StringUtils.newStringUsAscii(encode(str.getBytes(charset)));
 229  
     }
 230  
 
 231  
     /**
 232  
      * Encodes a string into its URL safe form using the default string charset. Unsafe characters are escaped.
 233  
      *
 234  
      * @param str
 235  
      *            string to convert to a URL safe form
 236  
      * @return URL safe string
 237  
      * @throws EncoderException
 238  
      *             Thrown if URL encoding is unsuccessful
 239  
      *
 240  
      * @see #getDefaultCharset()
 241  
      */
 242  
     @Override
 243  
     public String encode(final String str) throws EncoderException {
 244  8
         if (str == null) {
 245  1
             return null;
 246  
         }
 247  
         try {
 248  7
             return encode(str, getDefaultCharset());
 249  1
         } catch (final UnsupportedEncodingException e) {
 250  1
             throw new EncoderException(e.getMessage(), e);
 251  
         }
 252  
     }
 253  
 
 254  
 
 255  
     /**
 256  
      * Decodes a URL safe string into its original form using the specified encoding. Escaped characters are converted
 257  
      * back to their original representation.
 258  
      *
 259  
      * @param str
 260  
      *            URL safe string to convert into its original form
 261  
      * @param charset
 262  
      *            the original string charset
 263  
      * @return original string
 264  
      * @throws DecoderException
 265  
      *             Thrown if URL decoding is unsuccessful
 266  
      * @throws UnsupportedEncodingException
 267  
      *             Thrown if charset is not supported
 268  
      */
 269  
     public String decode(final String str, final String charset) throws DecoderException, UnsupportedEncodingException {
 270  13
         if (str == null) {
 271  1
             return null;
 272  
         }
 273  12
         return new String(decode(StringUtils.getBytesUsAscii(str)), charset);
 274  
     }
 275  
 
 276  
     /**
 277  
      * Decodes a URL safe string into its original form using the default string charset. Escaped characters are
 278  
      * converted back to their original representation.
 279  
      *
 280  
      * @param str
 281  
      *            URL safe string to convert into its original form
 282  
      * @return original string
 283  
      * @throws DecoderException
 284  
      *             Thrown if URL decoding is unsuccessful
 285  
      * @see #getDefaultCharset()
 286  
      */
 287  
     @Override
 288  
     public String decode(final String str) throws DecoderException {
 289  11
         if (str == null) {
 290  1
             return null;
 291  
         }
 292  
         try {
 293  10
             return decode(str, getDefaultCharset());
 294  1
         } catch (final UnsupportedEncodingException e) {
 295  1
             throw new DecoderException(e.getMessage(), e);
 296  
         }
 297  
     }
 298  
 
 299  
     /**
 300  
      * Encodes an object into its URL safe form. Unsafe characters are escaped.
 301  
      *
 302  
      * @param obj
 303  
      *            string to convert to a URL safe form
 304  
      * @return URL safe object
 305  
      * @throws EncoderException
 306  
      *             Thrown if URL encoding is not applicable to objects of this type or if encoding is unsuccessful
 307  
      */
 308  
     @Override
 309  
     public Object encode(final Object obj) throws EncoderException {
 310  4
         if (obj == null) {
 311  1
             return null;
 312  3
         } else if (obj instanceof byte[]) {
 313  1
             return encode((byte[])obj);
 314  2
         } else if (obj instanceof String) {
 315  1
             return encode((String)obj);
 316  
         } else {
 317  1
             throw new EncoderException("Objects of type " + obj.getClass().getName() + " cannot be URL encoded");
 318  
 
 319  
         }
 320  
     }
 321  
 
 322  
     /**
 323  
      * Decodes a URL safe object into its original form. Escaped characters are converted back to their original
 324  
      * representation.
 325  
      *
 326  
      * @param obj
 327  
      *            URL safe object to convert into its original form
 328  
      * @return original object
 329  
      * @throws DecoderException
 330  
      *             Thrown if the argument is not a <code>String</code> or <code>byte[]</code>. Thrown if a failure
 331  
      *             condition is encountered during the decode process.
 332  
      */
 333  
     @Override
 334  
     public Object decode(final Object obj) throws DecoderException {
 335  4
         if (obj == null) {
 336  1
             return null;
 337  3
         } else if (obj instanceof byte[]) {
 338  1
             return decode((byte[]) obj);
 339  2
         } else if (obj instanceof String) {
 340  1
             return decode((String) obj);
 341  
         } else {
 342  1
             throw new DecoderException("Objects of type " + obj.getClass().getName() + " cannot be URL decoded");
 343  
 
 344  
         }
 345  
     }
 346  
 
 347  
     /**
 348  
      * The default charset used for string decoding and encoding.
 349  
      *
 350  
      * @return the default string charset.
 351  
      */
 352  
     public String getDefaultCharset() {
 353  17
         return this.charset;
 354  
     }
 355  
 
 356  
     /**
 357  
      * The <code>String</code> encoding used for decoding and encoding.
 358  
      *
 359  
      * @return Returns the encoding.
 360  
      *
 361  
      * @deprecated Use {@link #getDefaultCharset()}, will be removed in 2.0.
 362  
      */
 363  
     @Deprecated
 364  
     public String getEncoding() {
 365  0
         return this.charset;
 366  
     }
 367  
 
 368  
 }