001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.util;
018    
019    import java.util.BitSet;
020    
/**
 * Encoder for unsafe URI characters.
 * <p>
 * Characters that belong to a configured "unsafe" set are replaced by their
 * percent-encoded form ({@code %XX}, two upper-case hexadecimal digits).
 * Only characters in the range {@code 1..127} are ever considered for
 * encoding; every other character is copied to the output unchanged.
 * A {@code %} that already starts a percent-encoded triplet (a {@code %}
 * followed by two hexadecimal digits) is left as is, so input that is
 * already encoded is not encoded a second time.
 * <p>
 * A good source for details is the
 * <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article.
 */
public final class UnsafeUriCharactersEncoder {

    /** Characters that are unsafe in both of the built-in character sets. */
    private static final String COMMON_UNSAFE_CHARACTERS = " \"<>#%{}|\\^~`";

    /** Unsafe set used by {@link #encode(String)}: the common set plus the square brackets. */
    private static final BitSet UNSAFE_CHARACTERS_RFC1738 = toBitSet(COMMON_UNSAFE_CHARACTERS + "[]");

    /** Unsafe set used by {@link #encodeHttpURI(String)}: the common set, square brackets are kept. */
    private static final BitSet UNSAFE_CHARACTERS_HTTP = toBitSet(COMMON_UNSAFE_CHARACTERS);

    /** Digits used when writing an escape sequence; indexed by nibble value (0-15). */
    private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7',
                                              '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    private UnsafeUriCharactersEncoder() {
        // util class
    }

    /**
     * Encodes the given string using the default unsafe character set
     * (space, {@code " < > # % { } | \ ^ ~ [ ] `}).
     *
     * @param s the string to encode, may be {@code null}
     * @return the encoded string; {@code s} itself when it is {@code null},
     *         empty or contains no unsafe character
     */
    public static String encode(String s) {
        return encode(s, UNSAFE_CHARACTERS_RFC1738);
    }

    /**
     * Encodes the given string using the HTTP unsafe character set, which is
     * the default set without the square brackets {@code [} and {@code ]}.
     *
     * @param s the string to encode, may be {@code null}
     * @return the encoded string; {@code s} itself when it is {@code null},
     *         empty or contains no unsafe character
     */
    public static String encodeHttpURI(String s) {
        return encode(s, UNSAFE_CHARACTERS_HTTP);
    }

    /**
     * Encodes the given string using a caller supplied set of unsafe characters.
     * <p>
     * Only characters in the range {@code 1..127} are looked up in
     * {@code unsafeCharacters}; all other characters are never encoded.
     *
     * @param s                the string to encode, may be {@code null}
     * @param unsafeCharacters the characters to escape, indexed by character code;
     *                         it is only consulted when {@code s} contains a
     *                         character in the range {@code 1..127}
     * @return the encoded string; {@code s} itself when it is {@code null},
     *         empty or contains no unsafe character
     */
    public static String encode(String s, BitSet unsafeCharacters) {
        if (s == null || s.length() == 0) {
            return s;
        }

        final int length = s.length();

        // First check whether we actually need to encode; if not, hand back the input untouched.
        int firstUnsafe = 0;
        while (firstUnsafe < length && !isUnsafe(s.charAt(firstUnsafe), unsafeCharacters)) {
            firstUnsafe++;
        }
        if (firstUnsafe == length) {
            return s;
        }

        // There is at least one unsafe character. Everything before it is known to be
        // safe and is copied in one go; the extra capacity leaves room for a few escapes.
        StringBuilder sb = new StringBuilder(length + 16);
        sb.append(s, 0, firstUnsafe);

        for (int i = firstUnsafe; i < length; i++) {
            char ch = s.charAt(i);
            if (!isUnsafe(ch, unsafeCharacters)) {
                sb.append(ch);
            } else if (ch == '%' && startsPercentEncodedTriplet(s, i)) {
                // already percent-encoded (% followed by two hex digits), so append as is
                sb.append(ch);
            } else {
                // must escape, as it is an unsafe character
                appendEscape(sb, ch);
            }
        }
        return sb.toString();
    }

    /** Builds a bit set with one bit set for every character of the given string. */
    private static BitSet toBitSet(String characters) {
        BitSet set = new BitSet(256);
        for (int i = 0; i < characters.length(); i++) {
            set.set(characters.charAt(i));
        }
        return set;
    }

    /** Returns whether the character is in the range 1..127 and marked as unsafe. */
    private static boolean isUnsafe(char ch, BitSet unsafeCharacters) {
        return ch > 0 && ch < 128 && unsafeCharacters.get(ch);
    }

    /** Returns whether the two characters following {@code index} exist and are both hex digits. */
    private static boolean startsPercentEncodedTriplet(String s, int index) {
        return index + 2 < s.length()
            && isHexDigit(s.charAt(index + 1))
            && isHexDigit(s.charAt(index + 2));
    }

    /** Appends {@code %XX} for the given character, whose code is below 128 and so fits in two hex digits. */
    private static void appendEscape(StringBuilder sb, char ch) {
        sb.append('%');
        sb.append(HEX_DIGITS[(ch >> 4) & 0x0f]);
        sb.append(HEX_DIGITS[ch & 0x0f]);
    }

    /** Returns whether the character is one of {@code 0-9}, {@code A-F} or {@code a-f}. */
    private static boolean isHexDigit(char ch) {
        return (ch >= '0' && ch <= '9')
            || (ch >= 'A' && ch <= 'F')
            || (ch >= 'a' && ch <= 'f');
    }

}