001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.camel.util; 018 019 import java.util.BitSet; 020 021 /** 022 * Encoder for unsafe URI characters. 023 * <p/> 024 * A good source for details is <a href="http://en.wikipedia.org/wiki/Url_encode">wikipedia url encode</a> article. 025 */ 026 public final class UnsafeUriCharactersEncoder { 027 private static BitSet unsafeCharactersRfc1738; 028 private static BitSet unsafeCharactersHttp; 029 private static final char[] HEX_DIGITS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 030 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'}; 031 032 static { 033 unsafeCharactersRfc1738 = new BitSet(256); 034 unsafeCharactersRfc1738.set(' '); 035 unsafeCharactersRfc1738.set('"'); 036 unsafeCharactersRfc1738.set('<'); 037 unsafeCharactersRfc1738.set('>'); 038 unsafeCharactersRfc1738.set('#'); 039 unsafeCharactersRfc1738.set('%'); 040 unsafeCharactersRfc1738.set('{'); 041 unsafeCharactersRfc1738.set('}'); 042 unsafeCharactersRfc1738.set('|'); 043 unsafeCharactersRfc1738.set('\\'); 044 unsafeCharactersRfc1738.set('^'); 045 unsafeCharactersRfc1738.set('~'); 046 unsafeCharactersRfc1738.set('['); 047 unsafeCharactersRfc1738.set(']'); 048 unsafeCharactersRfc1738.set('`'); 049 } 050 051 static { 052 unsafeCharactersHttp = new BitSet(256); 053 unsafeCharactersHttp.set(' '); 054 unsafeCharactersHttp.set('"'); 055 unsafeCharactersHttp.set('<'); 056 unsafeCharactersHttp.set('>'); 057 unsafeCharactersHttp.set('#'); 058 unsafeCharactersHttp.set('%'); 059 unsafeCharactersHttp.set('{'); 060 unsafeCharactersHttp.set('}'); 061 unsafeCharactersHttp.set('|'); 062 unsafeCharactersHttp.set('\\'); 063 unsafeCharactersHttp.set('^'); 064 unsafeCharactersHttp.set('~'); 065 unsafeCharactersHttp.set('`'); 066 } 067 068 private UnsafeUriCharactersEncoder() { 069 // util class 070 } 071 072 public static String encode(String s) { 073 return encode(s, unsafeCharactersRfc1738); 074 } 075 076 public static String encodeHttpURI(String s) { 077 return encode(s, unsafeCharactersHttp); 078 } 079 080 public static String encode(String s, BitSet unsafeCharacters) { 081 int n = s == null ? 0 : s.length(); 082 if (n == 0) { 083 return s; 084 } 085 086 // First check whether we actually need to encode 087 char chars[] = s.toCharArray(); 088 for (int i = 0;;) { 089 // just deal with the ascii character 090 if (chars[i] > 0 && chars[i] < 128) { 091 if (unsafeCharacters.get(chars[i])) { 092 break; 093 } 094 } 095 if (++i >= chars.length) { 096 return s; 097 } 098 } 099 100 // okay there are some unsafe characters so we do need to encode 101 // see details at: http://en.wikipedia.org/wiki/Url_encode 102 StringBuilder sb = new StringBuilder(); 103 for (int i = 0; i < chars.length; i++) { 104 char ch = chars[i]; 105 if (ch > 0 && ch < 128 && unsafeCharacters.get(ch)) { 106 // special for % sign as it may be a decimal encoded value 107 if (ch == '%') { 108 char next = i + 1 < chars.length ? chars[i + 1] : ' '; 109 char next2 = i + 2 < chars.length ? chars[i + 2] : ' '; 110 111 if (isHexDigit(next) && isHexDigit(next2)) { 112 // its already encoded (decimal encoded) so just append as is 113 sb.append(ch); 114 } else { 115 // must escape then, as its an unsafe character 116 appendEscape(sb, (byte)ch); 117 } 118 } else { 119 // must escape then, as its an unsafe character 120 appendEscape(sb, (byte)ch); 121 } 122 } else { 123 sb.append(ch); 124 } 125 } 126 return sb.toString(); 127 } 128 129 private static void appendEscape(StringBuilder sb, byte b) { 130 sb.append('%'); 131 sb.append(HEX_DIGITS[(b >> 4) & 0x0f]); 132 sb.append(HEX_DIGITS[(b >> 0) & 0x0f]); 133 } 134 135 private static boolean isHexDigit(char ch) { 136 for (char hex : HEX_DIGITS) { 137 if (hex == ch) { 138 return true; 139 } 140 } 141 return false; 142 } 143 144 }