001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.util;
018    
019    import java.util.ArrayList;
020    import java.util.List;
021    import java.util.StringTokenizer;
022    
023    /**
024     * PathMatcher implementation for Ant-style path patterns. Examples are provided below.
025     * <p>
026     * Part of this mapping code has been kindly borrowed from <a href="http://ant.apache.org">Apache Ant</a>
027     * and <a href="http://springframework.org">Spring Framework</a>.
028     * <p>
029     * The mapping matches URLs using the following rules:<br>
030     * <ul>
031     *   <li>? matches one character</li>
032     *   <li>* matches zero or more characters</li>
033     *   <li>** matches zero or more 'directories' in a path</li>
034     * </ul>
035     * <p>
036     * Some examples:<br>
037     * <ul>
038     *   <li><code>com/t?st.jsp</code> - matches <code>com/test.jsp</code> but also
039     *       <code>com/tast.jsp</code> or <code>com/txst.jsp</code>
040     *   </li>
041     *   <li><code>com/*.jsp</code> - matches all <code>.jsp</code> files in the
042     *       <code>com</code> directory
043     *   </li>
044     *   <li><code>com/&#42;&#42;/test.jsp</code> - matches all <code>test.jsp</code>
045     *       files underneath the <code>com</code> path
046     *   </li>
047     *   <li><code>org/springframework/&#42;&#42;/*.jsp</code> - matches all
048     *       <code>.jsp</code> files underneath the <code>org/springframework</code> path
049     *   </li>
050     *   <li><code>org/&#42;&#42;/servlet/bla.jsp</code> - matches
051     *       <code>org/springframework/servlet/bla.jsp</code> but also
052     *       <code>org/springframework/testing/servlet/bla.jsp</code> and
053     *       <code>org/servlet/bla.jsp</code>
054     *   </li>
055     * </ul>
056     */
057    public class AntPathMatcher {
058    
059        /** Default path separator: "/" */
060        public static final String DEFAULT_PATH_SEPARATOR = "/";
061    
062        private String pathSeparator = DEFAULT_PATH_SEPARATOR;
063    
064        /**
065         * Set the path separator to use for pattern parsing. Default is "/", as in
066         * Ant.
067         */
068        public void setPathSeparator(String pathSeparator) {
069            this.pathSeparator = pathSeparator != null ? pathSeparator : DEFAULT_PATH_SEPARATOR;
070        }
071    
072        public boolean isPattern(String path) {
073            return path.indexOf('*') != -1 || path.indexOf('?') != -1;
074        }
075    
076        public boolean match(String pattern, String path) {
077            return match(pattern, path, true);
078        }
079    
080        public boolean matchStart(String pattern, String path) {
081            return matchStart(pattern, path, true);
082        }
083    
084        public boolean match(String pattern, String path, boolean isCaseSensitive) {
085            return doMatch(pattern, path, true, isCaseSensitive);
086        }
087    
088        public boolean matchStart(String pattern, String path, boolean isCaseSensitive) {
089            return doMatch(pattern, path, false, isCaseSensitive);
090        }
091    
092        /**
093         * Actually match the given <code>path</code> against the given
094         * <code>pattern</code>.
095         * 
096         * @param pattern the pattern to match against
097         * @param path the path String to test
098         * @param fullMatch whether a full pattern match is required (else a pattern
099         *            match as far as the given base path goes is sufficient)
100         * @param isCaseSensitive Whether or not matching should be performed
101         *                        case sensitively.
102         * @return <code>true</code> if the supplied <code>path</code> matched,
103         *         <code>false</code> if it didn't
104         */
105        protected boolean doMatch(String pattern, String path, boolean fullMatch, boolean isCaseSensitive) {
106            if (path.startsWith(this.pathSeparator) != pattern.startsWith(this.pathSeparator)) {
107                return false;
108            }
109    
110            String[] pattDirs = tokenizeToStringArray(pattern, this.pathSeparator);
111            String[] pathDirs = tokenizeToStringArray(path, this.pathSeparator);
112    
113            int pattIdxStart = 0;
114            int pattIdxEnd = pattDirs.length - 1;
115            int pathIdxStart = 0;
116            int pathIdxEnd = pathDirs.length - 1;
117    
118            // Match all elements up to the first **
119            while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
120                String patDir = pattDirs[pattIdxStart];
121                if ("**".equals(patDir)) {
122                    break;
123                }
124                if (!matchStrings(patDir, pathDirs[pathIdxStart], isCaseSensitive)) {
125                    return false;
126                }
127                pattIdxStart++;
128                pathIdxStart++;
129            }
130    
131            if (pathIdxStart > pathIdxEnd) {
132                // Path is exhausted, only match if rest of pattern is * or **'s
133                if (pattIdxStart > pattIdxEnd) {
134                    return pattern.endsWith(this.pathSeparator) ? path.endsWith(this.pathSeparator) : !path
135                        .endsWith(this.pathSeparator);
136                }
137                if (!fullMatch) {
138                    return true;
139                }
140                if (pattIdxStart == pattIdxEnd && pattDirs[pattIdxStart].equals("*")
141                    && path.endsWith(this.pathSeparator)) {
142                    return true;
143                }
144                for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
145                    if (!pattDirs[i].equals("**")) {
146                        return false;
147                    }
148                }
149                return true;
150            } else if (pattIdxStart > pattIdxEnd) {
151                // String not exhausted, but pattern is. Failure.
152                return false;
153            } else if (!fullMatch && "**".equals(pattDirs[pattIdxStart])) {
154                // Path start definitely matches due to "**" part in pattern.
155                return true;
156            }
157    
158            // up to last '**'
159            while (pattIdxStart <= pattIdxEnd && pathIdxStart <= pathIdxEnd) {
160                String patDir = pattDirs[pattIdxEnd];
161                if (patDir.equals("**")) {
162                    break;
163                }
164                if (!matchStrings(patDir, pathDirs[pathIdxEnd], isCaseSensitive)) {
165                    return false;
166                }
167                pattIdxEnd--;
168                pathIdxEnd--;
169            }
170            if (pathIdxStart > pathIdxEnd) {
171                // String is exhausted
172                for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
173                    if (!pattDirs[i].equals("**")) {
174                        return false;
175                    }
176                }
177                return true;
178            }
179    
180            while (pattIdxStart != pattIdxEnd && pathIdxStart <= pathIdxEnd) {
181                int patIdxTmp = -1;
182                for (int i = pattIdxStart + 1; i <= pattIdxEnd; i++) {
183                    if (pattDirs[i].equals("**")) {
184                        patIdxTmp = i;
185                        break;
186                    }
187                }
188                if (patIdxTmp == pattIdxStart + 1) {
189                    // '**/**' situation, so skip one
190                    pattIdxStart++;
191                    continue;
192                }
193                // Find the pattern between padIdxStart & padIdxTmp in str between
194                // strIdxStart & strIdxEnd
195                int patLength = patIdxTmp - pattIdxStart - 1;
196                int strLength = pathIdxEnd - pathIdxStart + 1;
197                int foundIdx = -1;
198    
199            strLoop:
200                for (int i = 0; i <= strLength - patLength; i++) {
201                    for (int j = 0; j < patLength; j++) {
202                        String subPat = pattDirs[pattIdxStart + j + 1];
203                        String subStr = pathDirs[pathIdxStart + i + j];
204                        if (!matchStrings(subPat, subStr, isCaseSensitive)) {
205                            continue strLoop;
206                        }
207                    }
208                    foundIdx = pathIdxStart + i;
209                    break;
210                }
211    
212                if (foundIdx == -1) {
213                    return false;
214                }
215    
216                pattIdxStart = patIdxTmp;
217                pathIdxStart = foundIdx + patLength;
218            }
219    
220            for (int i = pattIdxStart; i <= pattIdxEnd; i++) {
221                if (!pattDirs[i].equals("**")) {
222                    return false;
223                }
224            }
225    
226            return true;
227        }
228    
229        /**
230         * Tests whether or not a string matches against a pattern. The pattern may
231         * contain two special characters:<br>
232         * '*' means zero or more characters<br>
233         * '?' means one and only one character
234         * 
235         * @param pattern pattern to match against. Must not be <code>null</code>.
236         * @param str string which must be matched against the pattern. Must not be
237         *            <code>null</code>.
238         * @param caseSensitive Whether or not matching should be performed
239         *                      case sensitively.
240         * @return <code>true</code> if the string matches against the pattern, or
241         *         <code>false</code> otherwise.
242         */
243        private boolean matchStrings(String pattern, String str, boolean caseSensitive) {
244            char[] patArr = pattern.toCharArray();
245            char[] strArr = str.toCharArray();
246            int patIdxStart = 0;
247            int patIdxEnd = patArr.length - 1;
248            int strIdxStart = 0;
249            int strIdxEnd = strArr.length - 1;
250            char ch;
251    
252            boolean containsStar = false;
253            for (char c : patArr) {
254                if (c == '*') {
255                    containsStar = true;
256                    break;
257                }
258            }
259    
260            if (!containsStar) {
261                // No '*'s, so we make a shortcut
262                if (patIdxEnd != strIdxEnd) {
263                    return false; // Pattern and string do not have the same size
264                }
265                for (int i = 0; i <= patIdxEnd; i++) {
266                    ch = patArr[i];
267                    if (ch != '?') {
268                        if (different(caseSensitive, ch, strArr[i])) {
269                            return false;
270                            // Character mismatch
271                        }
272                    }
273                }
274                return true; // String matches against pattern
275            }
276    
277            if (patIdxEnd == 0) {
278                return true; // Pattern contains only '*', which matches anything
279            }
280    
281            // Process characters before first star
282            while ((ch = patArr[patIdxStart]) != '*' && strIdxStart <= strIdxEnd) {
283                if (ch != '?') {
284                    if (different(caseSensitive, ch, strArr[strIdxStart])) {
285                        return false;
286                        // Character mismatch
287                    }
288                }
289                patIdxStart++;
290                strIdxStart++;
291            }
292            if (strIdxStart > strIdxEnd) {
293                // All characters in the string are used. Check if only '*'s are
294                // left in the pattern. If so, we succeeded. Otherwise failure.
295                for (int i = patIdxStart; i <= patIdxEnd; i++) {
296                    if (patArr[i] != '*') {
297                        return false;
298                    }
299                }
300                return true;
301            }
302    
303            // Process characters after last star
304            while ((ch = patArr[patIdxEnd]) != '*' && strIdxStart <= strIdxEnd) {
305                if (ch != '?') {
306                    if (different(caseSensitive, ch, strArr[strIdxEnd])) {
307                        return false;
308                        // Character mismatch
309                    }
310                }
311                patIdxEnd--;
312                strIdxEnd--;
313            }
314            if (strIdxStart > strIdxEnd) {
315                // All characters in the string are used. Check if only '*'s are
316                // left in the pattern. If so, we succeeded. Otherwise failure.
317                for (int i = patIdxStart; i <= patIdxEnd; i++) {
318                    if (patArr[i] != '*') {
319                        return false;
320                    }
321                }
322                return true;
323            }
324    
325            // process pattern between stars. padIdxStart and patIdxEnd point
326            // always to a '*'.
327            while (patIdxStart != patIdxEnd && strIdxStart <= strIdxEnd) {
328                int patIdxTmp = -1;
329                for (int i = patIdxStart + 1; i <= patIdxEnd; i++) {
330                    if (patArr[i] == '*') {
331                        patIdxTmp = i;
332                        break;
333                    }
334                }
335                if (patIdxTmp == patIdxStart + 1) {
336                    // Two stars next to each other, skip the first one.
337                    patIdxStart++;
338                    continue;
339                }
340                // Find the pattern between padIdxStart & padIdxTmp in str between
341                // strIdxStart & strIdxEnd
342                int patLength = patIdxTmp - patIdxStart - 1;
343                int strLength = strIdxEnd - strIdxStart + 1;
344                int foundIdx = -1;
345            strLoop: 
346                for (int i = 0; i <= strLength - patLength; i++) {
347                    for (int j = 0; j < patLength; j++) {
348                        ch = patArr[patIdxStart + j + 1];
349                        if (ch != '?') {
350                            if (different(caseSensitive, ch, strArr[strIdxStart + i + j])) {
351                                continue strLoop;
352                            }
353                        }
354                    }
355    
356                    foundIdx = strIdxStart + i;
357                    break;
358                }
359    
360                if (foundIdx == -1) {
361                    return false;
362                }
363    
364                patIdxStart = patIdxTmp;
365                strIdxStart = foundIdx + patLength;
366            }
367    
368            // All characters in the string are used. Check if only '*'s are left
369            // in the pattern. If so, we succeeded. Otherwise failure.
370            for (int i = patIdxStart; i <= patIdxEnd; i++) {
371                if (patArr[i] != '*') {
372                    return false;
373                }
374            }
375    
376            return true;
377        }
378    
379        /**
380         * Given a pattern and a full path, determine the pattern-mapped part.
381         * <p>
382         * For example:
383         * <ul>
384         * <li>'<code>/docs/cvs/commit.html</code>' and '
385         * <code>/docs/cvs/commit.html</code> -> ''</li>
386         * <li>'<code>/docs/*</code>' and '<code>/docs/cvs/commit</code> -> '
387         * <code>cvs/commit</code>'</li>
388         * <li>'<code>/docs/cvs/*.html</code>' and '
389         * <code>/docs/cvs/commit.html</code> -> '<code>commit.html</code>'</li>
390         * <li>'<code>/docs/**</code>' and '<code>/docs/cvs/commit</code> -> '
391         * <code>cvs/commit</code>'</li>
392         * <li>'<code>/docs/**\/*.html</code>' and '
393         * <code>/docs/cvs/commit.html</code> -> '<code>cvs/commit.html</code>'</li>
394         * <li>'<code>/*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
395         * <code>docs/cvs/commit.html</code>'</li>
396         * <li>'<code>*.html</code>' and '<code>/docs/cvs/commit.html</code> -> '
397         * <code>/docs/cvs/commit.html</code>'</li>
398         * <li>'<code>*</code>' and '<code>/docs/cvs/commit.html</code> -> '
399         * <code>/docs/cvs/commit.html</code>'</li>
400         * </ul>
401         * <p>
402         * Assumes that {@link #match} returns <code>true</code> for '
403         * <code>pattern</code>' and '<code>path</code>', but does
404         * <strong>not</strong> enforce this.
405         */
406        public String extractPathWithinPattern(String pattern, String path) {
407            String[] patternParts = tokenizeToStringArray(pattern, this.pathSeparator);
408            String[] pathParts = tokenizeToStringArray(path, this.pathSeparator);
409    
410            StringBuilder buffer = new StringBuilder();
411    
412            // Add any path parts that have a wildcarded pattern part.
413            int puts = 0;
414            for (int i = 0; i < patternParts.length; i++) {
415                String patternPart = patternParts[i];
416                if ((patternPart.indexOf('*') > -1 || patternPart.indexOf('?') > -1) && pathParts.length >= i + 1) {
417                    if (puts > 0 || (i == 0 && !pattern.startsWith(this.pathSeparator))) {
418                        buffer.append(this.pathSeparator);
419                    }
420                    buffer.append(pathParts[i]);
421                    puts++;
422                }
423            }
424    
425            // Append any trailing path parts.
426            for (int i = patternParts.length; i < pathParts.length; i++) {
427                if (puts > 0 || i > 0) {
428                    buffer.append(this.pathSeparator);
429                }
430                buffer.append(pathParts[i]);
431            }
432    
433            return buffer.toString();
434        }
435    
436        /**
437         * Tokenize the given String into a String array via a StringTokenizer.
438         * Trims tokens and omits empty tokens.
439         * <p>
440         * The given delimiters string is supposed to consist of any number of
441         * delimiter characters. Each of those characters can be used to separate
442         * tokens. A delimiter is always a single character; for multi-character
443         * delimiters, consider using <code>delimitedListToStringArray</code>
444         * 
445         * @param str the String to tokenize
446         * @param delimiters the delimiter characters, assembled as String (each of
447         *            those characters is individually considered as delimiter).
448         * @return an array of the tokens
449         * @see java.util.StringTokenizer
450         * @see java.lang.String#trim()
451         */
452        public static String[] tokenizeToStringArray(String str, String delimiters) {
453            if (str == null) {
454                return null;
455            }
456            StringTokenizer st = new StringTokenizer(str, delimiters);
457            List<String> tokens = new ArrayList<String>();
458            while (st.hasMoreTokens()) {
459                String token = st.nextToken();
460                token = token.trim();
461                if (token.length() > 0) {
462                    tokens.add(token);
463                }
464            }
465            return tokens.toArray(new String[tokens.size()]);
466        }
467    
468        private static boolean different(boolean caseSensitive, char ch, char other) {
469            return caseSensitive ? ch != other : Character.toUpperCase(ch) != Character.toUpperCase(other);
470        }
471    }