001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.lang3.text;
018    
019    import java.util.Arrays;
020    
/**
 * A strategy object that decides whether a portion of a character array
 * matches some criterion.
 * <p>
 * Ready-made matchers are available through the static factory methods of
 * this class. When none of them fits, extend this class and implement
 * {@link #isMatch(char[], int, int, int)}.
 *
 * @author Apache Software Foundation
 * @since 2.2
 * @version $Id: StrMatcher.java 889215 2009-12-10 11:56:38Z bayard $
 */
public abstract class StrMatcher {

    /** Shared matcher for the comma character. */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');

    /** Shared matcher for the tab character. */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');

    /** Shared matcher for the space character. */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');

    /**
     * Shared matcher for the characters space, tab, newline,
     * carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());

    /** Shared matcher for every character whose code is 32 (space) or lower. */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();

    /** Shared matcher for the single quote character. */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');

    /** Shared matcher for the double quote character. */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');

    /** Shared matcher for either the single or the double quote character. */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());

    /** Shared matcher that never matches anything. */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Gets the shared matcher for the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Gets the shared matcher for the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Gets the shared matcher for the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Gets the shared matcher for the characters space, tab, newline,
     * carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Gets the shared matcher for every character whose code is 32 (space)
     * or lower.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Gets the shared matcher for the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher for the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher for either the single or the double quote
     * character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Gets the shared matcher that never matches anything.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher for one specific character.
     *
     * @param ch  the character to match
     * @return a new matcher for the given char
     */
    public static StrMatcher charMatcher(char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher that accepts any one character out of the given array.
     * <p>
     * A null or empty array yields the matcher that matches nothing, and an
     * array holding exactly one character yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(char[] chars) {
        final int count = (chars == null) ? 0 : chars.length;
        if (count == 0) {
            return NONE_MATCHER;
        }
        if (count == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher that accepts any one character out of the given string.
     * <p>
     * A null or empty string yields the matcher that matches nothing, and a
     * string holding exactly one character yields a single-character matcher.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(String chars) {
        final int count = (chars == null) ? 0 : chars.length();
        if (count == 0) {
            return NONE_MATCHER;
        }
        if (count == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher that accepts the given string as a whole.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(String str) {
        final boolean hasText = str != null && str.length() > 0;
        return hasText ? new StringMatcher(str) : NONE_MATCHER;
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor, available to subclasses.
     */
    protected StrMatcher() {
    }

    /**
     * Reports how many characters match at the given position, with zero
     * meaning no match.
     * <p>
     * <code>pos</code> is the position in <code>buffer</code> that is
     * currently being examined; the API guarantees that it is a valid index
     * for <code>buffer</code>. The buffer must not be modified.
     * <p>
     * The array may be larger than the active region that is to be matched.
     * Implementations may only access values between the specified indices.
     * <p>
     * An implementation may inspect a single character or several, and it
     * may look at characters preceding <code>pos</code> as well as those
     * following it, provided it stays within the specified bounds.
     * <p>
     * The result must be zero when nothing matched, or a positive number
     * giving the count of matched characters.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Reports how many characters match at the given position, with zero
     * meaning no match, treating the entire array as the active region.
     * <p>
     * This delegates to {@link #isMatch(char[], int, int, int)} with a start
     * of <code>0</code> and an end of <code>buffer.length</code>.
     * <p>
     * <code>pos</code> is the position in <code>buffer</code> that is
     * currently being examined; the API guarantees that it is a valid index
     * for <code>buffer</code>. The buffer must not be modified.
     * <p>
     * The result must be zero when nothing matched, or a positive number
     * giving the count of matched characters.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(char[] buffer, int pos) {
        final int wholeLength = buffer.length;
        return isMatch(buffer, pos, 0, wholeLength);
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts any single character contained in a fixed set.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** Private sorted copy of the accepted characters, used for binary search. */
        private final char[] sortedChars;

        /**
         * Creates a matcher from a character array. The array is copied, so
         * the caller's array is left untouched.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(char[] chars) {
            final char[] copy = chars.clone();
            Arrays.sort(copy);
            this.sortedChars = copy;
        }

        /**
         * Checks whether the character at <code>pos</code> belongs to the set.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character is in the set, 0 otherwise
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            // binarySearch yields a negative insertion point when the key is absent
            if (Arrays.binarySearch(sortedChars, buffer[pos]) < 0) {
                return 0;
            }
            return 1;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts one specific character.
     */
    static final class CharMatcher extends StrMatcher {
        /** The only character this matcher accepts. */
        private final char target;

        /**
         * Creates a matcher for a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(char ch) {
            this.target = ch;
        }

        /**
         * Checks whether the character at <code>pos</code> is the expected one.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, 0 otherwise
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            if (buffer[pos] == target) {
                return 1;
            }
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts one specific sequence of characters.
     */
    static final class StringMatcher extends StrMatcher {
        /** The sequence to look for, as a character array. */
        private final char[] pattern;

        /**
         * Creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(String str) {
            this.pattern = str.toCharArray();
        }

        /**
         * Checks whether the stored string occurs in the buffer starting at
         * <code>pos</code>.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return the length of the stored string on a match, zero for no match
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            final int length = pattern.length;
            // the whole string has to fit before the end of the active region
            if (pos + length > bufferEnd) {
                return 0;
            }
            for (int offset = 0; offset < length; offset++) {
                if (pattern[offset] != buffer[pos + offset]) {
                    return 0;
                }
            }
            return length;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that never matches.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>NoMatcher</code>.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, that is, no match.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return always zero
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Matcher that accepts every character whose code is 32 (space) or lower.
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of <code>TrimMatcher</code>.
         */
        TrimMatcher() {
        }

        /**
         * Checks whether the character at <code>pos</code> has a code of 32
         * (space) or lower.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index of the active buffer, valid for buffer
         * @return 1 if the character matches, 0 otherwise
         */
        @Override
        public int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd) {
            if (buffer[pos] > ' ') {
                return 0;
            }
            return 1;
        }
    }

}