001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 * 
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 * 
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.lang3.text;
018
019import java.util.Arrays;
020
021import org.apache.commons.lang3.StringUtils;
022
023/**
024 * A matcher class that can be queried to determine if a character array
025 * portion matches.
026 * <p>
027 * This class comes complete with various factory methods.
028 * If these do not suffice, you can subclass and implement your own matcher.
029 *
030 * @since 2.2
031 */
public abstract class StrMatcher {

    /**
     * Matches the comma character.
     */
    private static final StrMatcher COMMA_MATCHER = new CharMatcher(',');
    /**
     * Matches the tab character.
     */
    private static final StrMatcher TAB_MATCHER = new CharMatcher('\t');
    /**
     * Matches the space character.
     */
    private static final StrMatcher SPACE_MATCHER = new CharMatcher(' ');
    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     */
    private static final StrMatcher SPLIT_MATCHER = new CharSetMatcher(" \t\n\r\f".toCharArray());
    /**
     * Matches the String trim() whitespace characters.
     */
    private static final StrMatcher TRIM_MATCHER = new TrimMatcher();
    /**
     * Matches the single quote character.
     */
    private static final StrMatcher SINGLE_QUOTE_MATCHER = new CharMatcher('\'');
    /**
     * Matches the double quote character.
     */
    private static final StrMatcher DOUBLE_QUOTE_MATCHER = new CharMatcher('"');
    /**
     * Matches the single or double quote character.
     */
    private static final StrMatcher QUOTE_MATCHER = new CharSetMatcher("'\"".toCharArray());
    /**
     * Matches no characters.
     */
    private static final StrMatcher NONE_MATCHER = new NoMatcher();

    // -----------------------------------------------------------------------

    /**
     * Returns a matcher which matches the comma character.
     *
     * @return a matcher for a comma
     */
    public static StrMatcher commaMatcher() {
        return COMMA_MATCHER;
    }

    /**
     * Returns a matcher which matches the tab character.
     *
     * @return a matcher for a tab
     */
    public static StrMatcher tabMatcher() {
        return TAB_MATCHER;
    }

    /**
     * Returns a matcher which matches the space character.
     *
     * @return a matcher for a space
     */
    public static StrMatcher spaceMatcher() {
        return SPACE_MATCHER;
    }

    /**
     * Matches the same characters as StringTokenizer,
     * namely space, tab, newline, carriage return and formfeed.
     *
     * @return the split matcher
     */
    public static StrMatcher splitMatcher() {
        return SPLIT_MATCHER;
    }

    /**
     * Matches the String trim() whitespace characters.
     *
     * @return the trim matcher
     */
    public static StrMatcher trimMatcher() {
        return TRIM_MATCHER;
    }

    /**
     * Returns a matcher which matches the single quote character.
     *
     * @return a matcher for a single quote
     */
    public static StrMatcher singleQuoteMatcher() {
        return SINGLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the double quote character.
     *
     * @return a matcher for a double quote
     */
    public static StrMatcher doubleQuoteMatcher() {
        return DOUBLE_QUOTE_MATCHER;
    }

    /**
     * Returns a matcher which matches the single or double quote character.
     *
     * @return a matcher for a single or double quote
     */
    public static StrMatcher quoteMatcher() {
        return QUOTE_MATCHER;
    }

    /**
     * Matches no characters.
     *
     * @return a matcher that matches nothing
     */
    public static StrMatcher noneMatcher() {
        return NONE_MATCHER;
    }

    /**
     * Creates a matcher that matches a single character.
     *
     * @param ch  the character to match
     * @return a new Matcher for the given char
     */
    public static StrMatcher charMatcher(final char ch) {
        return new CharMatcher(ch);
    }

    /**
     * Creates a matcher from a set of characters.
     * <p>
     * A single-element array yields a single-character matcher; the shared
     * "none" matcher is returned when there is nothing to match.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given char[]
     */
    public static StrMatcher charSetMatcher(final char... chars) {
        if (chars == null || chars.length == 0) {
            return NONE_MATCHER;
        }
        if (chars.length == 1) {
            return new CharMatcher(chars[0]);
        }
        return new CharSetMatcher(chars);
    }

    /**
     * Creates a matcher from a string representing a set of characters.
     * <p>
     * A one-character string yields a single-character matcher; the shared
     * "none" matcher is returned when there is nothing to match.
     *
     * @param chars  the characters to match, null or empty matches nothing
     * @return a matcher for the given characters
     */
    public static StrMatcher charSetMatcher(final String chars) {
        if (isNullOrEmpty(chars)) {
            return NONE_MATCHER;
        }
        if (chars.length() == 1) {
            return new CharMatcher(chars.charAt(0));
        }
        return new CharSetMatcher(chars.toCharArray());
    }

    /**
     * Creates a matcher that matches an exact string.
     *
     * @param str  the string to match, null or empty matches nothing
     * @return a matcher for the given String
     */
    public static StrMatcher stringMatcher(final String str) {
        if (isNullOrEmpty(str)) {
            return NONE_MATCHER;
        }
        return new StringMatcher(str);
    }

    /**
     * Checks whether a string is null or has zero length.
     *
     * @param str  the string to test, may be null
     * @return {@code true} if the string is null or empty
     */
    private static boolean isNullOrEmpty(final String str) {
        return str == null || str.isEmpty();
    }

    //-----------------------------------------------------------------------
    /**
     * Constructor.
     */
    protected StrMatcher() {
    }

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The character array may be larger than the active area to be matched.
     * Only values in the buffer between the specified indices may be accessed.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those
     * after, so long as no checks exceed the bounds specified.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @param bufferStart  the first active index in the buffer, valid for buffer
     * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
     * @return the number of matching characters, zero for no match
     */
    public abstract int isMatch(char[] buffer, int pos, int bufferStart, int bufferEnd);

    /**
     * Returns the number of matching characters, zero for no match.
     * <p>
     * This method is called to check for a match.
     * The parameter {@code pos} represents the current position to be
     * checked in the string {@code buffer} (a character array which must
     * not be changed).
     * The API guarantees that {@code pos} is a valid index for {@code buffer}.
     * <p>
     * The whole array is treated as the active area: this delegates to
     * {@link #isMatch(char[], int, int, int)} with bounds {@code 0} and
     * {@code buffer.length}.
     * <p>
     * The matching code may check one character or many.
     * It may check characters preceding {@code pos} as well as those after.
     * <p>
     * It must return zero for no match, or a positive number if a match was found.
     * The number indicates the number of characters that matched.
     *
     * @param buffer  the text content to match against, do not change
     * @param pos  the starting position for the match, valid for buffer
     * @return the number of matching characters, zero for no match
     * @since 2.4
     */
    public int isMatch(final char[] buffer, final int pos) {
        return isMatch(buffer, pos, 0, buffer.length);
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a set of characters for matching purposes.
     */
    static final class CharSetMatcher extends StrMatcher {
        /** The set of characters to match, kept sorted for binary search. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a character array.
         * <p>
         * The array is copied before sorting, so the caller's array is left untouched.
         *
         * @param chars  the characters to match, must not be null
         */
        CharSetMatcher(final char[] chars) {
            this.chars = chars.clone();
            Arrays.sort(this.chars);
        }

        /**
         * Returns whether or not the character at {@code pos} is in the set.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return Arrays.binarySearch(chars, buffer[pos]) >= 0 ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define a character for matching purposes.
     */
    static final class CharMatcher extends StrMatcher {
        /** The character to match. */
        private final char ch;

        /**
         * Constructor that creates a matcher that matches a single character.
         *
         * @param ch  the character to match
         */
        CharMatcher(final char ch) {
            this.ch = ch;
        }

        /**
         * Returns whether or not the character at {@code pos} matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return ch == buffer[pos] ? 1 : 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to define an exact sequence of characters (a string) for matching purposes.
     */
    static final class StringMatcher extends StrMatcher {
        /** The string to match, as a character array. */
        private final char[] chars;

        /**
         * Constructor that creates a matcher from a String.
         *
         * @param str  the string to match, must not be null
         */
        StringMatcher(final String str) {
            chars = str.toCharArray();
        }

        /**
         * Returns whether or not the text starting at {@code pos} matches the stored string.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return the length of the stored string on a match, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            final int len = chars.length;
            // Not enough active characters left; the subtraction form cannot overflow for valid indices.
            if (len > bufferEnd - pos) {
                return 0;
            }
            for (int i = 0; i < len; i++) {
                if (chars[i] != buffer[pos + i]) {
                    return 0;
                }
            }
            return len;
        }

        /**
         * Returns the default object description followed by the characters being matched.
         *
         * @return a debugging string for this matcher
         */
        @Override
        public String toString() {
            return super.toString() + ' ' + Arrays.toString(chars);
        }

    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match no characters.
     */
    static final class NoMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code NoMatcher}.
         */
        NoMatcher() {
        }

        /**
         * Always returns zero, i.e. never matches.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return zero, always
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return 0;
        }
    }

    //-----------------------------------------------------------------------
    /**
     * Class used to match whitespace as per trim().
     */
    static final class TrimMatcher extends StrMatcher {

        /**
         * Constructs a new instance of {@code TrimMatcher}.
         */
        TrimMatcher() {
        }

        /**
         * Returns whether or not the character at {@code pos} has a code of 32 (space) or below.
         *
         * @param buffer  the text content to match against, do not change
         * @param pos  the starting position for the match, valid for buffer
         * @param bufferStart  the first active index in the buffer, valid for buffer
         * @param bufferEnd  the end index (exclusive) of the active buffer, valid for buffer
         * @return 1 if the character matches, zero for no match
         */
        @Override
        public int isMatch(final char[] buffer, final int pos, final int bufferStart, final int bufferEnd) {
            return buffer[pos] <= 32 ? 1 : 0;
        }
    }

}