001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.language.simple;
018    
019    import java.util.List;
020    import java.util.concurrent.CopyOnWriteArrayList;
021    
022    import org.apache.camel.language.simple.types.SimpleToken;
023    import org.apache.camel.language.simple.types.SimpleTokenType;
024    import org.apache.camel.language.simple.types.TokenType;
025    import org.apache.camel.util.ObjectHelper;
026    
027    /**
028     * Tokenizer to create {@link SimpleToken} from the input.
029     */
030    public final class SimpleTokenizer {
031    
032        // use CopyOnWriteArrayList so we can modify it in the for loop when changing function start/end tokens
033        private static final List<SimpleTokenType> KNOWN_TOKENS = new CopyOnWriteArrayList<SimpleTokenType>();
034    
035        static {
036            // add known tokens
037            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, " "));
038            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\t"));
039            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\n"));
040            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.whiteSpace, "\r"));
041            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.singleQuote, "'"));
042            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.doubleQuote, "\""));
043            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "${"));
044            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionStart, "$simple{"));
045            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.functionEnd, "}"));
046            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "true"));
047            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.booleanValue, "false"));
048            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.nullValue, "null"));
049            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.escape, "\\"));
050    
051            // binary operators
052            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "=="));
053            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">="));
054            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<="));
055            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, ">"));
056            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "<"));
057            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "!="));
058            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not is"));
059            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "is"));
060            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not contains"));
061            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "contains"));
062            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not regex"));
063            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "regex"));
064            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not in"));
065            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "in"));
066            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "range"));
067            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.binaryOperator, "not range"));
068    
069            // unary operators
070            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "++"));
071            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.unaryOperator, "--"));
072    
073            // logical operators
074            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "&&"));
075            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "||"));
076            // TODO: @deprecated logical operators, to be removed in Camel 3.0
077            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "and"));
078            KNOWN_TOKENS.add(new SimpleTokenType(TokenType.logicalOperator, "or"));
079        }
080    
081        private SimpleTokenizer() {
082            // static methods
083        }
084    
085    
086        /**
087         * @see SimpleLanguage#changeFunctionStartToken(String...)
088         */
089        public static void changeFunctionStartToken(String... startToken) {
090            for (SimpleTokenType type : KNOWN_TOKENS) {
091                if (type.getType() == TokenType.functionStart) {
092                    KNOWN_TOKENS.remove(type);
093                }
094            }
095    
096            // add in start of list as its a more common token to be used
097            for (String token : startToken) {
098                KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionStart, token));
099            }
100        }
101    
102        /**
103         * @see SimpleLanguage#changeFunctionEndToken(String...)
104         */
105        public static void changeFunctionEndToken(String... endToken) {
106            for (SimpleTokenType type : KNOWN_TOKENS) {
107                if (type.getType() == TokenType.functionEnd) {
108                    KNOWN_TOKENS.remove(type);
109                }
110            }
111    
112            // add in start of list as its a more common token to be used
113            for (String token : endToken) {
114                KNOWN_TOKENS.add(0, new SimpleTokenType(TokenType.functionEnd, token));
115            }
116        }
117    
118        /**
119         * Create the next token
120         *
121         * @param expression  the input expression
122         * @param index       the current index
123         * @param allowEscape whether to allow escapes
124         * @param filter      defines the accepted token types to be returned (character is always used as fallback)
125         * @return the created token, will always return a token
126         */
127        public static SimpleToken nextToken(String expression, int index, boolean allowEscape, TokenType... filter) {
128            return doNextToken(expression, index, allowEscape, filter);
129        }
130    
131        /**
132         * Create the next token
133         *
134         * @param expression  the input expression
135         * @param index       the current index
136         * @param allowEscape whether to allow escapes
137         * @return the created token, will always return a token
138         */
139        public static SimpleToken nextToken(String expression, int index, boolean allowEscape) {
140            return doNextToken(expression, index, allowEscape);
141        }
142    
143        private static SimpleToken doNextToken(String expression, int index, boolean allowEscape, TokenType... filters) {
144    
145            boolean numericAllowed = acceptType(TokenType.numericValue, filters);
146            if (numericAllowed) {
147                // is it a numeric value
148                StringBuilder sb = new StringBuilder();
149                boolean digit = true;
150                while (digit && index < expression.length()) {
151                    digit = Character.isDigit(expression.charAt(index));
152                    if (digit) {
153                        char ch = expression.charAt(index);
154                        sb.append(ch);
155                        index++;
156                        continue;
157                    }
158                    // is it a dot or comma as part of a floating point number
159                    boolean decimalSeparator = '.' == expression.charAt(index) || ',' == expression.charAt(index);
160                    if (decimalSeparator && sb.length() > 0) {
161                        char ch = expression.charAt(index);
162                        sb.append(ch);
163                        index++;
164                        // assume its still a digit
165                        digit = true;
166                        continue;
167                    }
168                }
169                if (sb.length() > 0) {
170                    return new SimpleToken(new SimpleTokenType(TokenType.numericValue, sb.toString()), index);
171                }
172            }
173    
174            boolean escapeAllowed = allowEscape && acceptType(TokenType.escape, filters);
175            if (escapeAllowed) {
176                StringBuilder sb = new StringBuilder();
177                char ch = expression.charAt(index);
178                boolean escaped = '\\' == ch;
179                if (escaped && index < expression.length() - 1) {
180                    // grab next character to escape
181                    char next = expression.charAt(++index);
182                    // special for new line, tabs and carriage return
183                    boolean special = false;
184                    if ('n' == next) {
185                        sb.append("\n");
186                        special = true;
187                    } else if ('t' == next) {
188                        sb.append("\t");
189                        special = true;
190                    } else if ('r' == next) {
191                        sb.append("\r");
192                        special = true;
193                    } else {
194                        // not special just a regular character
195                        sb.append(ch);
196                    }
197    
198                    // force 2 as length if special
199                    return new SimpleToken(new SimpleTokenType(TokenType.character, sb.toString()), index, special ? 2 : 1);
200                }
201            }
202    
203            // it could be any of the known tokens
204            String text = expression.substring(index);
205            for (SimpleTokenType token : KNOWN_TOKENS) {
206                if (acceptType(token.getType(), filters)) {
207                    if (acceptToken(token, text, expression, index)) {
208                        return new SimpleToken(token, index);
209                    }
210                }
211            }
212    
213            // fallback and create a character token
214            char ch = expression.charAt(index);
215            SimpleToken token = new SimpleToken(new SimpleTokenType(TokenType.character, "" + ch), index);
216            return token;
217        }
218    
219        private static boolean acceptType(TokenType type, TokenType... filters) {
220            if (filters == null || filters.length == 0) {
221                return true;
222            }
223            for (TokenType filter : filters) {
224                if (type == filter) {
225                    return true;
226                }
227            }
228            return false;
229        }
230    
231        private static boolean acceptToken(SimpleTokenType token, String text, String expression, int index) {
232            if (token.isUnary() && text.startsWith(token.getValue())) {
233                SimpleTokenType functionEndToken = getFunctionEndToken();
234                if (functionEndToken != null) {
235                    int endLen = functionEndToken.getValue().length();
236    
237                    // special check for unary as the previous must be a function end, and the next a whitespace
238                    // to ensure unary operators is only applied on functions as intended
239                    int len = token.getValue().length();
240    
241                    String previous = "";
242                    if (index - endLen >= 0) {
243                        previous = expression.substring(index - endLen, index);
244                    }
245                    String after = text.substring(len);
246                    boolean whiteSpace = ObjectHelper.isEmpty(after) || after.startsWith(" ");
247                    boolean functionEnd = previous.equals(functionEndToken.getValue());
248                    return functionEnd && whiteSpace;
249                }
250            }
251    
252            return text.startsWith(token.getValue());
253        }
254    
255        private static SimpleTokenType getFunctionEndToken() {
256            for (SimpleTokenType token : KNOWN_TOKENS) {
257                if (token.isFunctionEnd()) {
258                    return token;
259                }
260            }
261            return null;
262        }
263    
264    }