001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.language.simple;
018    
019    import java.util.ArrayList;
020    import java.util.Iterator;
021    import java.util.List;
022    import java.util.Stack;
023    import java.util.concurrent.atomic.AtomicBoolean;
024    
025    import org.apache.camel.Expression;
026    import org.apache.camel.Predicate;
027    import org.apache.camel.builder.PredicateBuilder;
028    import org.apache.camel.language.simple.ast.BinaryExpression;
029    import org.apache.camel.language.simple.ast.DoubleQuoteEnd;
030    import org.apache.camel.language.simple.ast.DoubleQuoteStart;
031    import org.apache.camel.language.simple.ast.LiteralExpression;
032    import org.apache.camel.language.simple.ast.LiteralNode;
033    import org.apache.camel.language.simple.ast.LogicalExpression;
034    import org.apache.camel.language.simple.ast.NullExpression;
035    import org.apache.camel.language.simple.ast.SimpleFunctionEnd;
036    import org.apache.camel.language.simple.ast.SimpleFunctionStart;
037    import org.apache.camel.language.simple.ast.SimpleNode;
038    import org.apache.camel.language.simple.ast.SingleQuoteEnd;
039    import org.apache.camel.language.simple.ast.SingleQuoteStart;
040    import org.apache.camel.language.simple.ast.UnaryExpression;
041    import org.apache.camel.language.simple.types.BinaryOperatorType;
042    import org.apache.camel.language.simple.types.LogicalOperatorType;
043    import org.apache.camel.language.simple.types.SimpleIllegalSyntaxException;
044    import org.apache.camel.language.simple.types.SimpleParserException;
045    import org.apache.camel.language.simple.types.SimpleToken;
046    import org.apache.camel.language.simple.types.TokenType;
047    import org.apache.camel.util.ExpressionToPredicateAdapter;
048    
049    /**
050     * A parser to parse simple language as a Camel {@link Predicate}
051     */
052    public class SimplePredicateParser extends BaseSimpleParser {
053    
054        @Deprecated
055        public SimplePredicateParser(String expression) {
056            super(expression, true);
057        }
058    
059        public SimplePredicateParser(String expression, boolean allowEscape) {
060            super(expression, allowEscape);
061        }
062    
063        public Predicate parsePredicate() {
064            clear();
065            try {
066                return doParsePredicate();
067            } catch (SimpleParserException e) {
068                // catch parser exception and turn that into a syntax exceptions
069                throw new SimpleIllegalSyntaxException(expression, e.getIndex(), e.getMessage(), e);
070            } catch (Exception e) {
071                // include exception in rethrown exception
072                throw new SimpleIllegalSyntaxException(expression, -1, e.getMessage(), e);
073            }
074        }
075    
076        protected Predicate doParsePredicate() {
077    
078            // parse using the following grammar
079            nextToken();
080            while (!token.getType().isEol()) {
081                // predicate supports quotes, functions, operators and whitespaces
082                //CHECKSTYLE:OFF
083                if (!singleQuotedLiteralWithFunctionsText()
084                        && !doubleQuotedLiteralWithFunctionsText()
085                        && !functionText()
086                        && !unaryOperator()
087                        && !binaryOperator()
088                        && !logicalOperator()
089                        && !token.getType().isWhitespace()
090                        && !token.getType().isEol()) {
091                    // okay the symbol was not one of the above, so its not supported
092                    // use the previous index as that is where the problem is
093                    throw new SimpleParserException("Unexpected token " + token, previousIndex);
094                }
095                //CHECKSTYLE:ON
096                // take the next token
097                nextToken();
098            }
099    
100            // now after parsing we need a bit of work to do, to make it easier to turn the tokens
101            // into and ast, and then from the ast, to Camel predicate(s).
102            // hence why there is a number of tasks going on below to accomplish this
103    
104            // remove any ignorable white space tokens
105            removeIgnorableWhiteSpaceTokens();
106            // turn the tokens into the ast model
107            parseTokensAndCreateNodes();
108            // compact and stack blocks (eg function start/end, quotes start/end, etc.)
109            prepareBlocks();
110            // compact and stack unary expressions
111            prepareUnaryExpressions();
112            // compact and stack binary expressions
113            prepareBinaryExpressions();
114            // compact and stack logical expressions
115            prepareLogicalExpressions();
116    
117            // create and return as a Camel predicate
118            List<Predicate> predicates = createPredicates();
119            if (predicates.isEmpty()) {
120                // return a false predicate as response as there was nothing to parse
121                return PredicateBuilder.constant(false);
122            } else if (predicates.size() == 1) {
123                return predicates.get(0);
124            } else {
125                return PredicateBuilder.and(predicates);
126            }
127        }
128    
129        /**
130         * Parses the tokens and crates the AST nodes.
131         * <p/>
132         * After the initial parsing of the input (input -> tokens) then we
133         * parse again (tokens -> ast).
134         * <p/>
135         * In this parsing the balance of the blocks is checked, so that each block has a matching
136         * start and end token. For example a single quote block, or a function block etc.
137         */
138        protected void parseTokensAndCreateNodes() {
139            // we loop the tokens and create a sequence of ast nodes
140    
141            // we need to keep a bit of state for keeping track of single and double quotes
142            // which need to be balanced and have matching start/end pairs
143            SimpleNode lastSingle = null;
144            SimpleNode lastDouble = null;
145            SimpleNode lastFunction = null;
146            AtomicBoolean startSingle = new AtomicBoolean(false);
147            AtomicBoolean startDouble = new AtomicBoolean(false);
148            AtomicBoolean startFunction = new AtomicBoolean(false);
149    
150            LiteralNode imageToken = null;
151            for (SimpleToken token : tokens) {
152                // break if eol
153                if (token.getType().isEol()) {
154                    break;
155                }
156    
157                // create a node from the token
158                SimpleNode node = createNode(token, startSingle, startDouble, startFunction);
159                if (node != null) {
160                    // keep state of last single/double
161                    if (node instanceof SingleQuoteStart) {
162                        lastSingle = node;
163                    } else if (node instanceof DoubleQuoteStart) {
164                        lastDouble = node;
165                    } else if (node instanceof SimpleFunctionStart) {
166                        lastFunction = node;
167                    }
168    
169                    // a new token was created so the current image token need to be added first
170                    if (imageToken != null) {
171                        nodes.add(imageToken);
172                        imageToken = null;
173                    }
174                    // and then add the created node
175                    nodes.add(node);
176                    // continue to next
177                    continue;
178                }
179    
180                // if no token was created then its a character/whitespace/escaped symbol
181                // which we need to add together in the same image
182                if (imageToken == null) {
183                    imageToken = new LiteralExpression(token);
184                }
185                imageToken.addText(token.getText());
186            }
187    
188            // append any leftover image tokens (when we reached eol)
189            if (imageToken != null) {
190                nodes.add(imageToken);
191            }
192    
193            // validate the single, double quote pairs and functions is in balance
194            if (startSingle.get()) {
195                int index = lastSingle != null ? lastSingle.getToken().getIndex() : 0;
196                throw new SimpleParserException("single quote has no ending quote", index);
197            }
198            if (startDouble.get()) {
199                int index = lastDouble != null ? lastDouble.getToken().getIndex() : 0;
200                throw new SimpleParserException("double quote has no ending quote", index);
201            }
202            if (startFunction.get()) {
203                // we have a start function, but no ending function
204                int index = lastFunction != null ? lastFunction.getToken().getIndex() : 0;
205                throw new SimpleParserException("function has no ending token", index);
206            }
207        }
208    
209    
210        /**
211         * Creates a node from the given token
212         *
213         * @param token         the token
214         * @param startSingle   state of single quoted blocks
215         * @param startDouble   state of double quoted blocks
216         * @param startFunction state of function blocks
217         * @return the created node, or <tt>null</tt> to let a default node be created instead.
218         */
219        private SimpleNode createNode(SimpleToken token, AtomicBoolean startSingle, AtomicBoolean startDouble,
220                                      AtomicBoolean startFunction) {
221            if (token.getType().isFunctionStart()) {
222                startFunction.set(true);
223                return new SimpleFunctionStart(token);
224            } else if (token.getType().isFunctionEnd()) {
225                startFunction.set(false);
226                return new SimpleFunctionEnd(token);
227            }
228    
229            // if we are inside a function, then we do not support any other kind of tokens
230            // as we want all the tokens to be literal instead
231            if (startFunction.get()) {
232                return null;
233            }
234    
235            // okay so far we also want to support quotes
236            if (token.getType().isSingleQuote()) {
237                SimpleNode answer;
238                boolean start = startSingle.get();
239                if (!start) {
240                    answer = new SingleQuoteStart(token);
241                } else {
242                    answer = new SingleQuoteEnd(token);
243                }
244                // flip state on start/end flag
245                startSingle.set(!start);
246                return answer;
247            } else if (token.getType().isDoubleQuote()) {
248                SimpleNode answer;
249                boolean start = startDouble.get();
250                if (!start) {
251                    answer = new DoubleQuoteStart(token);
252                } else {
253                    answer = new DoubleQuoteEnd(token);
254                }
255                // flip state on start/end flag
256                startDouble.set(!start);
257                return answer;
258            }
259    
260            // if we are inside a quote, then we do not support any further kind of tokens
261            // as we want to only support embedded functions and all other kinds to be literal tokens
262            if (startSingle.get() || startDouble.get()) {
263                return null;
264            }
265    
266            // okay we are not inside a function or quote, so we want to support operators
267            // and the special null value as well
268            if (token.getType().isUnary()) {
269                return new UnaryExpression(token);
270            } else if (token.getType().isBinary()) {
271                return new BinaryExpression(token);
272            } else if (token.getType().isLogical()) {
273                return new LogicalExpression(token);
274            } else if (token.getType().isNullValue()) {
275                return new NullExpression(token);
276            }
277    
278            // by returning null, we will let the parser determine what to do
279            return null;
280        }
281    
282        /**
283         * Removes any ignorable whitespace tokens.
284         * <p/>
285         * During the initial parsing (input -> tokens), then there may
286         * be excessive whitespace tokens, which can safely be removed,
287         * which makes the succeeding parsing easier.
288         */
289        private void removeIgnorableWhiteSpaceTokens() {
290            // white space can be removed if its not part of a quoted text or within function(s)
291            boolean quote = false;
292            int functionCount = 0;
293    
294            Iterator<SimpleToken> it = tokens.iterator();
295            while (it.hasNext()) {
296                SimpleToken token = it.next();
297                if (token.getType().isSingleQuote()) {
298                    quote = !quote;
299                } else if (!quote) {
300                    if (token.getType().isFunctionStart()) {
301                        functionCount++;
302                    } else if (token.getType().isFunctionEnd()) {
303                        functionCount--;
304                    } else if (token.getType().isWhitespace() && functionCount == 0) {
305                        it.remove();
306                    }
307                }
308            }
309        }
310    
311        /**
312         * Prepares binary expressions.
313         * <p/>
314         * This process prepares the binary expressions in the AST. This is done
315         * by linking the binary operator with both the right and left hand side
316         * nodes, to have the AST graph updated and prepared properly.
317         * <p/>
318         * So when the AST node is later used to create the {@link Predicate}s
319         * to be used by Camel then the AST graph has a linked and prepared
320         * graph of nodes which represent the input expression.
321         */
322        private void prepareBinaryExpressions() {
323            Stack<SimpleNode> stack = new Stack<SimpleNode>();
324    
325            SimpleNode left = null;
326            for (int i = 0; i < nodes.size(); i++) {
327                if (left == null) {
328                    left = i > 0 ? nodes.get(i - 1) : null;
329                }
330                SimpleNode token = nodes.get(i);
331                SimpleNode right = i < nodes.size() - 1 ? nodes.get(i + 1) : null;
332    
333                if (token instanceof BinaryExpression) {
334                    BinaryExpression binary = (BinaryExpression) token;
335    
336                    // remember the binary operator
337                    String operator = binary.getOperator().toString();
338    
339                    if (left == null) {
340                        throw new SimpleParserException("Binary operator " + operator + " has no left hand side token", token.getToken().getIndex());
341                    }
342                    if (!binary.acceptLeftNode(left)) {
343                        throw new SimpleParserException("Binary operator " + operator + " does not support left hand side token " + left.getToken(), token.getToken().getIndex());
344                    }
345                    if (right == null) {
346                        throw new SimpleParserException("Binary operator " + operator + " has no right hand side token", token.getToken().getIndex());
347                    }
348                    if (!binary.acceptRightNode(right)) {
349                        throw new SimpleParserException("Binary operator " + operator + " does not support right hand side token " + right.getToken(), token.getToken().getIndex());
350                    }
351    
352                    // pop previous as we need to replace it with this binary operator
353                    stack.pop();
354                    stack.push(token);
355                    // advantage after the right hand side
356                    i++;
357                    // this token is now the left for the next loop
358                    left = token;
359                } else {
360                    // clear left
361                    left = null;
362                    stack.push(token);
363                }
364            }
365    
366            nodes.clear();
367            nodes.addAll(stack);
368        }
369    
370        /**
371         * Prepares logical expressions.
372         * <p/>
373         * This process prepares the logical expressions in the AST. This is done
374         * by linking the logical operator with both the right and left hand side
375         * nodes, to have the AST graph updated and prepared properly.
376         * <p/>
377         * So when the AST node is later used to create the {@link Predicate}s
378         * to be used by Camel then the AST graph has a linked and prepared
379         * graph of nodes which represent the input expression.
380         */
381        private void prepareLogicalExpressions() {
382            Stack<SimpleNode> stack = new Stack<SimpleNode>();
383    
384            SimpleNode left = null;
385            for (int i = 0; i < nodes.size(); i++) {
386                if (left == null) {
387                    left = i > 0 ? nodes.get(i - 1) : null;
388                }
389                SimpleNode token = nodes.get(i);
390                SimpleNode right = i < nodes.size() - 1 ? nodes.get(i + 1) : null;
391    
392                if (token instanceof LogicalExpression) {
393                    LogicalExpression logical = (LogicalExpression) token;
394    
395                    // remember the logical operator
396                    String operator = logical.getOperator().toString();
397    
398                    if (left == null) {
399                        throw new SimpleParserException("Logical operator " + operator + " has no left hand side token", token.getToken().getIndex());
400                    }
401                    if (!logical.acceptLeftNode(left)) {
402                        throw new SimpleParserException("Logical operator " + operator + " does not support left hand side token " + left.getToken(), token.getToken().getIndex());
403                    }
404                    if (right == null) {
405                        throw new SimpleParserException("Logical operator " + operator + " has no right hand side token", token.getToken().getIndex());
406                    }
407                    if (!logical.acceptRightNode(right)) {
408                        throw new SimpleParserException("Logical operator " + operator + " does not support right hand side token " + left.getToken(), token.getToken().getIndex());
409                    }
410    
411                    // pop previous as we need to replace it with this binary operator
412                    stack.pop();
413                    stack.push(token);
414                    // advantage after the right hand side
415                    i++;
416                    // this token is now the left for the next loop
417                    left = token;
418                } else {
419                    // clear left
420                    left = null;
421                    stack.push(token);
422                }
423            }
424    
425            nodes.clear();
426            nodes.addAll(stack);
427        }
428    
429        /**
430         * Creates the {@link Predicate}s from the AST nodes.
431         *
432         * @return the created {@link Predicate}s, is never <tt>null</tt>.
433         */
434        private List<Predicate> createPredicates() {
435            List<Predicate> answer = new ArrayList<Predicate>();
436            for (SimpleNode node : nodes) {
437                Expression exp = node.createExpression(expression);
438                if (exp != null) {
439                    Predicate predicate = ExpressionToPredicateAdapter.toPredicate(exp);
440                    answer.add(predicate);
441                }
442            }
443            return answer;
444        }
445    
446        // --------------------------------------------------------------
447        // grammar
448        // --------------------------------------------------------------
449    
450        // the predicate parser understands a lot more than the expression parser
451        // - single quoted = block of nodes enclosed by single quotes
452        // - double quoted = block of nodes enclosed by double quotes
453        // - single quoted with functions = block of nodes enclosed by single quotes allowing embedded functions
454        // - double quoted with functions = block of nodes enclosed by double quotes allowing embedded functions
455        // - function = simple functions such as ${body} etc
456        // - numeric = numeric value
457        // - boolean = boolean value
458        // - null = null value
459        // - unary operator = operator attached to the left hand side node
460        // - binary operator = operator attached to both the left and right hand side nodes
461        // - logical operator = operator attached to both the left and right hand side nodes
462    
463        protected boolean singleQuotedLiteralWithFunctionsText() {
464            if (accept(TokenType.singleQuote)) {
465                nextToken(TokenType.singleQuote, TokenType.eol, TokenType.functionStart, TokenType.functionEnd);
466                while (!token.getType().isSingleQuote() && !token.getType().isEol()) {
467                    // we need to loop until we find the ending single quote, or the eol
468                    nextToken(TokenType.singleQuote, TokenType.eol, TokenType.functionStart, TokenType.functionEnd);
469                }
470                expect(TokenType.singleQuote);
471                return true;
472            }
473            return false;
474        }
475    
476        protected boolean singleQuotedLiteralText() {
477            if (accept(TokenType.singleQuote)) {
478                nextToken(TokenType.singleQuote, TokenType.eol);
479                while (!token.getType().isSingleQuote() && !token.getType().isEol()) {
480                    // we need to loop until we find the ending single quote, or the eol
481                    nextToken(TokenType.singleQuote, TokenType.eol);
482                }
483                expect(TokenType.singleQuote);
484                return true;
485            }
486            return false;
487        }
488    
489        protected boolean doubleQuotedLiteralWithFunctionsText() {
490            if (accept(TokenType.doubleQuote)) {
491                nextToken(TokenType.doubleQuote, TokenType.eol, TokenType.functionStart, TokenType.functionEnd);
492                while (!token.getType().isDoubleQuote() && !token.getType().isEol()) {
493                    // we need to loop until we find the ending double quote, or the eol
494                    nextToken(TokenType.doubleQuote, TokenType.eol, TokenType.functionStart, TokenType.functionEnd);
495                }
496                expect(TokenType.doubleQuote);
497                return true;
498            }
499            return false;
500        }
501    
502        protected boolean doubleQuotedLiteralText() {
503            if (accept(TokenType.doubleQuote)) {
504                nextToken(TokenType.doubleQuote, TokenType.eol);
505                while (!token.getType().isDoubleQuote() && !token.getType().isEol()) {
506                    // we need to loop until we find the ending double quote, or the eol
507                    nextToken(TokenType.doubleQuote, TokenType.eol);
508                }
509                expect(TokenType.doubleQuote);
510                return true;
511            }
512            return false;
513        }
514    
515        protected boolean functionText() {
516            if (accept(TokenType.functionStart)) {
517                nextToken();
518                while (!token.getType().isFunctionEnd() && !token.getType().isEol()) {
519                    if (token.getType().isFunctionStart()) {
520                        // embedded function
521                        functionText();
522                    }
523                    // we need to loop until we find the ending function quote, an embedded function, or the eol
524                    nextToken();
525                }
526                // if its not an embedded function then we expect the end token
527                if (!token.getType().isFunctionStart()) {
528                    expect(TokenType.functionEnd);
529                }
530                return true;
531            }
532            return false;
533        }
534    
535        protected boolean unaryOperator() {
536            if (accept(TokenType.unaryOperator)) {
537                nextToken();
538                // there should be a whitespace after the operator
539                expect(TokenType.whiteSpace);
540                return true;
541            }
542            return false;
543        }
544    
545        protected boolean binaryOperator() {
546            if (accept(TokenType.binaryOperator)) {
547                // remember the binary operator
548                BinaryOperatorType operatorType = BinaryOperatorType.asOperator(token.getText());
549    
550                nextToken();
551                // there should be at least one whitespace after the operator
552                expectAndAcceptMore(TokenType.whiteSpace);
553    
554                // okay a binary operator may not support all kind if preceding parameters, so we need to limit this
555                BinaryOperatorType.ParameterType[] types = BinaryOperatorType.supportedParameterTypes(operatorType);
556    
557                // based on the parameter types the binary operator support, we need to set this state into
558                // the following booleans so we know how to proceed in the grammar
559                boolean literalWithFunctionsSupported = false;
560                boolean literalSupported = false;
561                boolean functionSupported = false;
562                boolean numericSupported = false;
563                boolean booleanSupported = false;
564                boolean nullSupported = false;
565                if (types == null || types.length == 0) {
566                    literalWithFunctionsSupported = true;
567                    // favor literal with functions over literals without functions
568                    literalSupported = false;
569                    functionSupported = true;
570                    numericSupported = true;
571                    booleanSupported = true;
572                    nullSupported = true;
573                } else {
574                    for (BinaryOperatorType.ParameterType parameterType : types) {
575                        literalSupported |= parameterType.isLiteralSupported();
576                        literalWithFunctionsSupported |= parameterType.isLiteralWithFunctionSupport();
577                        functionSupported |= parameterType.isFunctionSupport();
578                        nullSupported |= parameterType.isNumericValueSupported();
579                        booleanSupported |= parameterType.isBooleanValueSupported();
580                        nullSupported |= parameterType.isNullValueSupported();
581                    }
582                }
583    
584                // then we proceed in the grammar according to the parameter types supported by the given binary operator
585                //CHECKSTYLE:OFF
586                if ((literalWithFunctionsSupported && singleQuotedLiteralWithFunctionsText())
587                        || (literalWithFunctionsSupported && doubleQuotedLiteralWithFunctionsText())
588                        || (literalSupported && singleQuotedLiteralText())
589                        || (literalSupported && doubleQuotedLiteralText())
590                        || (functionSupported && functionText())
591                        || (numericSupported && numericValue())
592                        || (booleanSupported && booleanValue())
593                        || (nullSupported && nullValue())) {
594                    // then after the right hand side value, there should be a whitespace if there is more tokens
595                    nextToken();
596                    if (!token.getType().isEol()) {
597                        expect(TokenType.whiteSpace);
598                    }
599                } else {
600                    throw new SimpleParserException("Binary operator " + operatorType + " does not support token " + token, token.getIndex());
601                }
602                //CHECKSTYLE:ON
603                return true;
604            }
605            return false;
606        }
607    
608        protected boolean logicalOperator() {
609            if (accept(TokenType.logicalOperator)) {
610                // remember the logical operator
611                LogicalOperatorType operatorType = LogicalOperatorType.asOperator(token.getText());
612    
613                nextToken();
614                // there should be at least one whitespace after the operator
615                expectAndAcceptMore(TokenType.whiteSpace);
616    
617                // then we expect either some quoted text, another function, or a numeric, boolean or null value
618                if (singleQuotedLiteralWithFunctionsText()
619                        || doubleQuotedLiteralWithFunctionsText()
620                        || functionText()
621                        || numericValue()
622                        || booleanValue()
623                        || nullValue()) {
624                    // then after the right hand side value, there should be a whitespace if there is more tokens
625                    nextToken();
626                    if (!token.getType().isEol()) {
627                        expect(TokenType.whiteSpace);
628                    }
629                } else {
630                    throw new SimpleParserException("Logical operator " + operatorType + " does not support token " + token, token.getIndex());
631                }
632                return true;
633            }
634            return false;
635        }
636    
637        protected boolean numericValue() {
638            return accept(TokenType.numericValue);
639            // no other tokens to check so do not use nextToken
640        }
641    
642        protected boolean booleanValue() {
643            return accept(TokenType.booleanValue);
644            // no other tokens to check so do not use nextToken
645        }
646    
647        protected boolean nullValue() {
648            return accept(TokenType.nullValue);
649            // no other tokens to check so do not use nextToken
650        }
651    
652    }