001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.support;
018    
019    import java.io.Closeable;
020    import java.io.IOException;
021    import java.io.InputStream;
022    import java.util.Iterator;
023    import java.util.Scanner;
024    
025    import org.apache.camel.Exchange;
026    import org.apache.camel.InvalidPayloadException;
027    import org.apache.camel.util.IOHelper;
028    import org.apache.camel.util.ObjectHelper;
029    
030    /**
031     * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} body
032     * using an {@link Iterator}, which grabs the content between a start and end token.
033     * <p/>
034     * The message body must be able to convert to {@link InputStream} type which is used as stream
035     * to access the message body.
036     * <p/>
037     * For splitting XML files use {@link org.apache.camel.support.TokenXMLExpressionIterator} instead.
038     */
039    public class TokenPairExpressionIterator extends ExpressionAdapter {
040    
041        protected final String startToken;
042        protected final String endToken;
043        protected final boolean includeTokens;
044    
045        public TokenPairExpressionIterator(String startToken, String endToken, boolean includeTokens) {
046            ObjectHelper.notEmpty(startToken, "startToken");
047            ObjectHelper.notEmpty(endToken, "endToken");
048            this.startToken = startToken;
049            this.endToken = endToken;
050            this.includeTokens = includeTokens;
051        }
052    
053        @Override
054        public boolean matches(Exchange exchange) {
055            // as a predicate we must close the stream, as we do not return an iterator that can be used
056            // afterwards to iterate the input stream
057            Object value = doEvaluate(exchange, true);
058            return ObjectHelper.evaluateValuePredicate(value);
059        }
060    
061        @Override
062        public Object evaluate(Exchange exchange) {
063            // as we return an iterator to access the input stream, we should not close it
064            return doEvaluate(exchange, false);
065        }
066    
067        /**
068         * Strategy to evaluate the exchange
069         *
070         * @param exchange   the exchange
071         * @param closeStream whether to close the stream before returning from this method.
072         * @return the evaluated value
073         */
074        protected Object doEvaluate(Exchange exchange, boolean closeStream) {
075            InputStream in = null;
076            try {
077                in = exchange.getIn().getMandatoryBody(InputStream.class);
078                // we may read from a file, and want to support custom charset defined on the exchange
079                String charset = IOHelper.getCharsetName(exchange);
080                return createIterator(in, charset);
081            } catch (InvalidPayloadException e) {
082                exchange.setException(e);
083                // must close input stream
084                IOHelper.close(in);
085                return null;
086            } finally {
087                if (closeStream) {
088                    IOHelper.close(in);
089                }
090            }
091        }
092    
093        /**
094         * Strategy to create the iterator
095         *
096         * @param in input stream to iterate
097         * @param charset charset
098         * @return the iterator
099         */
100        protected Iterator<?> createIterator(InputStream in, String charset) {
101            TokenPairIterator iterator = new TokenPairIterator(startToken, endToken, includeTokens, in, charset);
102            iterator.init();
103            return iterator;
104        }
105    
106        @Override
107        public String toString() {
108            return "tokenize[body() using tokens: " + startToken + "..." + endToken + "]";
109        }
110    
111        /**
112         * Iterator to walk the input stream
113         */
114        static class TokenPairIterator implements Iterator<Object>, Closeable {
115    
116            final String startToken;
117            String scanStartToken;
118            final String endToken;
119            String scanEndToken;
120            final boolean includeTokens;
121            final InputStream in;
122            final String charset;
123            Scanner scanner;
124            Object image;
125    
126            TokenPairIterator(String startToken, String endToken, boolean includeTokens, InputStream in, String charset) {
127                this.startToken = startToken;
128                this.endToken = endToken;
129                this.includeTokens = includeTokens;
130                this.in = in;
131                this.charset = charset;
132    
133                // make sure [ and ] is escaped as we use scanner which is reg exp based
134                // where [ and ] have special meaning
135                scanStartToken = startToken;
136                if (scanStartToken.startsWith("[")) {
137                    scanStartToken = "\\" + scanStartToken;
138                }
139                if (scanStartToken.endsWith("]")) {
140                    scanStartToken = scanStartToken.substring(0, startToken.length() - 1)  + "\\]";
141                }
142                scanEndToken = endToken;
143                if (scanEndToken.startsWith("[")) {
144                    scanEndToken = "\\" + scanEndToken;
145                }
146                if (scanEndToken.endsWith("]")) {
147                    scanEndToken = scanEndToken.substring(0, scanEndToken.length() - 1)  + "\\]";
148                }
149            }
150    
151            void init() {
152                // use end token as delimiter
153                this.scanner = new Scanner(in, charset).useDelimiter(scanEndToken);
154                // this iterator will do look ahead as we may have data
155                // after the last end token, which the scanner would find
156                // so we need to be one step ahead of the scanner
157                this.image = scanner.hasNext() ? next(true) : null;
158            }
159    
160            @Override
161            public boolean hasNext() {
162                return image != null;
163            }
164    
165            @Override
166            public Object next() {
167                return next(false);
168            }
169    
170            Object next(boolean first) {
171                Object answer = image;
172                // calculate next
173                if (scanner.hasNext()) {
174                    image = getNext(first);
175                } else {
176                    image = null;
177                }
178    
179                if (answer == null) {
180                    // first time the image may be null
181                    answer = image;
182                }
183                return answer;
184            }
185    
186            Object getNext(boolean first) {
187                String next = scanner.next();
188    
189                // only grab text after the start token
190                if (next != null && next.contains(startToken)) {
191                    next = ObjectHelper.after(next, startToken);
192    
193                    // include tokens in answer
194                    if (next != null && includeTokens) {
195                        StringBuilder sb = new StringBuilder();
196                        next = sb.append(startToken).append(next).append(endToken).toString();
197                    }
198                } else {
199                    // must have start token, otherwise we have reached beyond last tokens
200                    // and should not return more data
201                    return null;
202                }
203    
204                return next;
205            }
206    
207            @Override
208            public void remove() {
209                // noop
210            }
211    
212            @Override
213            public void close() throws IOException {
214                scanner.close();
215            }
216        }
217    
218    }