001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.support;
018    
019    import java.io.Closeable;
020    import java.io.IOException;
021    import java.io.InputStream;
022    import java.text.MessageFormat;
023    import java.util.Iterator;
024    import java.util.LinkedHashMap;
025    import java.util.Map;
026    import java.util.Scanner;
027    import java.util.regex.Matcher;
028    import java.util.regex.Pattern;
029    
030    import org.apache.camel.Exchange;
031    import org.apache.camel.InvalidPayloadException;
032    import org.apache.camel.util.IOHelper;
033    import org.apache.camel.util.ObjectHelper;
034    
035    /**
036     * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body
037     * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token,
038     * where the end token corresponds implicitly to either the end tag or the self-closing start tag.
039     * <p/>
040     * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream
041     * to access the message body.
042     * <p/>
043     * Can be used to split big XML files.
044     * <p/>
045     * This implementation supports inheriting namespaces from a parent/root tag.
046     */
047    public class TokenXMLExpressionIterator extends ExpressionAdapter {
048        private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")");
049        private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)";
050        private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>";
051        private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>";
052        
053        protected final String tagToken;
054        protected final String inheritNamespaceToken;
055    
056        public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) {
057            ObjectHelper.notEmpty(tagToken, "tagToken");
058            this.tagToken = tagToken;
059            // namespace token is optional
060            this.inheritNamespaceToken = inheritNamespaceToken;
061    
062            // must be XML tokens
063            if (!tagToken.startsWith("<") || !tagToken.endsWith(">")) {
064                throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tagToken);
065            }
066            if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) {
067                throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken);
068            }
069        }
070    
071        protected Iterator<?> createIterator(InputStream in, String charset) {
072            XMLTokenIterator iterator = new XMLTokenIterator(tagToken, inheritNamespaceToken, in, charset);
073            iterator.init();
074            return iterator;
075        }
076    
077        @Override
078        public boolean matches(Exchange exchange) {
079            // as a predicate we must close the stream, as we do not return an iterator that can be used
080            // afterwards to iterate the input stream
081            Object value = doEvaluate(exchange, true);
082            return ObjectHelper.evaluateValuePredicate(value);
083        }
084    
085        @Override
086        public Object evaluate(Exchange exchange) {
087            // as we return an iterator to access the input stream, we should not close it
088            return doEvaluate(exchange, false);
089        }
090    
091        /**
092         * Strategy to evaluate the exchange
093         *
094         * @param exchange   the exchange
095         * @param closeStream whether to close the stream before returning from this method.
096         * @return the evaluated value
097         */
098        protected Object doEvaluate(Exchange exchange, boolean closeStream) {
099            InputStream in = null;
100            try {
101                in = exchange.getIn().getMandatoryBody(InputStream.class);
102                // we may read from a file, and want to support custom charset defined on the exchange
103                String charset = IOHelper.getCharsetName(exchange);
104                return createIterator(in, charset);
105            } catch (InvalidPayloadException e) {
106                exchange.setException(e);
107                // must close input stream
108                IOHelper.close(in);
109                return null;
110            } finally {
111                if (closeStream) {
112                    IOHelper.close(in);
113                }
114            }
115        }
116        
117        /**
118         * Iterator to walk the input stream
119         */
120        static class XMLTokenIterator implements Iterator<Object>, Closeable {
121            final String tagToken;
122            final InputStream in;
123            final String charset;
124            Scanner scanner;
125            Object image;
126    
127            private final Pattern tagTokenPattern;
128            private final String inheritNamespaceToken;
129            private Pattern inheritNamespaceTokenPattern;
130            private String rootTokenNamespaces;
131    
132            XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) {
133                this.tagToken = tagToken;
134                this.in = in;
135                this.charset = charset;
136              
137                // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns
138                this.tagTokenPattern = 
139                    Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE, 
140                                                         SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)), 
141                                                         Pattern.MULTILINE | Pattern.DOTALL);
142                
143                this.inheritNamespaceToken = inheritNamespaceToken;
144                if (inheritNamespaceToken != null) {
145                    // the inherit namespace token may itself have a namespace prefix
146                    // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines
147                    this.inheritNamespaceTokenPattern = 
148                        Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE,
149                                                             SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), 
150                                                             Pattern.MULTILINE | Pattern.DOTALL);
151                }
152            }
153    
154            void init() {
155                // use a scanner with the default delimiter
156                this.scanner = new Scanner(in, charset);
157                this.image = scanner.hasNext() ? (String) next(true) : null;
158            }
159    
160            String getNext(boolean first) {
161                // initialize inherited namespaces on first
162                if (first && inheritNamespaceToken != null) {
163                    rootTokenNamespaces =  getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0));
164                }
165    
166                String next = scanner.findWithinHorizon(tagTokenPattern, 0);
167                if (next == null) {
168                    return null;
169                }
170    
171                // build answer accordingly to whether namespaces should be inherited or not
172                // REVISIT should skip the prefixes that are declared within the child itself.
173                if (inheritNamespaceToken != null && rootTokenNamespaces != null) {
174                    String head = ObjectHelper.before(next, ">");
175                    boolean empty = false;
176                    if (head.endsWith("/")) {
177                        head = head.substring(0, head.length() - 1);
178                        empty = true;
179                    }
180                    StringBuilder sb = new StringBuilder();
181                    // append root namespaces to local start token
182                    // grab the text
183                    String tail = ObjectHelper.after(next, ">");
184                    // build result with inherited namespaces
185                    next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString();
186                }
187                
188                return next;
189            }
190    
191            private String getNamespacesFromNamespaceToken(String text) {
192                if (text == null) {
193                    return null;
194                }
195    
196                // find namespaces (there can be attributes mixed, so we should only grab the namespaces)
197                Map<String, String> namespaces = new LinkedHashMap<String, String>();
198                Matcher matcher = NAMESPACE_PATTERN.matcher(text);
199                while (matcher.find()) {
200                    String prefix = matcher.group(1);
201                    String url = matcher.group(2);
202                    if (ObjectHelper.isEmpty(prefix)) {
203                        prefix = "_DEFAULT_";
204                    } else {
205                        // skip leading :
206                        prefix = prefix.substring(1);
207                    }
208                    namespaces.put(prefix, url);
209                }
210    
211                // did we find any namespaces
212                if (namespaces.isEmpty()) {
213                    return null;
214                }
215    
216                // build namespace String
217                StringBuilder sb = new StringBuilder();
218                for (Map.Entry<String, String> entry : namespaces.entrySet()) {
219                    String key = entry.getKey();
220                    // note the value is already quoted
221                    String value = entry.getValue();
222                    if ("_DEFAULT_".equals(key)) {
223                        sb.append(" xmlns=").append(value);
224                    } else {
225                        sb.append(" xmlns:").append(key).append("=").append(value);
226                    }
227                }
228    
229                return sb.toString();
230            }
231            
232            @Override
233            public boolean hasNext() {
234                return image != null;
235            }
236    
237            @Override
238            public Object next() {
239                return next(false);
240            }
241    
242            Object next(boolean first) {
243                Object answer = image;
244                // calculate next
245                if (scanner.hasNext()) {
246                    image = getNext(first);
247                } else {
248                    image = null;
249                }
250    
251                if (answer == null) {
252                    // first time the image may be null
253                    answer = image;
254                }
255                return answer;
256            }
257    
258            @Override
259            public void remove() {
260                // noop
261            }
262    
263            @Override
264            public void close() throws IOException {
265                scanner.close();
266            }
267    
268        }
269    
270    }