001 /** 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.camel.support; 018 019 import java.io.Closeable; 020 import java.io.IOException; 021 import java.io.InputStream; 022 import java.text.MessageFormat; 023 import java.util.Iterator; 024 import java.util.LinkedHashMap; 025 import java.util.Map; 026 import java.util.Scanner; 027 import java.util.regex.Matcher; 028 import java.util.regex.Pattern; 029 030 import org.apache.camel.Exchange; 031 import org.apache.camel.InvalidPayloadException; 032 import org.apache.camel.util.IOHelper; 033 import org.apache.camel.util.ObjectHelper; 034 035 /** 036 * {@link org.apache.camel.Expression} to walk a {@link org.apache.camel.Message} XML body 037 * using an {@link java.util.Iterator}, which grabs the content between a XML start and end token, 038 * where the end token corresponds implicitly to either the end tag or the self-closing start tag. 039 * <p/> 040 * The message body must be able to convert to {@link java.io.InputStream} type which is used as stream 041 * to access the message body. 042 * <p/> 043 * Can be used to split big XML files. 044 * <p/> 045 * This implementation supports inheriting namespaces from a parent/root tag. 046 */ 047 public class TokenXMLExpressionIterator extends ExpressionAdapter { 048 private static final Pattern NAMESPACE_PATTERN = Pattern.compile("xmlns(:\\w+|)\\s*=\\s*('[^']+'|\"[^\"]+\")"); 049 private static final String SCAN_TOKEN_NS_PREFIX_REGEX = "([^:<>]{1,15}?:|)"; 050 private static final String SCAN_BLOCK_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*)?/>|<{0}(\\s+[^>]*)?>(?:(?!(</{0}\\s*>)).)*</{0}\\s*>"; 051 private static final String SCAN_PARENT_TOKEN_REGEX_TEMPLATE = "<{0}(\\s+[^>]*\\s*)?>"; 052 053 protected final String tagToken; 054 protected final String inheritNamespaceToken; 055 056 public TokenXMLExpressionIterator(String tagToken, String inheritNamespaceToken) { 057 ObjectHelper.notEmpty(tagToken, "tagToken"); 058 this.tagToken = tagToken; 059 // namespace token is optional 060 this.inheritNamespaceToken = inheritNamespaceToken; 061 062 // must be XML tokens 063 if (!tagToken.startsWith("<") || !tagToken.endsWith(">")) { 064 throw new IllegalArgumentException("XML Tag token must be a valid XML tag, was: " + tagToken); 065 } 066 if (inheritNamespaceToken != null && (!inheritNamespaceToken.startsWith("<") || !inheritNamespaceToken.endsWith(">"))) { 067 throw new IllegalArgumentException("Namespace token must be a valid XML token, was: " + inheritNamespaceToken); 068 } 069 } 070 071 protected Iterator<?> createIterator(InputStream in, String charset) { 072 XMLTokenIterator iterator = new XMLTokenIterator(tagToken, inheritNamespaceToken, in, charset); 073 iterator.init(); 074 return iterator; 075 } 076 077 @Override 078 public boolean matches(Exchange exchange) { 079 // as a predicate we must close the stream, as we do not return an iterator that can be used 080 // afterwards to iterate the input stream 081 Object value = doEvaluate(exchange, true); 082 return ObjectHelper.evaluateValuePredicate(value); 083 } 084 085 @Override 086 public Object evaluate(Exchange exchange) { 087 // as we return an iterator to access the input stream, we should not close it 088 return doEvaluate(exchange, false); 089 } 090 091 /** 092 * Strategy to evaluate the exchange 093 * 094 * @param exchange the exchange 095 * @param closeStream whether to close the stream before returning from this method. 096 * @return the evaluated value 097 */ 098 protected Object doEvaluate(Exchange exchange, boolean closeStream) { 099 InputStream in = null; 100 try { 101 in = exchange.getIn().getMandatoryBody(InputStream.class); 102 // we may read from a file, and want to support custom charset defined on the exchange 103 String charset = IOHelper.getCharsetName(exchange); 104 return createIterator(in, charset); 105 } catch (InvalidPayloadException e) { 106 exchange.setException(e); 107 // must close input stream 108 IOHelper.close(in); 109 return null; 110 } finally { 111 if (closeStream) { 112 IOHelper.close(in); 113 } 114 } 115 } 116 117 /** 118 * Iterator to walk the input stream 119 */ 120 static class XMLTokenIterator implements Iterator<Object>, Closeable { 121 final String tagToken; 122 final InputStream in; 123 final String charset; 124 Scanner scanner; 125 Object image; 126 127 private final Pattern tagTokenPattern; 128 private final String inheritNamespaceToken; 129 private Pattern inheritNamespaceTokenPattern; 130 private String rootTokenNamespaces; 131 132 XMLTokenIterator(String tagToken, String inheritNamespaceToken, InputStream in, String charset) { 133 this.tagToken = tagToken; 134 this.in = in; 135 this.charset = charset; 136 137 // remove any beginning < and ending > as we need to support ns prefixes and attributes, so we use a reg exp patterns 138 this.tagTokenPattern = 139 Pattern.compile(MessageFormat.format(SCAN_BLOCK_TOKEN_REGEX_TEMPLATE, 140 SCAN_TOKEN_NS_PREFIX_REGEX + tagToken.substring(1, tagToken.length() - 1)), 141 Pattern.MULTILINE | Pattern.DOTALL); 142 143 this.inheritNamespaceToken = inheritNamespaceToken; 144 if (inheritNamespaceToken != null) { 145 // the inherit namespace token may itself have a namespace prefix 146 // the namespaces on the parent tag can be in multi line, so we need to instruct the dot to support multilines 147 this.inheritNamespaceTokenPattern = 148 Pattern.compile(MessageFormat.format(SCAN_PARENT_TOKEN_REGEX_TEMPLATE, 149 SCAN_TOKEN_NS_PREFIX_REGEX + inheritNamespaceToken.substring(1, inheritNamespaceToken.length() - 1)), 150 Pattern.MULTILINE | Pattern.DOTALL); 151 } 152 } 153 154 void init() { 155 // use a scanner with the default delimiter 156 this.scanner = new Scanner(in, charset); 157 this.image = scanner.hasNext() ? (String) next(true) : null; 158 } 159 160 String getNext(boolean first) { 161 // initialize inherited namespaces on first 162 if (first && inheritNamespaceToken != null) { 163 rootTokenNamespaces = getNamespacesFromNamespaceToken(scanner.findWithinHorizon(inheritNamespaceTokenPattern, 0)); 164 } 165 166 String next = scanner.findWithinHorizon(tagTokenPattern, 0); 167 if (next == null) { 168 return null; 169 } 170 171 // build answer accordingly to whether namespaces should be inherited or not 172 // REVISIT should skip the prefixes that are declared within the child itself. 173 if (inheritNamespaceToken != null && rootTokenNamespaces != null) { 174 String head = ObjectHelper.before(next, ">"); 175 boolean empty = false; 176 if (head.endsWith("/")) { 177 head = head.substring(0, head.length() - 1); 178 empty = true; 179 } 180 StringBuilder sb = new StringBuilder(); 181 // append root namespaces to local start token 182 // grab the text 183 String tail = ObjectHelper.after(next, ">"); 184 // build result with inherited namespaces 185 next = sb.append(head).append(rootTokenNamespaces).append(empty ? "/>" : ">").append(tail).toString(); 186 } 187 188 return next; 189 } 190 191 private String getNamespacesFromNamespaceToken(String text) { 192 if (text == null) { 193 return null; 194 } 195 196 // find namespaces (there can be attributes mixed, so we should only grab the namespaces) 197 Map<String, String> namespaces = new LinkedHashMap<String, String>(); 198 Matcher matcher = NAMESPACE_PATTERN.matcher(text); 199 while (matcher.find()) { 200 String prefix = matcher.group(1); 201 String url = matcher.group(2); 202 if (ObjectHelper.isEmpty(prefix)) { 203 prefix = "_DEFAULT_"; 204 } else { 205 // skip leading : 206 prefix = prefix.substring(1); 207 } 208 namespaces.put(prefix, url); 209 } 210 211 // did we find any namespaces 212 if (namespaces.isEmpty()) { 213 return null; 214 } 215 216 // build namespace String 217 StringBuilder sb = new StringBuilder(); 218 for (Map.Entry<String, String> entry : namespaces.entrySet()) { 219 String key = entry.getKey(); 220 // note the value is already quoted 221 String value = entry.getValue(); 222 if ("_DEFAULT_".equals(key)) { 223 sb.append(" xmlns=").append(value); 224 } else { 225 sb.append(" xmlns:").append(key).append("=").append(value); 226 } 227 } 228 229 return sb.toString(); 230 } 231 232 @Override 233 public boolean hasNext() { 234 return image != null; 235 } 236 237 @Override 238 public Object next() { 239 return next(false); 240 } 241 242 Object next(boolean first) { 243 Object answer = image; 244 // calculate next 245 if (scanner.hasNext()) { 246 image = getNext(first); 247 } else { 248 image = null; 249 } 250 251 if (answer == null) { 252 // first time the image may be null 253 answer = image; 254 } 255 return answer; 256 } 257 258 @Override 259 public void remove() { 260 // noop 261 } 262 263 @Override 264 public void close() throws IOException { 265 scanner.close(); 266 } 267 268 } 269 270 }