Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
XmlStreamReader |
|
| 1.0;1 |
1 | package org.codehaus.plexus.util.xml; | |
2 | ||
3 | /* | |
4 | * Licensed to the Apache Software Foundation (ASF) under one | |
5 | * or more contributor license agreements. See the NOTICE file | |
6 | * distributed with this work for additional information | |
7 | * regarding copyright ownership. The ASF licenses this file | |
8 | * to you under the Apache License, Version 2.0 (the | |
9 | * "License"); you may not use this file except in compliance | |
10 | * with the License. You may obtain a copy of the License at | |
11 | * | |
12 | * http://www.apache.org/licenses/LICENSE-2.0 | |
13 | * | |
14 | * Unless required by applicable law or agreed to in writing, | |
15 | * software distributed under the License is distributed on an | |
16 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
17 | * KIND, either express or implied. See the License for the | |
18 | * specific language governing permissions and limitations | |
19 | * under the License. | |
20 | */ | |
21 | ||
22 | import java.io.File; | |
23 | import java.io.IOException; | |
24 | import java.io.InputStream; | |
25 | import java.net.URL; | |
26 | import java.net.URLConnection; | |
27 | ||
28 | /** | |
29 | * Character stream that handles (or at least attempts to) all the necessary Voodoo to figure out the charset encoding of | |
30 | * the XML document within the stream. | |
31 | * <p> | |
32 | * IMPORTANT: This class is not related in any way to the org.xml.sax.XMLReader. This one IS a character stream. | |
33 | * <p> | |
34 | * All this has to be done without consuming characters from the stream, otherwise the XML parser will not recognize the | |
35 | * document as a valid XML. This is not 100% true, but it's close enough (UTF-8 BOM is not handled by all parsers right | |
36 | * now, XmlReader handles it and things work in all parsers). | |
37 | * <p> | |
38 | * The XmlReader class handles the charset encoding of XML documents in Files, raw streams and HTTP streams by offering | |
39 | * a wide set of constructors. | |
40 | * <P> | |
41 | * By default the charset encoding detection is lenient; the constructor with the lenient flag can be used for a strict one | |
42 | * (following HTTP MIME and XML specifications). All this is nicely explained by Mark Pilgrim in his blog, <a | |
43 | * href="http://diveintomark.org/archives/2004/02/13/xml-media-types"> Determining the character encoding of a feed</a>. | |
44 | * <p> | |
45 | * | |
46 | * @author Alejandro Abdelnur | |
47 | * @version revision 1.17 taken on 26/06/2007 from Rome (see https://rome.dev.java.net/source/browse/rome/src/java/com/sun/syndication/io/XmlReader.java) | |
48 | * @since 1.4.4 | |
49 | * @deprecated TO BE REMOVED from here when plexus-utils is upgraded to 1.4.5+ (and prerequisite upgraded to Maven 2.0.6) | |
50 | */ | |
51 | public class XmlStreamReader | |
52 | extends XmlReader | |
53 | { | |
54 | /** | |
55 | * Creates a Reader for a File. | |
56 | * <p> | |
57 | * It looks for the UTF-8 BOM first; if there is none it sniffs the XML prolog charset, and if that is also | |
58 | * missing it defaults to UTF-8. | |
59 | * <p> | |
60 | * It does a lenient charset encoding detection; see the constructor with the lenient parameter for details. | |
61 | * <p> | |
62 | * All work is delegated to the XmlReader constructor with the same parameter. | |
63 | * @param file | |
64 | * File to create a Reader from. | |
65 | * @throws IOException | |
66 | * thrown if there is a problem reading the file. | |
67 | * | |
68 | */ | |
69 | public XmlStreamReader( File file ) throws IOException | |
70 | { | |
71 | 0 | super( file ); |
72 | 0 | } |
73 | ||
74 | /** | |
75 | * Creates a Reader for a raw InputStream. | |
76 | * <p> | |
77 | * It follows the same logic used for files. | |
78 | * <p> | |
79 | * It does a lenient charset encoding detection; see the constructor with the lenient parameter for details. | |
80 | * <p> | |
81 | * All work is delegated to the XmlReader constructor with the same parameter. | |
82 | * @param is | |
83 | * InputStream to create a Reader from. | |
84 | * @throws IOException | |
85 | * thrown if there is a problem reading the stream. | |
86 | * | |
87 | */ | |
88 | public XmlStreamReader( InputStream is ) throws IOException | |
89 | { | |
90 | 0 | super( is ); |
91 | 0 | } |
92 | ||
93 | /** | |
94 | * Creates a Reader for a raw InputStream. | |
95 | * <p> | |
96 | * It follows the same logic used for files. | |
97 | * <p> | |
98 | * If lenient detection is indicated and the detection above fails as per specifications, it then attempts the | |
99 | * following: | |
100 | * <p> | |
101 | * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. | |
102 | * <p> | |
103 | * Else if the XML prolog had a charset encoding, that encoding is used. | |
104 | * <p> | |
105 | * Else if the content type had a charset encoding, that encoding is used. | |
106 | * <p> | |
107 | * Else 'UTF-8' is used. | |
108 | * <p> | |
109 | * If lenient detection is indicated, an XmlStreamReaderException is never thrown. | |
110 | * <p> | |
111 | * All work is delegated to the XmlReader constructor with the same parameters. | |
112 | * @param is | |
113 | * InputStream to create a Reader from. | |
114 | * @param lenient | |
115 | * indicates if the charset encoding detection should be relaxed. | |
116 | * @throws IOException | |
117 | * thrown if there is a problem reading the stream. | |
118 | * @throws XmlStreamReaderException | |
119 | * thrown if the charset encoding could not be determined according to the specs. | |
120 | * | |
121 | */ | |
122 | public XmlStreamReader( InputStream is, boolean lenient ) throws IOException, XmlStreamReaderException | |
123 | { | |
124 | 0 | super( is, lenient ); |
125 | 0 | } |
126 | ||
127 | /** | |
128 | * Creates a Reader using the InputStream of a URL. | |
129 | * <p> | |
130 | * If the URL is not of type HTTP and there is no 'content-type' header in the fetched data, it uses the same logic | |
131 | * used for Files. | |
132 | * <p> | |
133 | * If the URL is an HTTP URL or there is a 'content-type' header in the fetched data, it uses the same logic used for | |
134 | * an InputStream with content-type. | |
135 | * <p> | |
136 | * It does a lenient charset encoding detection; see the constructor with the lenient parameter for details. | |
137 | * <p> | |
138 | * All work is delegated to the XmlReader constructor with the same parameter. | |
139 | * @param url | |
140 | * URL to create a Reader from. | |
141 | * @throws IOException | |
142 | * thrown if there is a problem reading the stream of the URL. | |
143 | * | |
144 | */ | |
145 | public XmlStreamReader( URL url ) throws IOException | |
146 | { | |
147 | 0 | super( url ); |
148 | 0 | } |
149 | ||
150 | /** | |
151 | * Creates a Reader using the InputStream of a URLConnection. | |
152 | * <p> | |
153 | * If the URLConnection is not of type HttpURLConnection and there is no 'content-type' header in the fetched data, | |
154 | * it uses the same logic used for files. | |
155 | * <p> | |
156 | * If the URLConnection is an HTTP one or there is a 'content-type' header in the fetched data, it uses the same logic | |
157 | * used for an InputStream with content-type. | |
158 | * <p> | |
159 | * It does a lenient charset encoding detection; see the constructor with the lenient parameter for details. | |
160 | * <p> | |
161 | * All work is delegated to the XmlReader constructor with the same parameter. | |
162 | * @param conn | |
163 | * URLConnection to create a Reader from. | |
164 | * @throws IOException | |
165 | * thrown if there is a problem reading the stream of the URLConnection. | |
166 | * | |
167 | */ | |
168 | public XmlStreamReader( URLConnection conn ) throws IOException | |
169 | { | |
170 | 0 | super( conn ); |
171 | 0 | } |
172 | ||
173 | /** | |
174 | * Creates a Reader using an InputStream and the associated content-type header. | |
175 | * <p> | |
176 | * First it checks if the stream has a BOM. If there is no BOM it checks the content-type encoding. If there is no | |
177 | * content-type encoding it checks the XML prolog encoding. If there is no XML prolog encoding it uses the default | |
178 | * encoding mandated by the content-type MIME type. | |
179 | * <p> | |
180 | * It does a lenient charset encoding detection; see the constructor with the lenient parameter for details. | |
181 | * <p> | |
182 | * All work is delegated to the XmlReader constructor with the same parameters. | |
183 | * @param is | |
184 | * InputStream to create the reader from. | |
185 | * @param httpContentType | |
186 | * content-type header to use for the resolution of the charset encoding. | |
187 | * @throws IOException | |
188 | * thrown if there is a problem reading the stream. | |
189 | * | |
190 | */ | |
191 | public XmlStreamReader( InputStream is, String httpContentType ) throws IOException | |
192 | { | |
193 | 0 | super( is, httpContentType ); |
194 | 0 | } |
195 | ||
196 | /** | |
197 | * Creates a Reader using an InputStream and the associated content-type header. The lenient flag controls | |
198 | * whether the encoding detection is relaxed. | |
199 | * <p> | |
200 | * First it checks if the stream has a BOM. If there is no BOM it checks the content-type encoding. If there is no | |
201 | * content-type encoding it checks the XML prolog encoding. If there is no XML prolog encoding it uses the default | |
202 | * encoding mandated by the content-type MIME type. | |
203 | * <p> | |
204 | * If lenient detection is indicated and the detection above fails as per specifications, it then attempts the | |
205 | * following: | |
206 | * <p> | |
207 | * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. | |
208 | * <p> | |
209 | * Else if the XML prolog had a charset encoding, that encoding is used. | |
210 | * <p> | |
211 | * Else if the content type had a charset encoding, that encoding is used. | |
212 | * <p> | |
213 | * Else 'UTF-8' is used. | |
214 | * <p> | |
215 | * If lenient detection is indicated, an XmlStreamReaderException is never thrown. | |
216 | * <p> | |
217 | * All work is delegated to the XmlReader constructor with the same parameters. | |
218 | * @param is | |
219 | * InputStream to create the reader from. | |
220 | * @param httpContentType | |
221 | * content-type header to use for the resolution of the charset encoding. | |
222 | * @param lenient indicates if the charset encoding detection should be relaxed. | |
223 | * @param defaultEncoding passed unchanged to XmlReader; presumably the fallback encoding (TODO confirm there). | |
224 | * @throws IOException | |
225 | * thrown if there is a problem reading the stream. | |
226 | * @throws XmlStreamReaderException | |
227 | * thrown if the charset encoding could not be determined according to the specs. | |
228 | * | |
229 | */ | |
230 | public XmlStreamReader( InputStream is, String httpContentType, boolean lenient, String defaultEncoding ) | |
231 | throws IOException, XmlStreamReaderException | |
232 | { | |
233 | 0 | super( is, httpContentType, lenient, defaultEncoding ); |
234 | 0 | } |
235 | ||
236 | /** | |
237 | * Creates a Reader using an InputStream and the associated content-type header. The lenient flag controls | |
238 | * whether the encoding detection is relaxed. | |
239 | * <p> | |
240 | * First it checks if the stream has a BOM. If there is no BOM it checks the content-type encoding. If there is no | |
241 | * content-type encoding it checks the XML prolog encoding. If there is no XML prolog encoding it uses the default | |
242 | * encoding mandated by the content-type MIME type. | |
243 | * <p> | |
244 | * If lenient detection is indicated and the detection above fails as per specifications, it then attempts the | |
245 | * following: | |
246 | * <p> | |
247 | * If the content type was 'text/html' it replaces it with 'text/xml' and tries the detection again. | |
248 | * <p> | |
249 | * Else if the XML prolog had a charset encoding, that encoding is used. | |
250 | * <p> | |
251 | * Else if the content type had a charset encoding, that encoding is used. | |
252 | * <p> | |
253 | * Else 'UTF-8' is used. | |
254 | * <p> | |
255 | * If lenient detection is indicated, an XmlStreamReaderException is never thrown. | |
256 | * <p> | |
257 | * All work is delegated to the XmlReader constructor with the same parameters. | |
258 | * @param is | |
259 | * InputStream to create the reader from. | |
260 | * @param httpContentType | |
261 | * content-type header to use for the resolution of the charset encoding. | |
262 | * @param lenient | |
263 | * indicates if the charset encoding detection should be relaxed. | |
264 | * @throws IOException | |
265 | * thrown if there is a problem reading the stream. | |
266 | * @throws XmlStreamReaderException | |
267 | * thrown if the charset encoding could not be determined according to the specs. | |
268 | * | |
269 | */ | |
270 | public XmlStreamReader( InputStream is, String httpContentType, boolean lenient ) throws IOException, XmlStreamReaderException | |
271 | { | |
272 | 0 | super( is, httpContentType, lenient ); |
273 | 0 | } |
274 | } |