Classes in this File | Line Coverage | Branch Coverage | Complexity | ||||
DataStreamParser |
|
| 2.7;2,7 |
1 | package org.apache.fulcrum.parser; | |
2 | ||
3 | ||
4 | /* | |
5 | * Licensed to the Apache Software Foundation (ASF) under one | |
6 | * or more contributor license agreements. See the NOTICE file | |
7 | * distributed with this work for additional information | |
8 | * regarding copyright ownership. The ASF licenses this file | |
9 | * to you under the Apache License, Version 2.0 (the | |
10 | * "License"); you may not use this file except in compliance | |
11 | * with the License. You may obtain a copy of the License at | |
12 | * | |
13 | * http://www.apache.org/licenses/LICENSE-2.0 | |
14 | * | |
15 | * Unless required by applicable law or agreed to in writing, | |
16 | * software distributed under the License is distributed on an | |
17 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | |
18 | * KIND, either express or implied. See the License for the | |
19 | * specific language governing permissions and limitations | |
20 | * under the License. | |
21 | */ | |
22 | ||
23 | ||
24 | import java.io.BufferedReader; | |
25 | import java.io.IOException; | |
26 | import java.io.InputStreamReader; | |
27 | import java.io.Reader; | |
28 | import java.io.StreamTokenizer; | |
29 | import java.util.ArrayList; | |
30 | import java.util.Iterator; | |
31 | import java.util.List; | |
32 | import java.util.NoSuchElementException; | |
33 | ||
34 | import org.apache.avalon.framework.logger.LogEnabled; | |
35 | import org.apache.avalon.framework.logger.Logger; | |
36 | ||
37 | /** | |
38 | * DataStreamParser is used to parse a stream with a fixed format and | |
39 | * generate ValueParser objects which can be used to extract the values | |
40 | * in the desired type. | |
41 | * | |
42 | * <p>The class itself is abstract - a concrete subclass which implements | |
43 | * the initTokenizer method such as CSVParser or TSVParser is required | |
44 | * to use the functionality. | |
45 | * | |
46 | * <p>The class implements the java.util.Iterator interface for convenience. | |
47 | * This allows simple use in a Velocity template for example: | |
48 | * | |
49 | * <pre> | |
50 | * #foreach ($row in $datastream) | |
51 | * Name: $row.Name | |
52 | * Description: $row.Description | |
53 | * #end | |
54 | * </pre> | |
55 | * | |
56 | * @author <a href="mailto:sean@informage.net">Sean Legassick</a> | |
57 | * @version $Id: DataStreamParser.java 1844836 2018-10-25 14:59:49Z painter $ | |
58 | */ | |
59 | 0 | public abstract class DataStreamParser |
60 | implements Iterator<ValueParser>, LogEnabled | |
61 | { | |
62 | /** | |
63 | * The list of column names. | |
64 | */ | |
65 | private List<String> columnNames; | |
66 | ||
67 | /** | |
68 | * The stream tokenizer for reading values from the input reader. | |
69 | */ | |
70 | private final StreamTokenizer tokenizer; | |
71 | ||
72 | /** | |
73 | * The parameter parser holding the values of columns for the current line. | |
74 | */ | |
75 | private ValueParser lineValues; | |
76 | ||
77 | /** | |
78 | * Indicates whether or not the tokenizer has read anything yet. | |
79 | */ | |
80 | 0 | private boolean neverRead = true; |
81 | ||
82 | /** | |
83 | * The character encoding of the input | |
84 | */ | |
85 | private String characterEncoding; | |
86 | ||
87 | /** | |
88 | * Logger to use | |
89 | */ | |
90 | protected Logger log; | |
91 | ||
92 | /** | |
93 | * Create a new DataStreamParser instance. Requires a Reader to read the | |
94 | * comma-separated values from, a list of column names and a | |
95 | * character encoding. | |
96 | * | |
97 | * @param in the input reader. | |
98 | * @param columnNames a list of column names. | |
99 | * @param characterEncoding the character encoding of the input. | |
100 | */ | |
101 | public DataStreamParser(Reader in, List<String> columnNames, | |
102 | String characterEncoding) | |
103 | 0 | { |
104 | 0 | this.columnNames = columnNames; |
105 | 0 | this.characterEncoding = characterEncoding; |
106 | ||
107 | 0 | if (this.characterEncoding == null) |
108 | { | |
109 | // try and get the characterEncoding from the reader | |
110 | 0 | this.characterEncoding = "US-ASCII"; |
111 | try | |
112 | { | |
113 | 0 | this.characterEncoding = ((InputStreamReader)in).getEncoding(); |
114 | } | |
115 | 0 | catch (ClassCastException e) |
116 | { | |
117 | // ignore | |
118 | 0 | } |
119 | } | |
120 | ||
121 | 0 | tokenizer = new StreamTokenizer(new BufferedReader(in)); |
122 | 0 | initTokenizer(tokenizer); |
123 | 0 | } |
124 | ||
125 | /** | |
126 | * Initialize the StreamTokenizer instance used to read the lines | |
127 | * from the input reader. This must be implemented in subclasses to | |
128 | * set up the tokenizing properties. | |
129 | * | |
130 | * @param tokenizer the StreamTokenizer to use | |
131 | */ | |
132 | protected abstract void initTokenizer(StreamTokenizer tokenizer); | |
133 | ||
134 | /** | |
135 | * Provide a logger | |
136 | * | |
137 | * @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(org.apache.avalon.framework.logger.Logger) | |
138 | */ | |
139 | public void enableLogging(Logger logger) | |
140 | { | |
141 | 0 | this.log = logger.getChildLogger("DataStreamParser"); |
142 | 0 | } |
143 | ||
144 | /** | |
145 | * Set the list of column names explicitly. | |
146 | * | |
147 | * @param columnNames A list of column names. | |
148 | */ | |
149 | public void setColumnNames(List<String> columnNames) | |
150 | { | |
151 | 0 | this.columnNames = columnNames; |
152 | 0 | } |
153 | ||
154 | /** | |
155 | * Read the list of column names from the input reader using the | |
156 | * tokenizer. | |
157 | * | |
158 | * @exception IOException an IOException occurred. | |
159 | */ | |
160 | public void readColumnNames() | |
161 | throws IOException | |
162 | { | |
163 | 0 | columnNames = new ArrayList<String>(); |
164 | ||
165 | 0 | neverRead = false; |
166 | 0 | tokenizer.nextToken(); |
167 | 0 | while (tokenizer.ttype == StreamTokenizer.TT_WORD |
168 | || tokenizer.ttype == '"') | |
169 | { | |
170 | 0 | columnNames.add(tokenizer.sval); |
171 | 0 | tokenizer.nextToken(); |
172 | } | |
173 | 0 | } |
174 | ||
175 | /** | |
176 | * Determine whether a further row of values exists in the input. | |
177 | * | |
178 | * @return true if the input has more rows. | |
179 | * @exception IOException an IOException occurred. | |
180 | */ | |
181 | public boolean hasNextRow() | |
182 | throws IOException | |
183 | { | |
184 | // check for end of line ensures that an empty last line doesn't | |
185 | // give a false positive for hasNextRow | |
186 | 0 | if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL) |
187 | { | |
188 | 0 | tokenizer.nextToken(); |
189 | 0 | tokenizer.pushBack(); |
190 | 0 | neverRead = false; |
191 | } | |
192 | 0 | return tokenizer.ttype != StreamTokenizer.TT_EOF; |
193 | } | |
194 | ||
195 | /** | |
196 | * Returns a ValueParser object containing the next row of values. | |
197 | * | |
198 | * @return a ValueParser object. | |
199 | * @exception IOException an IOException occurred. | |
200 | * @exception NoSuchElementException there are no more rows in the input. | |
201 | */ | |
202 | public ValueParser nextRow() | |
203 | throws IOException, NoSuchElementException | |
204 | { | |
205 | 0 | if (!hasNextRow()) |
206 | { | |
207 | 0 | throw new NoSuchElementException(); |
208 | } | |
209 | ||
210 | 0 | if (lineValues == null) |
211 | { | |
212 | 0 | lineValues = new BaseValueParser(characterEncoding); |
213 | } | |
214 | else | |
215 | { | |
216 | 0 | lineValues.clear(); |
217 | } | |
218 | ||
219 | 0 | Iterator<String> it = columnNames.iterator(); |
220 | 0 | tokenizer.nextToken(); |
221 | 0 | while (tokenizer.ttype == StreamTokenizer.TT_WORD |
222 | || tokenizer.ttype == '"') | |
223 | { | |
224 | // note this means that if there are more values than | |
225 | // column names, the extra values are discarded. | |
226 | 0 | if (it.hasNext()) |
227 | { | |
228 | 0 | String colname = it.next().toString(); |
229 | 0 | String colval = tokenizer.sval; |
230 | 0 | if (log.isDebugEnabled()) |
231 | { | |
232 | 0 | log.debug("DataStreamParser.nextRow(): " + |
233 | colname + '=' + colval); | |
234 | } | |
235 | 0 | lineValues.add(colname, colval); |
236 | } | |
237 | 0 | tokenizer.nextToken(); |
238 | } | |
239 | ||
240 | 0 | return lineValues; |
241 | } | |
242 | ||
243 | /** | |
244 | * Determine whether a further row of values exists in the input. | |
245 | * | |
246 | * @return true if the input has more rows. | |
247 | */ | |
248 | public boolean hasNext() | |
249 | { | |
250 | 0 | boolean hasNext = false; |
251 | ||
252 | try | |
253 | { | |
254 | 0 | hasNext = hasNextRow(); |
255 | } | |
256 | 0 | catch (IOException e) |
257 | { | |
258 | 0 | log.error("IOException in CSVParser.hasNext", e); |
259 | 0 | } |
260 | ||
261 | 0 | return hasNext; |
262 | } | |
263 | ||
264 | /** | |
265 | * Returns a ValueParser object containing the next row of values. | |
266 | * | |
267 | * @return a ValueParser object as an Object. | |
268 | * @exception NoSuchElementException there are no more rows in the input | |
269 | * or an IOException occurred. | |
270 | */ | |
271 | public ValueParser next() | |
272 | throws NoSuchElementException | |
273 | { | |
274 | 0 | ValueParser nextRow = null; |
275 | ||
276 | try | |
277 | { | |
278 | 0 | nextRow = nextRow(); |
279 | } | |
280 | 0 | catch (IOException e) |
281 | { | |
282 | 0 | log.error("IOException in CSVParser.next", e); |
283 | 0 | throw new NoSuchElementException(); |
284 | 0 | } |
285 | ||
286 | 0 | return nextRow; |
287 | } | |
288 | ||
289 | /** | |
290 | * The optional Iterator.remove method is not supported. | |
291 | * | |
292 | * @exception UnsupportedOperationException the operation is not supported. | |
293 | */ | |
294 | public void remove() | |
295 | throws UnsupportedOperationException | |
296 | { | |
297 | 0 | throw new UnsupportedOperationException(); |
298 | } | |
299 | } |