Coverage Report

Coverage Report - org.apache.fulcrum.parser.DataStreamParser

Classes in this File

Line Coverage

Branch Coverage

Complexity

DataStreamParser

0/59

0/24

2,7

 package org.apache.fulcrum.parser;
 
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */
 
 
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.StreamTokenizer;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.NoSuchElementException;
 
 import org.apache.avalon.framework.logger.LogEnabled;
 import org.apache.avalon.framework.logger.Logger;
 
 /**
  * DataStreamParser is used to parse a stream with a fixed format and
  * generate ValueParser objects which can be used to extract the values
  * in the desired type.
  *
  * <p>The class itself is abstract - a concrete subclass which implements
  * the initTokenizer method such as CSVParser or TSVParser is required
  * to use the functionality.
  *
  * <p>The class implements the java.util.Iterator interface for convenience.
  * This allows simple use in a Velocity template for example:
  *
  * <pre>
  * #foreach ($row in $datastream)
  *   Name: $row.Name
  *   Description: $row.Description
  * #end
  * </pre>
  *
  * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
  * @version $Id: DataStreamParser.java 1844836 2018-10-25 14:59:49Z painter $
  */
 public abstract class DataStreamParser
     implements Iterator<ValueParser>, LogEnabled
 {
     /**
      * The list of column names.
      */
     private List<String>    columnNames;
 
     /**
      * The stream tokenizer for reading values from the input reader.
      */
     private final StreamTokenizer tokenizer;
 
     /**
      * The parameter parser holding the values of columns for the current line.
      */
     private ValueParser     lineValues;
 
     /**
      * Indicates whether or not the tokenizer has read anything yet.
      */
     private boolean         neverRead = true;
 
     /**
      * The character encoding of the input
      */
     private String          characterEncoding;
 
     /**
      * Logger to use
      */
     protected Logger log;
 
     /**
      * Create a new DataStreamParser instance. Requires a Reader to read the
      * comma-separated values from, a list of column names and a
      * character encoding.
      *
      * @param in the input reader.
      * @param columnNames a list of column names.
      * @param characterEncoding the character encoding of the input.
      */
     public DataStreamParser(Reader in, List<String> columnNames,
             String characterEncoding)
     {
         this.columnNames = columnNames;
         this.characterEncoding = characterEncoding;
 
         if (this.characterEncoding == null)
         {
             // try and get the characterEncoding from the reader
             this.characterEncoding = "US-ASCII";
             try
             {
                 this.characterEncoding = ((InputStreamReader)in).getEncoding();
             }
             catch (ClassCastException e)
             {
                 // ignore
             }
         }
 
         tokenizer = new StreamTokenizer(new BufferedReader(in));
         initTokenizer(tokenizer);
     }
 
     /**
      * Initialize the StreamTokenizer instance used to read the lines
      * from the input reader. This must be implemented in subclasses to
      * set up the tokenizing properties.
      * 
      * @param tokenizer the StreamTokenizer to use
      */
     protected abstract void initTokenizer(StreamTokenizer tokenizer);
 
     /**
      * Provide a logger
      *
      * @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(org.apache.avalon.framework.logger.Logger)
      */
     public void enableLogging(Logger logger)
     {
         this.log = logger.getChildLogger("DataStreamParser");
     }
 
     /**
      * Set the list of column names explicitly.
      *
      * @param columnNames A list of column names.
      */
     public void setColumnNames(List<String> columnNames)
     {
         this.columnNames = columnNames;
     }
 
     /**
      * Read the list of column names from the input reader using the
      * tokenizer.
      *
      * @exception IOException an IOException occurred.
      */
     public void readColumnNames()
         throws IOException
     {
         columnNames = new ArrayList<String>();
 
         neverRead = false;
         tokenizer.nextToken();
         while (tokenizer.ttype == StreamTokenizer.TT_WORD
                || tokenizer.ttype == '"')
         {
             columnNames.add(tokenizer.sval);
             tokenizer.nextToken();
         }
     }
 
     /**
      * Determine whether a further row of values exists in the input.
      *
      * @return true if the input has more rows.
      * @exception IOException an IOException occurred.
      */
     public boolean hasNextRow()
         throws IOException
     {
         // check for end of line ensures that an empty last line doesn't
         // give a false positive for hasNextRow
         if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
         {
             tokenizer.nextToken();
             tokenizer.pushBack();
             neverRead = false;
         }
         return tokenizer.ttype != StreamTokenizer.TT_EOF;
     }
 
     /**
      * Returns a ValueParser object containing the next row of values.
      *
      * @return a ValueParser object.
      * @exception IOException an IOException occurred.
      * @exception NoSuchElementException there are no more rows in the input.
      */
     public ValueParser nextRow()
         throws IOException, NoSuchElementException
     {
         if (!hasNextRow())
         {
             throw new NoSuchElementException();
         }
 
         if (lineValues == null)
         {
             lineValues = new BaseValueParser(characterEncoding);
         }
         else
         {
             lineValues.clear();
         }
 
         Iterator<String> it = columnNames.iterator();
         tokenizer.nextToken();
         while (tokenizer.ttype == StreamTokenizer.TT_WORD
                || tokenizer.ttype == '"')
         {
             // note this means that if there are more values than
             // column names, the extra values are discarded.
             if (it.hasNext())
             {
                 String colname = it.next().toString();
                 String colval  = tokenizer.sval;
                 if (log.isDebugEnabled())
                 {
                     log.debug("DataStreamParser.nextRow(): " +
                               colname + '=' + colval);
                 }
                 lineValues.add(colname, colval);
             }
             tokenizer.nextToken();
         }
 
         return lineValues;
     }
 
     /**
      * Determine whether a further row of values exists in the input.
      *
      * @return true if the input has more rows.
      */
     public boolean hasNext()
     {
         boolean hasNext = false;
 
         try
         {
             hasNext = hasNextRow();
         }
         catch (IOException e)
         {
             log.error("IOException in CSVParser.hasNext", e);
         }
 
         return hasNext;
     }
 
     /**
      * Returns a ValueParser object containing the next row of values.
      *
      * @return a ValueParser object as an Object.
      * @exception NoSuchElementException there are no more rows in the input
      *                                   or an IOException occurred.
      */
     public ValueParser next()
         throws NoSuchElementException
     {
         ValueParser nextRow = null;
 
         try
         {
             nextRow = nextRow();
         }
         catch (IOException e)
         {
             log.error("IOException in CSVParser.next", e);
             throw new NoSuchElementException();
         }
 
         return nextRow;
     }
 
     /**
      * The optional Iterator.remove method is not supported.
      *
      * @exception UnsupportedOperationException the operation is not supported.
      */
     public void remove()
         throws UnsupportedOperationException
     {
         throw new UnsupportedOperationException();
     }
 }

1		package org.apache.fulcrum.parser;
2
3
4		/*
5		* Licensed to the Apache Software Foundation (ASF) under one
6		* or more contributor license agreements. See the NOTICE file
7		* distributed with this work for additional information
8		* regarding copyright ownership. The ASF licenses this file
9		* to you under the Apache License, Version 2.0 (the
10		* "License"); you may not use this file except in compliance
11		* with the License. You may obtain a copy of the License at
12		*
13		* http://www.apache.org/licenses/LICENSE-2.0
14		*
15		* Unless required by applicable law or agreed to in writing,
16		* software distributed under the License is distributed on an
17		* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18		* KIND, either express or implied. See the License for the
19		* specific language governing permissions and limitations
20		* under the License.
21		*/
22
23
24		import java.io.BufferedReader;
25		import java.io.IOException;
26		import java.io.InputStreamReader;
27		import java.io.Reader;
28		import java.io.StreamTokenizer;
29		import java.util.ArrayList;
30		import java.util.Iterator;
31		import java.util.List;
32		import java.util.NoSuchElementException;
33
34		import org.apache.avalon.framework.logger.LogEnabled;
35		import org.apache.avalon.framework.logger.Logger;
36
37		/**
38		* DataStreamParser is used to parse a stream with a fixed format and
39		* generate ValueParser objects which can be used to extract the values
40		* in the desired type.
41		*
42		* <p>The class itself is abstract - a concrete subclass which implements
43		* the initTokenizer method such as CSVParser or TSVParser is required
44		* to use the functionality.
45		*
46		* <p>The class implements the java.util.Iterator interface for convenience.
47		* This allows simple use in a Velocity template for example:
48		*
49		* <pre>
50		* #foreach ($row in $datastream)
51		* Name: $row.Name
52		* Description: $row.Description
53		* #end
54		* </pre>
55		*
56		* @author <a href="mailto:sean@informage.net">Sean Legassick</a>
57		* @version $Id: DataStreamParser.java 1844836 2018-10-25 14:59:49Z painter $
58		*/
59	0	public abstract class DataStreamParser
60		implements Iterator<ValueParser>, LogEnabled
61		{
62		/**
63		* The list of column names.
64		*/
65		private List<String> columnNames;
66
67		/**
68		* The stream tokenizer for reading values from the input reader.
69		*/
70		private final StreamTokenizer tokenizer;
71
72		/**
73		* The parameter parser holding the values of columns for the current line.
74		*/
75		private ValueParser lineValues;
76
77		/**
78		* Indicates whether or not the tokenizer has read anything yet.
79		*/
80	0	private boolean neverRead = true;
81
82		/**
83		* The character encoding of the input
84		*/
85		private String characterEncoding;
86
87		/**
88		* Logger to use
89		*/
90		protected Logger log;
91
92		/**
93		* Create a new DataStreamParser instance. Requires a Reader to read the
94		* comma-separated values from, a list of column names and a
95		* character encoding.
96		*
97		* @param in the input reader.
98		* @param columnNames a list of column names.
99		* @param characterEncoding the character encoding of the input.
100		*/
101		public DataStreamParser(Reader in, List<String> columnNames,
102		String characterEncoding)
103	0	{
104	0	this.columnNames = columnNames;
105	0	this.characterEncoding = characterEncoding;
106
107	0	if (this.characterEncoding == null)
108		{
109		// try and get the characterEncoding from the reader
110	0	this.characterEncoding = "US-ASCII";
111		try
112		{
113	0	this.characterEncoding = ((InputStreamReader)in).getEncoding();
114		}
115	0	catch (ClassCastException e)
116		{
117		// ignore
118	0	}
119		}
120
121	0	tokenizer = new StreamTokenizer(new BufferedReader(in));
122	0	initTokenizer(tokenizer);
123	0	}
124
125		/**
126		* Initialize the StreamTokenizer instance used to read the lines
127		* from the input reader. This must be implemented in subclasses to
128		* set up the tokenizing properties.
129		*
130		* @param tokenizer the StreamTokenizer to use
131		*/
132		protected abstract void initTokenizer(StreamTokenizer tokenizer);
133
134		/**
135		* Provide a logger
136		*
137		* @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(org.apache.avalon.framework.logger.Logger)
138		*/
139		public void enableLogging(Logger logger)
140		{
141	0	this.log = logger.getChildLogger("DataStreamParser");
142	0	}
143
144		/**
145		* Set the list of column names explicitly.
146		*
147		* @param columnNames A list of column names.
148		*/
149		public void setColumnNames(List<String> columnNames)
150		{
151	0	this.columnNames = columnNames;
152	0	}
153
154		/**
155		* Read the list of column names from the input reader using the
156		* tokenizer.
157		*
158		* @exception IOException an IOException occurred.
159		*/
160		public void readColumnNames()
161		throws IOException
162		{
163	0	columnNames = new ArrayList<String>();
164
165	0	neverRead = false;
166	0	tokenizer.nextToken();
167	0	while (tokenizer.ttype == StreamTokenizer.TT_WORD
168		|| tokenizer.ttype == '"')
169		{
170	0	columnNames.add(tokenizer.sval);
171	0	tokenizer.nextToken();
172		}
173	0	}
174
175		/**
176		* Determine whether a further row of values exists in the input.
177		*
178		* @return true if the input has more rows.
179		* @exception IOException an IOException occurred.
180		*/
181		public boolean hasNextRow()
182		throws IOException
183		{
184		// check for end of line ensures that an empty last line doesn't
185		// give a false positive for hasNextRow
186	0	if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
187		{
188	0	tokenizer.nextToken();
189	0	tokenizer.pushBack();
190	0	neverRead = false;
191		}
192	0	return tokenizer.ttype != StreamTokenizer.TT_EOF;
193		}
194
195		/**
196		* Returns a ValueParser object containing the next row of values.
197		*
198		* @return a ValueParser object.
199		* @exception IOException an IOException occurred.
200		* @exception NoSuchElementException there are no more rows in the input.
201		*/
202		public ValueParser nextRow()
203		throws IOException, NoSuchElementException
204		{
205	0	if (!hasNextRow())
206		{
207	0	throw new NoSuchElementException();
208		}
209
210	0	if (lineValues == null)
211		{
212	0	lineValues = new BaseValueParser(characterEncoding);
213		}
214		else
215		{
216	0	lineValues.clear();
217		}
218
219	0	Iterator<String> it = columnNames.iterator();
220	0	tokenizer.nextToken();
221	0	while (tokenizer.ttype == StreamTokenizer.TT_WORD
222		|| tokenizer.ttype == '"')
223		{
224		// note this means that if there are more values than
225		// column names, the extra values are discarded.
226	0	if (it.hasNext())
227		{
228	0	String colname = it.next().toString();
229	0	String colval = tokenizer.sval;
230	0	if (log.isDebugEnabled())
231		{
232	0	log.debug("DataStreamParser.nextRow(): " +
233		colname + '=' + colval);
234		}
235	0	lineValues.add(colname, colval);
236		}
237	0	tokenizer.nextToken();
238		}
239
240	0	return lineValues;
241		}
242
243		/**
244		* Determine whether a further row of values exists in the input.
245		*
246		* @return true if the input has more rows.
247		*/
248		public boolean hasNext()
249		{
250	0	boolean hasNext = false;
251
252		try
253		{
254	0	hasNext = hasNextRow();
255		}
256	0	catch (IOException e)
257		{
258	0	log.error("IOException in CSVParser.hasNext", e);
259	0	}
260
261	0	return hasNext;
262		}
263
264		/**
265		* Returns a ValueParser object containing the next row of values.
266		*
267		* @return a ValueParser object as an Object.
268		* @exception NoSuchElementException there are no more rows in the input
269		* or an IOException occurred.
270		*/
271		public ValueParser next()
272		throws NoSuchElementException
273		{
274	0	ValueParser nextRow = null;
275
276		try
277		{
278	0	nextRow = nextRow();
279		}
280	0	catch (IOException e)
281		{
282	0	log.error("IOException in CSVParser.next", e);
283	0	throw new NoSuchElementException();
284	0	}
285
286	0	return nextRow;
287		}
288
289		/**
290		* The optional Iterator.remove method is not supported.
291		*
292		* @exception UnsupportedOperationException the operation is not supported.
293		*/
294		public void remove()
295		throws UnsupportedOperationException
296		{
297	0	throw new UnsupportedOperationException();
298		}
299		}