Coverage Report - org.apache.fulcrum.parser.DataStreamParser
 
Classes in this File Line Coverage Branch Coverage Complexity
DataStreamParser
0%
0/59
0%
0/24
2,7
 
 1  
 package org.apache.fulcrum.parser;
 2  
 
 3  
 
 4  
 /*
 5  
  * Licensed to the Apache Software Foundation (ASF) under one
 6  
  * or more contributor license agreements.  See the NOTICE file
 7  
  * distributed with this work for additional information
 8  
  * regarding copyright ownership.  The ASF licenses this file
 9  
  * to you under the Apache License, Version 2.0 (the
 10  
  * "License"); you may not use this file except in compliance
 11  
  * with the License.  You may obtain a copy of the License at
 12  
  *
 13  
  *   http://www.apache.org/licenses/LICENSE-2.0
 14  
  *
 15  
  * Unless required by applicable law or agreed to in writing,
 16  
  * software distributed under the License is distributed on an
 17  
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 18  
  * KIND, either express or implied.  See the License for the
 19  
  * specific language governing permissions and limitations
 20  
  * under the License.
 21  
  */
 22  
 
 23  
 
 24  
 import java.io.BufferedReader;
 25  
 import java.io.IOException;
 26  
 import java.io.InputStreamReader;
 27  
 import java.io.Reader;
 28  
 import java.io.StreamTokenizer;
 29  
 import java.util.ArrayList;
 30  
 import java.util.Iterator;
 31  
 import java.util.List;
 32  
 import java.util.NoSuchElementException;
 33  
 
 34  
 import org.apache.avalon.framework.logger.LogEnabled;
 35  
 import org.apache.avalon.framework.logger.Logger;
 36  
 
 37  
 /**
 38  
  * DataStreamParser is used to parse a stream with a fixed format and
 39  
  * generate ValueParser objects which can be used to extract the values
 40  
  * in the desired type.
 41  
  *
 42  
  * <p>The class itself is abstract - a concrete subclass which implements
 43  
  * the initTokenizer method such as CSVParser or TSVParser is required
 44  
  * to use the functionality.
 45  
  *
 46  
  * <p>The class implements the java.util.Iterator interface for convenience.
 47  
  * This allows simple use in a Velocity template for example:
 48  
  *
 49  
  * <pre>
 50  
  * #foreach ($row in $datastream)
 51  
  *   Name: $row.Name
 52  
  *   Description: $row.Description
 53  
  * #end
 54  
  * </pre>
 55  
  *
 56  
  * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
 57  
  * @version $Id: DataStreamParser.java 1844836 2018-10-25 14:59:49Z painter $
 58  
  */
 59  0
 public abstract class DataStreamParser
 60  
     implements Iterator<ValueParser>, LogEnabled
 61  
 {
 62  
     /**
 63  
      * The list of column names.
 64  
      */
 65  
     private List<String>    columnNames;
 66  
 
 67  
     /**
 68  
      * The stream tokenizer for reading values from the input reader.
 69  
      */
 70  
     private final StreamTokenizer tokenizer;
 71  
 
 72  
     /**
 73  
      * The parameter parser holding the values of columns for the current line.
 74  
      */
 75  
     private ValueParser     lineValues;
 76  
 
 77  
     /**
 78  
      * Indicates whether or not the tokenizer has read anything yet.
 79  
      */
 80  0
     private boolean         neverRead = true;
 81  
 
 82  
     /**
 83  
      * The character encoding of the input
 84  
      */
 85  
     private String          characterEncoding;
 86  
 
 87  
     /**
 88  
      * Logger to use
 89  
      */
 90  
     protected Logger log;
 91  
 
 92  
     /**
 93  
      * Create a new DataStreamParser instance. Requires a Reader to read the
 94  
      * comma-separated values from, a list of column names and a
 95  
      * character encoding.
 96  
      *
 97  
      * @param in the input reader.
 98  
      * @param columnNames a list of column names.
 99  
      * @param characterEncoding the character encoding of the input.
 100  
      */
 101  
     public DataStreamParser(Reader in, List<String> columnNames,
 102  
             String characterEncoding)
 103  0
     {
 104  0
         this.columnNames = columnNames;
 105  0
         this.characterEncoding = characterEncoding;
 106  
 
 107  0
         if (this.characterEncoding == null)
 108  
         {
 109  
             // try and get the characterEncoding from the reader
 110  0
             this.characterEncoding = "US-ASCII";
 111  
             try
 112  
             {
 113  0
                 this.characterEncoding = ((InputStreamReader)in).getEncoding();
 114  
             }
 115  0
             catch (ClassCastException e)
 116  
             {
 117  
                 // ignore
 118  0
             }
 119  
         }
 120  
 
 121  0
         tokenizer = new StreamTokenizer(new BufferedReader(in));
 122  0
         initTokenizer(tokenizer);
 123  0
     }
 124  
 
 125  
     /**
 126  
      * Initialize the StreamTokenizer instance used to read the lines
 127  
      * from the input reader. This must be implemented in subclasses to
 128  
      * set up the tokenizing properties.
 129  
      * 
 130  
      * @param tokenizer the StreamTokenizer to use
 131  
      */
 132  
     protected abstract void initTokenizer(StreamTokenizer tokenizer);
 133  
 
 134  
     /**
 135  
      * Provide a logger
 136  
      *
 137  
      * @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(org.apache.avalon.framework.logger.Logger)
 138  
      */
 139  
     public void enableLogging(Logger logger)
 140  
     {
 141  0
         this.log = logger.getChildLogger("DataStreamParser");
 142  0
     }
 143  
 
 144  
     /**
 145  
      * Set the list of column names explicitly.
 146  
      *
 147  
      * @param columnNames A list of column names.
 148  
      */
 149  
     public void setColumnNames(List<String> columnNames)
 150  
     {
 151  0
         this.columnNames = columnNames;
 152  0
     }
 153  
 
 154  
     /**
 155  
      * Read the list of column names from the input reader using the
 156  
      * tokenizer.
 157  
      *
 158  
      * @exception IOException an IOException occurred.
 159  
      */
 160  
     public void readColumnNames()
 161  
         throws IOException
 162  
     {
 163  0
         columnNames = new ArrayList<String>();
 164  
 
 165  0
         neverRead = false;
 166  0
         tokenizer.nextToken();
 167  0
         while (tokenizer.ttype == StreamTokenizer.TT_WORD
 168  
                || tokenizer.ttype == '"')
 169  
         {
 170  0
             columnNames.add(tokenizer.sval);
 171  0
             tokenizer.nextToken();
 172  
         }
 173  0
     }
 174  
 
 175  
     /**
 176  
      * Determine whether a further row of values exists in the input.
 177  
      *
 178  
      * @return true if the input has more rows.
 179  
      * @exception IOException an IOException occurred.
 180  
      */
 181  
     public boolean hasNextRow()
 182  
         throws IOException
 183  
     {
 184  
         // check for end of line ensures that an empty last line doesn't
 185  
         // give a false positive for hasNextRow
 186  0
         if (neverRead || tokenizer.ttype == StreamTokenizer.TT_EOL)
 187  
         {
 188  0
             tokenizer.nextToken();
 189  0
             tokenizer.pushBack();
 190  0
             neverRead = false;
 191  
         }
 192  0
         return tokenizer.ttype != StreamTokenizer.TT_EOF;
 193  
     }
 194  
 
 195  
     /**
 196  
      * Returns a ValueParser object containing the next row of values.
 197  
      *
 198  
      * @return a ValueParser object.
 199  
      * @exception IOException an IOException occurred.
 200  
      * @exception NoSuchElementException there are no more rows in the input.
 201  
      */
 202  
     public ValueParser nextRow()
 203  
         throws IOException, NoSuchElementException
 204  
     {
 205  0
         if (!hasNextRow())
 206  
         {
 207  0
             throw new NoSuchElementException();
 208  
         }
 209  
 
 210  0
         if (lineValues == null)
 211  
         {
 212  0
             lineValues = new BaseValueParser(characterEncoding);
 213  
         }
 214  
         else
 215  
         {
 216  0
             lineValues.clear();
 217  
         }
 218  
 
 219  0
         Iterator<String> it = columnNames.iterator();
 220  0
         tokenizer.nextToken();
 221  0
         while (tokenizer.ttype == StreamTokenizer.TT_WORD
 222  
                || tokenizer.ttype == '"')
 223  
         {
 224  
             // note this means that if there are more values than
 225  
             // column names, the extra values are discarded.
 226  0
             if (it.hasNext())
 227  
             {
 228  0
                 String colname = it.next().toString();
 229  0
                 String colval  = tokenizer.sval;
 230  0
                 if (log.isDebugEnabled())
 231  
                 {
 232  0
                     log.debug("DataStreamParser.nextRow(): " +
 233  
                               colname + '=' + colval);
 234  
                 }
 235  0
                 lineValues.add(colname, colval);
 236  
             }
 237  0
             tokenizer.nextToken();
 238  
         }
 239  
 
 240  0
         return lineValues;
 241  
     }
 242  
 
 243  
     /**
 244  
      * Determine whether a further row of values exists in the input.
 245  
      *
 246  
      * @return true if the input has more rows.
 247  
      */
 248  
     public boolean hasNext()
 249  
     {
 250  0
         boolean hasNext = false;
 251  
 
 252  
         try
 253  
         {
 254  0
             hasNext = hasNextRow();
 255  
         }
 256  0
         catch (IOException e)
 257  
         {
 258  0
             log.error("IOException in CSVParser.hasNext", e);
 259  0
         }
 260  
 
 261  0
         return hasNext;
 262  
     }
 263  
 
 264  
     /**
 265  
      * Returns a ValueParser object containing the next row of values.
 266  
      *
 267  
      * @return a ValueParser object as an Object.
 268  
      * @exception NoSuchElementException there are no more rows in the input
 269  
      *                                   or an IOException occurred.
 270  
      */
 271  
     public ValueParser next()
 272  
         throws NoSuchElementException
 273  
     {
 274  0
         ValueParser nextRow = null;
 275  
 
 276  
         try
 277  
         {
 278  0
             nextRow = nextRow();
 279  
         }
 280  0
         catch (IOException e)
 281  
         {
 282  0
             log.error("IOException in CSVParser.next", e);
 283  0
             throw new NoSuchElementException();
 284  0
         }
 285  
 
 286  0
         return nextRow;
 287  
     }
 288  
 
 289  
     /**
 290  
      * The optional Iterator.remove method is not supported.
 291  
      *
 292  
      * @exception UnsupportedOperationException the operation is not supported.
 293  
      */
 294  
     public void remove()
 295  
         throws UnsupportedOperationException
 296  
     {
 297  0
         throw new UnsupportedOperationException();
 298  
     }
 299  
 }