View Javadoc
1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one or more
3    * contributor license agreements.  See the NOTICE file distributed with
4    * this work for additional information regarding copyright ownership.
5    * The ASF licenses this file to You under the Apache License, Version 2.0
6    * (the "License"); you may not use this file except in compliance with
7    * the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   * Unless required by applicable law or agreed to in writing, software
12   * distributed under the License is distributed on an "AS IS" BASIS,
13   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   * See the License for the specific language governing permissions and
15   * limitations under the License.
16   */
17  package org.apache.commons.geometry.io.core.internal;
18  
19  import java.io.Reader;
20  import java.util.Arrays;
21  import java.util.List;
22  import java.util.function.IntConsumer;
23  import java.util.function.IntPredicate;
24  
25  /** Class providing basic text parsing capabilities. The goals of this class are to
26   * (1) provide a simple, flexible API for performing common text parsing operations and
27   * (2) provide a mechanism for creating consistent and informative parsing errors.
28   * This class is not intended as a replacement for grammar-based parsers and/or lexers.
29   */
30  public class SimpleTextParser {
31  
32      /** Constant indicating that the end of the input has been reached. */
33      private static final int EOF = -1;
34  
35      /** Carriage return character. */
36      private static final char CR = '\r';
37  
38      /** Line feed character. */
39      private static final char LF = '\n';
40  
41      /** Default value for the max string length property. */
42      private static final int DEFAULT_MAX_STRING_LENGTH = 1024;
43  
44      /** Error message prefix used when a string exceeds the configured maximum length. */
45      private static final String STRING_LENGTH_ERR_MSG = "string length exceeds maximum value of ";
46  
47      /** Initial value of the token line and column numbers, used before any token has been read. */
48      private static final int INITIAL_TOKEN_POS = -1;
49  
50      /** Int consumer that does nothing. */
51      private static final IntConsumer NOOP_CONSUMER = ch -> { };
52  
53      /** Current line number; line numbers start counting at 1. */
54      private int lineNumber = 1;
55  
56      /** Current character column on the current line; column numbers start at 1. */
57      private int columnNumber = 1;
58  
59      /** Maximum length for strings returned by this instance. */
60      private int maxStringLength = DEFAULT_MAX_STRING_LENGTH;
61  
62      /** The current token. */
63      private String currentToken;
64  
65      /** The line number that the current token started on. */
66      private int currentTokenLineNumber = INITIAL_TOKEN_POS;
67  
68      /** The column number that the current token started on. */
69      private int currentTokenColumnNumber = INITIAL_TOKEN_POS;
70  
71      /** Flag used to indicate that at least one token has been read from the stream. */
72      private boolean hasSetToken;
73  
74      /** Character read buffer used to access the character stream. */
75      private final CharReadBuffer buffer;
76  
77      /** Construct a new instance that reads characters from the given reader. The
78       * reader will not be closed.
79       * @param reader reader instance to read characters from
80       */
81      public SimpleTextParser(final Reader reader) {
82          this(new CharReadBuffer(reader));
83      }
84  
85      /** Construct a new instance that reads characters from the given character buffer.
86       * @param buffer read buffer to read characters from
87       */
88      public SimpleTextParser(final CharReadBuffer buffer) {
89          this.buffer = buffer;
90      }
91  
92      /** Get the current line number. Line numbers start at 1.
93       * @return the current line number
94       */
95      public int getLineNumber() {
96          return lineNumber;
97      }
98  
99      /** Set the current line number. This does not affect the character stream position,
100      * only the value returned by {@link #getLineNumber()}.
101      * @param lineNumber line number to set; line numbers start at 1
102      */
103     public void setLineNumber(final int lineNumber) {
104         this.lineNumber = lineNumber;
105     }
106 
107     /** Get the current column number. This indicates the column position of the
108      * character that will returned by the next call to {@link #readChar()}. The first
109      * character of each line has a column number of 1.
110      * @return the current column number; column numbers start at 1
111      */
112     public int getColumnNumber() {
113         return columnNumber;
114     }
115 
116     /** Set the current column number. This does not affect the character stream position,
117      * only the value returned by {@link #getColumnNumber()}.
118      * @param column the column number to set; column numbers start at 1
119      */
120     public void setColumnNumber(final int column) {
121         this.columnNumber = column;
122     }
123 
124     /** Get the maximum length for strings returned by this instance. Operations
125      * that produce strings longer than this length will throw an exception.
126      * @return maximum length for strings returned by this instance
127      */
128     public int getMaxStringLength() {
129         return maxStringLength;
130     }
131 
132     /** Set the maximum length for strings returned by this instance. Operations
133      * that produce strings longer than this length will throw an exception.
134      * @param maxStringLength maximum length for strings returned by this instance
135      * @throws IllegalArgumentException if the argument is less than zero
136      */
137     public void setMaxStringLength(final int maxStringLength) {
138         if (maxStringLength < 0) {
139             throw new IllegalArgumentException("Maximum string length cannot be less than zero; was " +
140                     maxStringLength);
141         }
142         this.maxStringLength = maxStringLength;
143     }
144 
145     /** Get the current token. This is the most recent string read by one of the {@code nextXXX()}
146      * methods. This value will be null if no token has yet been read or if the end of content has
147      * been reached.
148      * @return the current token
149      * @see #next(int)
150      * @see #next(IntPredicate)
151      * @see #nextLine()
152      * @see #nextAlphanumeric()
153      */
154     public String getCurrentToken() {
155         return currentToken;
156     }
157 
158     /** Return true if the current token is not null or empty.
159      * @return true if the current token is not null or empty
160      * @see #getCurrentToken()
161      */
162     public boolean hasNonEmptyToken() {
163         return currentToken != null && !currentToken.isEmpty();
164     }
165 
166     /** Get the line number that the current token started on. This value will
167      * be -1 if no token has been read yet.
168      * @return current token starting line number or -1 if no token has been
169      *      read yet
170      * @see #getCurrentToken()
171      */
172     public int getCurrentTokenLineNumber() {
173         return currentTokenLineNumber;
174     }
175 
176     /** Get the column position that the current token started on. This value will
177      * be -1 if no token has been read yet.
178      * @return current token column number or -1 if no oken has been read yet
179      * @see #getCurrentToken()
180      */
181     public int getCurrentTokenColumnNumber() {
182         return currentTokenColumnNumber;
183     }
184 
185     /** Get the current token parsed as an integer.
186      * @return the current token parsed as an integer
187      * @throws IllegalStateException if no token has been read or the
188      *      current token cannot be parsed as an integer
189      */
190     public int getCurrentTokenAsInt() {
191         ensureHasSetToken();
192 
193         Throwable cause = null;
194 
195         if (currentToken != null) {
196             try {
197                 return Integer.parseInt(currentToken);
198             } catch (NumberFormatException exc) {
199                 cause = exc;
200             }
201         }
202 
203         throw unexpectedToken("integer", cause);
204     }
205 
206     /** Get the current token parsed as a double.
207      * @return the current token parsed as a double
208      * @throws IllegalStateException if no token has been read or the
209      *      current token cannot be parsed as a double
210      */
211     public double getCurrentTokenAsDouble() {
212         ensureHasSetToken();
213 
214         Throwable cause = null;
215 
216         if (currentToken != null) {
217             try {
218                 return Double.parseDouble(currentToken);
219             } catch (NumberFormatException exc) {
220                 cause = exc;
221             }
222         }
223 
224         throw unexpectedToken("double", cause);
225     }
226 
227     /** Return true if there are more characters to read from this instance.
228      * @return true if there are more characters to read from this instance
229      * @throws java.io.UncheckedIOException if an I/O error occurs
230      */
231     public boolean hasMoreCharacters() {
232         return buffer.hasMoreCharacters();
233     }
234 
235     /** Return true if there are more characters to read on the current line.
236      * @return true if there are more characters to read on the current line
237      * @throws java.io.UncheckedIOException if an I/O error occurs
238      */
239     public boolean hasMoreCharactersOnLine() {
240         return hasMoreCharacters() && isNotNewLinePart(peekChar());
241     }
242 
243     /** Read and return the next character in the stream and advance the parser position.
244      * This method updates the current line number and column number but does <strong>not</strong>
245      * set the {@link #getCurrentToken() current token}.
246      * @return the next character in the stream or -1 if the end of the stream has been
247      *      reached
248      * @throws java.io.UncheckedIOException if an I/O error occurs
249      * @see #peekChar()
250      */
251     public int readChar() {
252         final int value = buffer.read();
253         if (value == LF ||
254                 (value == CR && peekChar() != LF)) {
255             ++lineNumber;
256             columnNumber = 1;
257         } else if (value != EOF) {
258             ++columnNumber;
259         }
260 
261         return value;
262     }
263 
264     /** Read a string containing at most {@code len} characters from the stream and
265      * set it as the current token. Characters are added to the string until the string
266      * has the specified length or the end of the stream is reached. The characters are
267      * consumed from the stream. The token is set to null if no more characters are available
268      * from the character stream when this method is called.
269      * @param len the maximum length of the extracted string
270      * @return this instance
271      * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
272      *      configured {@link #getMaxStringLength() maximum string length}
273      * @throws java.io.UncheckedIOException if an I/O error occurs
274      * @see #getCurrentToken()
275      * @see #consume(int, IntConsumer)
276      */
277     public SimpleTextParser next(final int len) {
278         validateRequestedStringLength(len);
279 
280         final int line = getLineNumber();
281         final int col = getColumnNumber();
282 
283         String token = null;
284         if (hasMoreCharacters()) {
285             final StringBuilder sb = new StringBuilder(len);
286 
287             consume(len, ch -> sb.append((char) ch));
288 
289             token = sb.toString();
290         }
291 
292         setToken(line, col, token);
293 
294         return this;
295     }
296 
297     /** Read a string containing at most {@code len} characters from the stream and
298      * set it as the current token. This is similar to {@link #next(int)} but with the exception
299      * that new line sequences beginning with {@code lineContinuationChar} are skipped.
300      * @param lineContinuationChar character used to indicate skipped new line sequences
301      * @param len the maximum length of the extracted string
302      * @return this instance
303      * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
304      *      configured {@link #getMaxStringLength() maximum string length}
305      * @throws java.io.UncheckedIOException if an I/O error occurs
306      * @see #getCurrentToken()
307      * @see #consumeWithLineContinuation(char, int, IntConsumer)
308      */
309     public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final int len) {
310         validateRequestedStringLength(len);
311 
312         final int line = getLineNumber();
313         final int col = getColumnNumber();
314 
315         String token = null;
316         if (hasMoreCharacters()) {
317             final StringBuilder sb = new StringBuilder(len);
318 
319             consumeWithLineContinuation(lineContinuationChar, len,
320                     ch -> sb.append((char) ch));
321 
322             token = sb.toString();
323         }
324 
325         setToken(line, col, token);
326 
327         return this;
328     }
329 
330     /** Read characters from the stream while the given predicate returns true and set the result
331      * as the current token. The next call to {@link #readChar()} will return either a character
332      * that fails the predicate test or -1 if the end of the stream has been reached.
333      * The token will be null if the end of the stream has been reached prior to the method call.
334      * @param pred predicate function passed characters read from the input; reading continues
335      *      until the predicate returns false
336      * @return this instance
337      * @throws IllegalStateException if the length of the produced string exceeds the configured
338      *      {@link #getMaxStringLength() maximum string length}
339      * @throws java.io.UncheckedIOException if an I/O error occurs
340      * @see #getCurrentToken()
341      * @see #consume(IntPredicate, IntConsumer)
342      */
343     public SimpleTextParser next(final IntPredicate pred) {
344         final int line = getLineNumber();
345         final int col = getColumnNumber();
346 
347         String token = null;
348         if (hasMoreCharacters()) {
349             final StringCollector collector = new StringCollector(line, col, pred);
350 
351             consume(collector, collector);
352 
353             token = collector.getString();
354         }
355 
356         setToken(line, col, token);
357 
358         return this;
359     }
360 
361     /** Read characters from the stream while the given predicate returns true and set the result
362      * as the current token. This is similar to {@link #next(IntPredicate)} but with the exception
363      * that new line sequences prefixed with {@code lineContinuationChar} are skipped.
364      * @param lineContinuationChar character used to indicate skipped new line sequences
365      * @param pred predicate function passed characters read from the input; reading continues
366      *      until the predicate returns false
367      * @return this instance
368      * @throws IllegalStateException if the length of the produced string exceeds the configured
369      *      {@link #getMaxStringLength() maximum string length}
370      * @throws java.io.UncheckedIOException if an I/O error occurs
371      * @see #getCurrentToken()
372      * @see #consume(IntPredicate, IntConsumer)
373      */
374     public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final IntPredicate pred) {
375         final int line = getLineNumber();
376         final int col = getColumnNumber();
377 
378         String token = null;
379         if (hasMoreCharacters()) {
380             final StringCollector collector = new StringCollector(line, col, pred);
381 
382             consumeWithLineContinuation(lineContinuationChar, collector, collector);
383 
384             token = collector.getString();
385         }
386 
387         setToken(line, col, token);
388 
389         return this;
390     }
391 
392     /** Read characters from the current parser position to the next new line sequence and
393      * set the result as the current token . The newline character sequence
394      * ('\r', '\n', or '\r\n') at the end of the line is consumed but is not included in the token.
395      * The token will be null if the end of the stream has been reached prior to the method call.
396      * @return this instance
397      * @throws IllegalStateException if the length of the produced string exceeds the configured
398      *      {@link #getMaxStringLength() maximum string length}
399      * @throws java.io.UncheckedIOException if an I/O error occurs
400      * @see #getCurrentToken()
401      */
402     public SimpleTextParser nextLine() {
403         next(SimpleTextParser::isNotNewLinePart);
404 
405         discardNewLineSequence();
406 
407         return this;
408     }
409 
410     /** Read a sequence of alphanumeric characters starting from the current parser position
411      * and set the result as the current token. The token will be the empty string if the next
412      * character in the stream is not alphanumeric and will be null if the end of the stream has
413      * been reached prior to the method call.
414      * @return this instance
415      * @throws IllegalStateException if the length of the produced string exceeds the configured
416      *      {@link #getMaxStringLength() maximum string length}
417      * @throws java.io.UncheckedIOException if an I/O error occurs
418      * @see #getCurrentToken()
419      */
420     public SimpleTextParser nextAlphanumeric() {
421         return next(SimpleTextParser::isAlphanumeric);
422     }
423 
424     /** Discard {@code len} number of characters from the character stream. The
425      * parser position is updated but the current token is not changed.
426      * @param len number of characters to discard
427      * @return this instance
428      * @throws java.io.UncheckedIOException if an I/O error occurs
429      */
430     public SimpleTextParser discard(final int len) {
431         return consume(len, NOOP_CONSUMER);
432     }
433 
434     /** Discard {@code len} number of characters from the character stream. The
435      * parser position is updated but the current token is not changed. Lines beginning
436      * with {@code lineContinuationChar} are skipped.
437      * @param lineContinuationChar character used to indicate skipped new line sequences
438      * @param len number of characters to discard
439      * @return this instance
440      * @throws java.io.UncheckedIOException if an I/O error occurs
441      */
442     public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
443             final int len) {
444         return consumeWithLineContinuation(lineContinuationChar, len, NOOP_CONSUMER);
445     }
446 
447     /** Discard characters from the stream while the given predicate returns true. The next call
448      * to {@link #readChar()} will return either a character that fails the predicate test or -1
449      * if the end of the stream has been reached. The parser position is updated but the current
450      * token is not changed.
451      * @param pred predicate test for characters to discard
452      * @return this instance
453      * @throws java.io.UncheckedIOException if an I/O error occurs
454      */
455     public SimpleTextParser discard(final IntPredicate pred) {
456         return consume(pred, NOOP_CONSUMER);
457     }
458 
459     /** Discard characters from the stream while the given predicate returns true. New line sequences
460      * beginning with {@code lineContinuationChar} are skipped. The next call o {@link #readChar()}
461      * will return either a character that fails the predicate test or -1 if the end of the stream
462      * has been reached. The parser position is updated but the current token is not changed.
463      * @param lineContinuationChar character used to indicate skipped new line sequences
464      * @param pred predicate test for characters to discard
465      * @return this instance
466      * @throws java.io.UncheckedIOException if an I/O error occurs
467      */
468     public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar,
469             final IntPredicate pred) {
470         return consumeWithLineContinuation(lineContinuationChar, pred, NOOP_CONSUMER);
471     }
472 
473     /** Discard a sequence of whitespace characters from the character stream starting from the
474      * current parser position. The next call to {@link #readChar()} will return either a non-whitespace
475      * character or -1 if the end of the stream has been reached. The parser position is updated
476      * but the current token is not changed.
477      * @return this instance
478      * @throws java.io.UncheckedIOException if an I/O error occurs
479      */
480     public SimpleTextParser discardWhitespace() {
481         return discard(SimpleTextParser::isWhitespace);
482     }
483 
484     /** Discard the next whitespace characters on the current line. The next call to
485      * {@link #readChar()} will return either a non-whitespace character on the current line,
486      * the newline character sequence (indicating the end of the line), or -1 (indicating the
487      * end of the stream). The parser position is updated but the current token is not changed.
488      * @return this instance
489      * @throws java.io.UncheckedIOException if an I/O error occurs
490      */
491     public SimpleTextParser discardLineWhitespace() {
492         return discard(SimpleTextParser::isLineWhitespace);
493     }
494 
495     /** Discard the newline character sequence at the current reader position. The sequence
496      * is defined as one of "\r", "\n", or "\r\n". Does nothing if the reader is not positioned
497      * at a newline sequence. The parser position is updated but the current token is not changed.
498      * @return this instance
499      * @throws java.io.UncheckedIOException if an I/O error occurs
500      */
501     public SimpleTextParser discardNewLineSequence() {
502         final int value = peekChar();
503         if (value == LF) {
504             readChar();
505         } else if (value == CR) {
506             readChar();
507 
508             if (peekChar() == LF) {
509                 readChar();
510             }
511         }
512 
513         return this;
514     }
515 
516     /** Discard all remaining characters on the current line, including the terminating
517      * newline character sequence. The next call to {@link #readChar()} will return either the
518      * first character on the next line or -1 if the end of the stream has been reached.
519      * The parser position is updated but the current token is not changed.
520      * @return this instance
521      * @throws java.io.UncheckedIOException if an I/O error occurs
522      */
523     public SimpleTextParser discardLine() {
524         discard(SimpleTextParser::isNotNewLinePart);
525 
526         discardNewLineSequence();
527 
528         return this;
529     }
530 
531     /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
532      * returns true. The operation ends when the predicate returns false or the end of the stream is
533      * reached.
534      * @param pred predicate test for characters to consume
535      * @param consumer object to be passed each consumed character
536      * @return this instance
537      * @throws java.io.UncheckedIOException if an I/O error occurs
538      */
539     public SimpleTextParser consume(final IntPredicate pred, final IntConsumer consumer) {
540         int ch;
541         while ((ch = peekChar()) != EOF && pred.test(ch)) {
542             consumer.accept(readChar());
543         }
544 
545         return this;
546     }
547 
548     /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
549      * This method is similar to {@link #consume(int, IntConsumer)} with the exception that new line
550      * sequences prefixed with {@code lineContinuationChar} are skipped.
551      * @param lineContinuationChar character used to indicate skipped new line sequences
552      * @param len number of characters to consume
553      * @param consumer function to be passed each consumed character
554      * @return this instance
555      * @throws java.io.UncheckedIOException if an I/O error occurs
556      */
557     public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
558             final int len, final IntConsumer consumer) {
559         int i = -1;
560         int ch;
561         while (++i < len && (ch = readChar()) != EOF) {
562             if (ch == lineContinuationChar && isNewLinePart(peekChar())) {
563                 --i; // don't count the continuation char toward the total length
564                 discardNewLineSequence();
565             } else {
566                 consumer.accept(ch);
567             }
568         }
569 
570         return this;
571     }
572 
573     /** Consume at most {@code len} characters from the stream, passing each to the given consumer.
574      * The operation continues until {@code len} number of characters have been read or the end of
575      * the stream has been reached.
576      * @param len number of characters to consume
577      * @param consumer object to be passed each consumed character
578      * @return this instance
579      * @throws java.io.UncheckedIOException if an I/O error occurs
580      */
581     public SimpleTextParser consume(final int len, final IntConsumer consumer) {
582         int ch;
583         for (int i = 0; i < len; ++i) {
584             ch = readChar();
585             if (ch != EOF) {
586                 consumer.accept(ch);
587             } else {
588                 break;
589             }
590         }
591 
592         return this;
593     }
594 
595     /** Consume characters from the stream and pass them to {@code consumer} while the given predicate
596      * returns true. This method is similar to {@link #consume(IntPredicate, IntConsumer)} with the
597      * exception that new lines sequences beginning with {@code lineContinuationChar} are skipped.
598      * @param lineContinuationChar character used to indicate skipped new line sequences
599      * @param pred predicate test for characters to consume
600      * @param consumer object to be passed each consumed character
601      * @return this instance
602      * @throws java.io.UncheckedIOException if an I/O error occurs
603      */
604     public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar,
605             final IntPredicate pred, final IntConsumer consumer) {
606         int ch;
607         while ((ch = peekChar()) != EOF) {
608             if (ch == lineContinuationChar && isNewLinePart(buffer.charAt(1))) {
609                 readChar();
610                 discardNewLineSequence();
611             } else if (pred.test(ch)) {
612                 consumer.accept(readChar());
613             } else {
614                 break;
615             }
616         }
617 
618         return this;
619     }
620 
621     /** Return the next character in the stream but do not advance the parser position.
622      * @return the next character in the stream or -1 if the end of the stream has been
623      *      reached
624      * @throws java.io.UncheckedIOException if an I/O error occurs
625      * @see #readChar()
626      */
627     public int peekChar() {
628         return buffer.peek();
629     }
630 
631     /** Return a string containing containing at most {@code len} characters from the stream but
632      * without changing the parser position. Characters are added to the string until the
633      * string has the specified length or the end of the stream is reached.
634      * @param len the maximum length of the returned string
635      * @return a string containing containing at most {@code len} characters from the stream
636      *      or null if the parser has already reached the end of the stream
637      * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the
638      *      configured {@link #getMaxStringLength() maximum string length}
639      * @throws java.io.UncheckedIOException if an I/O error occurs
640      * @see #next(int)
641      */
642     public String peek(final int len) {
643         validateRequestedStringLength(len);
644 
645         return buffer.peekString(len);
646     }
647 
648     /** Read characters from the stream while the given predicate returns true but do not
649      * change the current token or advance the parser position.
650      * @param pred predicate function passed characters read from the input; reading continues
651      *      until the predicate returns false
652      * @return string containing characters matching {@code pred} or null if the parser has already
653      *      reached the end of the stream
654      * @throws IllegalStateException if the length of the produced string exceeds the configured
655      *      {@link #getMaxStringLength() maximum string length}
656      * @throws java.io.UncheckedIOException if an I/O error occurs
657      * @see #getCurrentToken()
658      */
659     public String peek(final IntPredicate pred) {
660         String token = null;
661 
662         if (hasMoreCharacters()) {
663             final StringCollector collector = new StringCollector(lineNumber, columnNumber, pred);
664 
665             int i = -1;
666             int ch = buffer.charAt(++i);
667             while (ch != EOF && collector.test(ch)) {
668                 collector.accept(ch);
669 
670                 ch = buffer.charAt(++i);
671             }
672 
673             token = collector.getString();
674         }
675 
676         return token;
677     }
678 
679     /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
680      * exception if they are not equal. The comparison is case-sensitive.
681      * @param expected expected token
682      * @return this instance
683      * @throws IllegalStateException if no token has been read or {@code expected} does not exactly
684      *      equal the current token
685      */
686     public SimpleTextParser match(final String expected) {
687         matchInternal(expected, true, true);
688         return this;
689     }
690 
691     /** Compare the {@link #getCurrentToken() current token} with the argument and throw an
692      * exception if they are not equal. The comparison is <em>not</em> case-sensitive.
693      * @param expected expected token
694      * @return this instance
695      * @throws IllegalStateException if no token has been read or {@code expected} does not equal
696      *      the current token (ignoring case)
697      */
698     public SimpleTextParser matchIgnoreCase(final String expected) {
699         matchInternal(expected, false, true);
700         return this;
701     }
702 
703     /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
704      * The comparison is case-sensitive.
705      * @param expected expected token
706      * @return true if the argument exactly equals the current token
707      * @throws IllegalStateException if no token has been read
708      * @throws java.io.UncheckedIOException if an I/O error occurs
709      */
710     public boolean tryMatch(final String expected) {
711         return matchInternal(expected, true, false);
712     }
713 
714     /** Return true if the {@link #getCurrentToken() current token} is equal to the argument.
715      * The comparison is <em>not</em> case-sensitive.
716      * @param expected expected token
717      * @return true if the argument equals the current token (ignoring case)
718      * @throws IllegalStateException if no token has been read
719      */
720     public boolean tryMatchIgnoreCase(final String expected) {
721         return matchInternal(expected, false, false);
722     }
723 
724     /** Internal method to compare the current token with the argument.
725      * @param expected expected token
726      * @param caseSensitive if the comparison should be case-sensitive
727      * @param throwOnFailure if an exception should be thrown if the argument is not
728      *      equal to the current token
729      * @return true if the argument is equal to the current token
730      * @throws IllegalStateException if no token has been read or {@code expected} does not match the
731      *      current token and {@code throwOnFailure} is true
732      */
733     private boolean matchInternal(final String expected, final boolean caseSensitive,
734             final boolean throwOnFailure) {
735         ensureHasSetToken();
736 
737         if (!stringsEqual(expected, currentToken, caseSensitive)) {
738             if (throwOnFailure) {
739                 throw unexpectedToken("[" + expected + "]");
740             }
741 
742             return false;
743         }
744 
745         return true;
746     }
747 
748     /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
749      * An exception is thrown if no match is found. String comparisons are case-sensitive.
750      * @param expected strings to compare with the current token
751      * @return index of the argument that exactly matches the current token
752      * @throws IllegalStateException if no token has been read or no match is found among the arguments
753      */
754     public int choose(final String... expected) {
755         return choose(Arrays.asList(expected));
756     }
757 
758     /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}.
759      * An exception is thrown if no match is found. String comparisons are case-sensitive.
760      * @param expected strings to compare with the current token
761      * @return index of the argument that exactly matches the current token
762      * @throws IllegalStateException if no token has been read or no match is found among the arguments
763      */
764     public int choose(final List<String> expected) {
765         return chooseInternal(expected, true, true);
766     }
767 
768     /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
769      * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
770      * case-sensitive.
771      * @param expected strings to compare with the current token
772      * @return index of the argument that matches the current token (ignoring case)
773      * @throws IllegalStateException if no token has been read or no match is found among the arguments
774      */
775     public int chooseIgnoreCase(final String... expected) {
776         return chooseIgnoreCase(Arrays.asList(expected));
777     }
778 
779     /** Return the index of the argument that matches the {@link #getCurrentToken() current token},
780      * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em>
781      * case-sensitive.
782      * @param expected strings to compare with the current token
783      * @return index of the argument that matches the current token (ignoring case)
784      * @throws IllegalStateException if no token has been read or no match is found among the arguments
785      */
786     public int chooseIgnoreCase(final List<String> expected) {
787         return chooseInternal(expected, false, true);
788     }
789 
790     /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
791      * or -1 if no match is found. String comparisons are case-sensitive.
792      * @param expected strings to compare with the current token
793      * @return index of the argument that exactly matches the current token or -1 if
794      *      no match is found
795      * @throws IllegalStateException if no token has been read
796      */
797     public int tryChoose(final String... expected) {
798         return tryChoose(Arrays.asList(expected));
799     }
800 
801     /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}
802      * or -1 if no match is found. String comparisons are case-sensitive.
803      * @param expected strings to compare with the current token
804      * @return index of the argument that exactly matches the current token or -1 if
805      *      no match is found
806      * @throws IllegalStateException if no token has been read
807      */
808     public int tryChoose(final List<String> expected) {
809         return chooseInternal(expected, true, false);
810     }
811 
812     /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
813      * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
814      * @param expected strings to compare with the current token
815      * @return index of the argument that matches the current token (ignoring case) or -1 if
816      *      no match is found
817      * @throws IllegalStateException if no token has been read
818      */
819     public int tryChooseIgnoreCase(final String... expected) {
820         return tryChooseIgnoreCase(Arrays.asList(expected));
821     }
822 
823     /** Return the index of the argument that matches the {@link #getCurrentToken() current token}
824      * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive.
825      * @param expected strings to compare with the current token
826      * @return index of the argument that matches the current token (ignoring case) or -1 if
827      *      no match is found
828      * @throws IllegalStateException if no token has been read
829      */
830     public int tryChooseIgnoreCase(final List<String> expected) {
831         return chooseInternal(expected, false, false);
832     }
833 
834     /** Internal method to compare the current token with a list of possible strings. The index of
835      * the matching argument is returned.
836      * @param expected strings to compare with the current token
837      * @param caseSensitive if the comparisons should be case-sensitive
838      * @param throwOnFailure if an exception should be thrown if no match is found
839      * @return the index of the matching argument or -1 if no match is found
840      * @throws IllegalStateException if no token has been read or no match is found and
841      *      {@code throwOnFailure} is true
842      */
843     private int chooseInternal(final List<String> expected, final boolean caseSensitive,
844             final boolean throwOnFailure) {
845         ensureHasSetToken();
846 
847         int i = 0;
848         for (final String str : expected) {
849             if (stringsEqual(str, currentToken, caseSensitive)) {
850                 return i;
851             }
852 
853             ++i;
854         }
855 
856         if (throwOnFailure) {
857             throw unexpectedToken("one of " + expected);
858         }
859 
860         return -1;
861     }
862 
863     /** Get an exception indicating that the current token was unexpected. The returned
864      * exception contains a message with the line number and column of the current token and
865      * a description of its value.
866      * @param expected string describing what was expected
867      * @return exception indicating that the current token was unexpected
868      */
869     public IllegalStateException unexpectedToken(final String expected) {
870         return unexpectedToken(expected, null);
871     }
872 
873     /** Get an exception indicating that the current token was unexpected. The returned
874      * exception contains a message with the line number and column of the current token and
875      * a description of its value.
876      * @param expected string describing what was expected
877      * @param cause cause of the error
878      * @return exception indicating that the current token was unexpected
879      */
880     public IllegalStateException unexpectedToken(final String expected, final Throwable cause) {
881 
882         StringBuilder msg = new StringBuilder();
883         msg.append("expected ")
884             .append(expected)
885             .append(" but found ")
886             .append(getCurrentTokenDescription());
887 
888         final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
889         final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;
890 
891         return parseError(line, col, msg.toString(), cause);
892     }
893 
894     /** Get an exception indicating an error during parsing at the current token position.
895      * @param msg error message
896      * @return an exception indicating an error during parsing at the current token position
897      */
898     public IllegalStateException tokenError(final String msg) {
899         return tokenError(msg, null);
900     }
901 
902     /** Get an exception indicating an error during parsing at the current token position.
903      * @param msg error message
904      * @param cause the cause of the error; may be null
905      * @return an exception indicating an error during parsing at the current token position
906      */
907     public IllegalStateException tokenError(final String msg, final Throwable cause) {
908         final int line = hasSetToken ? currentTokenLineNumber : lineNumber;
909         final int col = hasSetToken ? currentTokenColumnNumber : columnNumber;
910 
911         return parseError(line, col, msg, cause);
912     }
913 
914     /** Return an exception indicating an error occurring at the current parser position.
915      * @param msg error message
916      * @return an exception indicating an error during parsing
917      */
918     public IllegalStateException parseError(final String msg) {
919         return parseError(msg, null);
920     }
921 
922     /** Return an exception indicating an error occurring at the current parser position.
923      * @param msg error message
924      * @param cause the cause of the error; may be null
925      * @return an exception indicating an error during parsing
926      */
927     public IllegalStateException parseError(final String msg, final Throwable cause) {
928         return parseError(lineNumber, columnNumber, msg, cause);
929     }
930 
931     /** Return an exception indicating an error during parsing.
932      * @param line line number of the error
933      * @param col column number of the error
934      * @param msg error message
935      * @return an exception indicating an error during parsing
936      */
937     public IllegalStateException parseError(final int line, final int col, final String msg) {
938         return parseError(line, col, msg, null);
939     }
940 
941     /** Return an exception indicating an error during parsing.
942      * @param line line number of the error
943      * @param col column number of the error
944      * @param msg error message
945      * @param cause the cause of the error
946      * @return an exception indicating an error during parsing
947      */
948     public IllegalStateException parseError(final int line, final int col, final String msg,
949             final Throwable cause) {
950         final String fullMsg = String.format("Parsing failed at line %d, column %d: %s",
951                 line, col, msg);
952         return GeometryIOUtils.parseError(fullMsg, cause);
953     }
954 
955     /** Set the current token string and position.
956      * @param line line number for the start of the token
957      * @param col column number for the start of the token
958      * @param token token to set
959      */
960     private void setToken(final int line, final int col, final String token) {
961         currentTokenLineNumber = line;
962         currentTokenColumnNumber = col;
963         currentToken = token;
964 
965         hasSetToken = true;
966     }
967 
968     /** Get a user-friendly description of the current token.
969      * @return a user-friendly description of the current token.
970      */
971     private String getCurrentTokenDescription() {
972         if (currentToken == null || currentToken.isEmpty()) {
973             // attempt to return a more helpful message about the location
974             // of empty tokens by checking the buffer content; if this fails
975             // we'll ignore the error and continue with a more generic message
976             try {
977                 if (!hasMoreCharacters()) {
978                     return "end of content";
979                 } else if (currentToken != null) {
980                     if (!hasMoreCharactersOnLine()) {
981                         return "end of line";
982                     }
983                     return "empty token followed by [" + peek(1) + "]";
984                 }
985             } catch (IllegalStateException exc) {
986                 // ignore
987             }
988         }
989 
990         if (currentToken == null) {
991             return "no current token";
992         } else if (currentToken.isEmpty()) {
993             return "empty token";
994         }
995 
996         return "[" + currentToken + "]";
997     }
998 
999     /** Validate the requested string length.
1000      * @param len requested string length
1001      * @throws IllegalArgumentException if {@code len} is less than 0 or greater than {@code maxStringLength}
1002      */
1003     private void validateRequestedStringLength(final int len) {
1004         if (len < 0) {
1005             throw new IllegalArgumentException("Requested string length cannot be negative; was " + len);
1006         } else if (len > maxStringLength) {
1007             throw new IllegalArgumentException("Requested string length of " + len + " exceeds maximum value of " +
1008                     maxStringLength);
1009         }
1010     }
1011 
1012     /** Ensure that a token read operation has been performed, throwing an exception if not.
1013      * @throws IllegalStateException if no token read operation has been performed
1014      */
1015     private void ensureHasSetToken() {
1016         if (!hasSetToken) {
1017             throw new IllegalStateException("No token has been read from the character stream");
1018         }
1019     }
1020 
1021     /** Return true if the given character (Unicode code point) is whitespace.
1022      * @param ch character (Unicode code point) to test
1023      * @return true if the given character is whitespace
1024      * @see Character#isWhitespace(int)
1025      */
1026     public static boolean isWhitespace(final int ch) {
1027         return Character.isWhitespace(ch);
1028     }
1029 
1030     /** Return true if the given character (Unicode code point) is not whitespace.
1031      * @param ch character (Unicode code point) to test
1032      * @return true if the given character is not whitespace
1033      * @see #isWhitespace(int)
1034      */
1035     public static boolean isNotWhitespace(final int ch) {
1036         return !isWhitespace(ch);
1037     }
1038 
1039     /** Return true if the given character (Unicode code point) is whitespace
1040      * that is not used in newline sequences (ie, not '\r' or '\n').
1041      * @param ch character (Unicode code point) to test
1042      * @return true if the given character is a whitespace character not used in newline
1043      *      sequences
1044      */
1045     public static boolean isLineWhitespace(final int ch) {
1046         return isWhitespace(ch) && isNotNewLinePart(ch);
1047     }
1048 
1049     /** Return true if the given character (Unicode code point) is used
1050      * as part of newline sequences (ie, is either '\r' or '\n').
1051      * @param ch character (Unicode code point) to test
1052      * @return true if the given character is used as part of newline sequences
1053      */
1054     public static boolean isNewLinePart(final int ch) {
1055         return ch == CR || ch == LF;
1056     }
1057 
1058     /** Return true if the given character (Unicode code point) is not used as
1059      * part of newline sequences (ie, not '\r' or '\n').
1060      * @param ch character (Unicode code point) to test
1061      * @return true if the given character is not used as part of newline sequences
1062      * @see #isNewLinePart(int)
1063      */
1064     public static boolean isNotNewLinePart(final int ch) {
1065         return !isNewLinePart(ch);
1066     }
1067 
1068     /** Return true if the given character (Unicode code point) is alphanumeric.
1069      * @param ch character (Unicode code point) to test
1070      * @return true if the argument is alphanumeric
1071      * @see Character#isAlphabetic(int)
1072      * @see Character#isDigit(int)
1073      */
1074     public static boolean isAlphanumeric(final int ch) {
1075         return Character.isAlphabetic(ch) ||
1076                 Character.isDigit(ch);
1077     }
1078 
1079     /** Return true if the given character (Unicode code point) is not alphanumeric.
1080      * @param ch character (Unicode code point) to test
1081      * @return true if the argument is not alphanumeric
1082      * @see #isAlphanumeric(int)
1083      */
1084     public static boolean isNotAlphanumeric(final int ch) {
1085         return !isAlphanumeric(ch);
1086     }
1087 
1088     /** Return true if the given character (Unicode code point) can be used as part of
1089      * the string representation of an integer. This will be true for the following types
1090      * of characters:
1091      * <ul>
1092      *  <li>{@link Character#isDigit(int) digits}</li>
1093      *  <li>the '-' (minus) character</li>
1094      *  <li>the '+' (plus) character</li>
1095      * </ul>
1096      * @param ch character (Unicode code point) to test
1097      * @return true if the given character can be used as part of an integer string
1098      */
1099     public static boolean isIntegerPart(final int ch) {
1100         return Character.isDigit(ch) ||
1101                 ch == '-' ||
1102                 ch == '+';
1103     }
1104 
1105     /** Return true if the given character (Unicode code point) can be used as part of
1106      * the string representation of a decimal number. This will be true for the following types
1107      * of characters:
1108      * <ul>
1109      *  <li>{@link Character#isDigit(int) digits}</li>
1110      *  <li>the '-' (minus) character</li>
1111      *  <li>the '+' (plus) character</li>
1112      *  <li>the '.' (period) character</li>
1113      *  <li>the 'e' character</li>
1114      *  <li>the 'E' character</li>
1115      * </ul>
1116      * @param ch character (Unicode code point) to test
1117      * @return true if the given character can be used as part of a decimal number string
1118      */
1119     public static boolean isDecimalPart(final int ch) {
1120         return Character.isDigit(ch) ||
1121             ch == '-' ||
1122             ch == '+' ||
1123             ch == '.' ||
1124             ch == 'e' ||
1125             ch == 'E';
1126     }
1127 
1128     /** Test two strings for equality. One or both arguments may be null.
1129      * @param a first string
1130      * @param b second string
1131      * @param caseSensitive comparison is case-sensitive if set to true
1132      * @return true if the string arguments are considered equal
1133      */
1134     private static boolean stringsEqual(final String a, final String b, final boolean caseSensitive) {
1135         if (a == null) {
1136             return b == null;
1137         }
1138 
1139         return caseSensitive ?
1140                 a.equals(b) :
1141                 a.equalsIgnoreCase(b);
1142     }
1143 
1144     /** Internal class used to collect strings from the character stream while ensuring that the
1145      * collected strings do not exceed the maximum configured string length.
1146      */
1147     private final class StringCollector implements IntPredicate, IntConsumer {
1148 
1149         /** String builder instance. */
1150         private final StringBuilder sb = new StringBuilder();
1151 
1152         /** Start position line. */
1153         private final int line;
1154 
1155         /** Start position column. */
1156         private final int col;
1157 
1158         /** Character predicate. */
1159         private final IntPredicate pred;
1160 
1161         /** Construct a new instance with the given start position and character predicate.
1162          * @param line start position line
1163          * @param col start position col
1164          * @param pred character predicate
1165          */
1166         StringCollector(final int line, final int col, final IntPredicate pred) {
1167             this.line = line;
1168             this.col = col;
1169             this.pred = pred;
1170         }
1171 
1172         /** {@inheritDoc} */
1173         @Override
1174         public boolean test(final int value) {
1175             return pred.test(value) && !hasExceededMaxStringLength();
1176         }
1177 
1178         /** {@inheritDoc} */
1179         @Override
1180         public void accept(final int value) {
1181             sb.append((char) value);
1182         }
1183 
1184         /** Get the string collected by this instance.
1185          * @return the string collected by this instance
1186          * @throws IllegalStateException if the string exceeds the maximum configured length
1187          */
1188         public String getString() {
1189             if (hasExceededMaxStringLength()) {
1190                 throw parseError(line, col, STRING_LENGTH_ERR_MSG + maxStringLength);
1191             }
1192 
1193             return sb.toString();
1194         }
1195 
1196         /** Return true if this collector has exceeded the maximum configured string length.
1197          * @return true if this collector has exceeded the maximum string length
1198          */
1199         private boolean hasExceededMaxStringLength() {
1200             return sb.length() > maxStringLength;
1201         }
1202     }
1203 }