1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 package org.apache.commons.geometry.io.core.internal; 18 19 import java.io.Reader; 20 import java.util.Arrays; 21 import java.util.List; 22 import java.util.function.IntConsumer; 23 import java.util.function.IntPredicate; 24 25 /** Class providing basic text parsing capabilities. The goals of this class are to 26 * (1) provide a simple, flexible API for performing common text parsing operations and 27 * (2) provide a mechanism for creating consistent and informative parsing errors. 28 * This class is not intended as a replacement for grammar-based parsers and/or lexers. 29 */ 30 public class SimpleTextParser { 31 32 /** Constant indicating that the end of the input has been reached. */ 33 private static final int EOF = -1; 34 35 /** Carriage return character. */ 36 private static final char CR = '\r'; 37 38 /** Line feed character. */ 39 private static final char LF = '\n'; 40 41 /** Default value for the max string length property. */ 42 private static final int DEFAULT_MAX_STRING_LENGTH = 1024; 43 44 /** Error message used when a string exceeds the configured maximum length. */ 45 private static final String STRING_LENGTH_ERR_MSG = "string length exceeds maximum value of "; 46 47 /** Initial token position number. */ 48 private static final int INITIAL_TOKEN_POS = -1; 49 50 /** Int consumer that does nothing. */ 51 private static final IntConsumer NOOP_CONSUMER = ch -> { }; 52 53 /** Current line number; line numbers start counting at 1. */ 54 private int lineNumber = 1; 55 56 /** Current character column on the current line; column numbers start at 1.*/ 57 private int columnNumber = 1; 58 59 /** Maximum length for strings returned by this instance. */ 60 private int maxStringLength = DEFAULT_MAX_STRING_LENGTH; 61 62 /** The current token. */ 63 private String currentToken; 64 65 /** The line number that the current token started on. */ 66 private int currentTokenLineNumber = INITIAL_TOKEN_POS; 67 68 /** The character number that the current token started on. */ 69 private int currentTokenColumnNumber = INITIAL_TOKEN_POS; 70 71 /** Flag used to indicate that at least one token has been read from the stream. */ 72 private boolean hasSetToken; 73 74 /** Character read buffer used to access the character stream. */ 75 private final CharReadBuffer buffer; 76 77 /** Construct a new instance that reads characters from the given reader. The 78 * reader will not be closed. 79 * @param reader reader instance to read characters from 80 */ 81 public SimpleTextParser(final Reader reader) { 82 this(new CharReadBuffer(reader)); 83 } 84 85 /** Construct a new instance that reads characters from the given character buffer. 86 * @param buffer read buffer to read characters from 87 */ 88 public SimpleTextParser(final CharReadBuffer buffer) { 89 this.buffer = buffer; 90 } 91 92 /** Get the current line number. Line numbers start at 1. 93 * @return the current line number 94 */ 95 public int getLineNumber() { 96 return lineNumber; 97 } 98 99 /** Set the current line number. This does not affect the character stream position, 100 * only the value returned by {@link #getLineNumber()}. 101 * @param lineNumber line number to set; line numbers start at 1 102 */ 103 public void setLineNumber(final int lineNumber) { 104 this.lineNumber = lineNumber; 105 } 106 107 /** Get the current column number. This indicates the column position of the 108 * character that will returned by the next call to {@link #readChar()}. The first 109 * character of each line has a column number of 1. 110 * @return the current column number; column numbers start at 1 111 */ 112 public int getColumnNumber() { 113 return columnNumber; 114 } 115 116 /** Set the current column number. This does not affect the character stream position, 117 * only the value returned by {@link #getColumnNumber()}. 118 * @param column the column number to set; column numbers start at 1 119 */ 120 public void setColumnNumber(final int column) { 121 this.columnNumber = column; 122 } 123 124 /** Get the maximum length for strings returned by this instance. Operations 125 * that produce strings longer than this length will throw an exception. 126 * @return maximum length for strings returned by this instance 127 */ 128 public int getMaxStringLength() { 129 return maxStringLength; 130 } 131 132 /** Set the maximum length for strings returned by this instance. Operations 133 * that produce strings longer than this length will throw an exception. 134 * @param maxStringLength maximum length for strings returned by this instance 135 * @throws IllegalArgumentException if the argument is less than zero 136 */ 137 public void setMaxStringLength(final int maxStringLength) { 138 if (maxStringLength < 0) { 139 throw new IllegalArgumentException("Maximum string length cannot be less than zero; was " + 140 maxStringLength); 141 } 142 this.maxStringLength = maxStringLength; 143 } 144 145 /** Get the current token. This is the most recent string read by one of the {@code nextXXX()} 146 * methods. This value will be null if no token has yet been read or if the end of content has 147 * been reached. 148 * @return the current token 149 * @see #next(int) 150 * @see #next(IntPredicate) 151 * @see #nextLine() 152 * @see #nextAlphanumeric() 153 */ 154 public String getCurrentToken() { 155 return currentToken; 156 } 157 158 /** Return true if the current token is not null or empty. 159 * @return true if the current token is not null or empty 160 * @see #getCurrentToken() 161 */ 162 public boolean hasNonEmptyToken() { 163 return currentToken != null && !currentToken.isEmpty(); 164 } 165 166 /** Get the line number that the current token started on. This value will 167 * be -1 if no token has been read yet. 168 * @return current token starting line number or -1 if no token has been 169 * read yet 170 * @see #getCurrentToken() 171 */ 172 public int getCurrentTokenLineNumber() { 173 return currentTokenLineNumber; 174 } 175 176 /** Get the column position that the current token started on. This value will 177 * be -1 if no token has been read yet. 178 * @return current token column number or -1 if no oken has been read yet 179 * @see #getCurrentToken() 180 */ 181 public int getCurrentTokenColumnNumber() { 182 return currentTokenColumnNumber; 183 } 184 185 /** Get the current token parsed as an integer. 186 * @return the current token parsed as an integer 187 * @throws IllegalStateException if no token has been read or the 188 * current token cannot be parsed as an integer 189 */ 190 public int getCurrentTokenAsInt() { 191 ensureHasSetToken(); 192 193 Throwable cause = null; 194 195 if (currentToken != null) { 196 try { 197 return Integer.parseInt(currentToken); 198 } catch (NumberFormatException exc) { 199 cause = exc; 200 } 201 } 202 203 throw unexpectedToken("integer", cause); 204 } 205 206 /** Get the current token parsed as a double. 207 * @return the current token parsed as a double 208 * @throws IllegalStateException if no token has been read or the 209 * current token cannot be parsed as a double 210 */ 211 public double getCurrentTokenAsDouble() { 212 ensureHasSetToken(); 213 214 Throwable cause = null; 215 216 if (currentToken != null) { 217 try { 218 return Double.parseDouble(currentToken); 219 } catch (NumberFormatException exc) { 220 cause = exc; 221 } 222 } 223 224 throw unexpectedToken("double", cause); 225 } 226 227 /** Return true if there are more characters to read from this instance. 228 * @return true if there are more characters to read from this instance 229 * @throws java.io.UncheckedIOException if an I/O error occurs 230 */ 231 public boolean hasMoreCharacters() { 232 return buffer.hasMoreCharacters(); 233 } 234 235 /** Return true if there are more characters to read on the current line. 236 * @return true if there are more characters to read on the current line 237 * @throws java.io.UncheckedIOException if an I/O error occurs 238 */ 239 public boolean hasMoreCharactersOnLine() { 240 return hasMoreCharacters() && isNotNewLinePart(peekChar()); 241 } 242 243 /** Read and return the next character in the stream and advance the parser position. 244 * This method updates the current line number and column number but does <strong>not</strong> 245 * set the {@link #getCurrentToken() current token}. 246 * @return the next character in the stream or -1 if the end of the stream has been 247 * reached 248 * @throws java.io.UncheckedIOException if an I/O error occurs 249 * @see #peekChar() 250 */ 251 public int readChar() { 252 final int value = buffer.read(); 253 if (value == LF || 254 (value == CR && peekChar() != LF)) { 255 ++lineNumber; 256 columnNumber = 1; 257 } else if (value != EOF) { 258 ++columnNumber; 259 } 260 261 return value; 262 } 263 264 /** Read a string containing at most {@code len} characters from the stream and 265 * set it as the current token. Characters are added to the string until the string 266 * has the specified length or the end of the stream is reached. The characters are 267 * consumed from the stream. The token is set to null if no more characters are available 268 * from the character stream when this method is called. 269 * @param len the maximum length of the extracted string 270 * @return this instance 271 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the 272 * configured {@link #getMaxStringLength() maximum string length} 273 * @throws java.io.UncheckedIOException if an I/O error occurs 274 * @see #getCurrentToken() 275 * @see #consume(int, IntConsumer) 276 */ 277 public SimpleTextParser next(final int len) { 278 validateRequestedStringLength(len); 279 280 final int line = getLineNumber(); 281 final int col = getColumnNumber(); 282 283 String token = null; 284 if (hasMoreCharacters()) { 285 final StringBuilder sb = new StringBuilder(len); 286 287 consume(len, ch -> sb.append((char) ch)); 288 289 token = sb.toString(); 290 } 291 292 setToken(line, col, token); 293 294 return this; 295 } 296 297 /** Read a string containing at most {@code len} characters from the stream and 298 * set it as the current token. This is similar to {@link #next(int)} but with the exception 299 * that new line sequences beginning with {@code lineContinuationChar} are skipped. 300 * @param lineContinuationChar character used to indicate skipped new line sequences 301 * @param len the maximum length of the extracted string 302 * @return this instance 303 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the 304 * configured {@link #getMaxStringLength() maximum string length} 305 * @throws java.io.UncheckedIOException if an I/O error occurs 306 * @see #getCurrentToken() 307 * @see #consumeWithLineContinuation(char, int, IntConsumer) 308 */ 309 public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final int len) { 310 validateRequestedStringLength(len); 311 312 final int line = getLineNumber(); 313 final int col = getColumnNumber(); 314 315 String token = null; 316 if (hasMoreCharacters()) { 317 final StringBuilder sb = new StringBuilder(len); 318 319 consumeWithLineContinuation(lineContinuationChar, len, 320 ch -> sb.append((char) ch)); 321 322 token = sb.toString(); 323 } 324 325 setToken(line, col, token); 326 327 return this; 328 } 329 330 /** Read characters from the stream while the given predicate returns true and set the result 331 * as the current token. The next call to {@link #readChar()} will return either a character 332 * that fails the predicate test or -1 if the end of the stream has been reached. 333 * The token will be null if the end of the stream has been reached prior to the method call. 334 * @param pred predicate function passed characters read from the input; reading continues 335 * until the predicate returns false 336 * @return this instance 337 * @throws IllegalStateException if the length of the produced string exceeds the configured 338 * {@link #getMaxStringLength() maximum string length} 339 * @throws java.io.UncheckedIOException if an I/O error occurs 340 * @see #getCurrentToken() 341 * @see #consume(IntPredicate, IntConsumer) 342 */ 343 public SimpleTextParser next(final IntPredicate pred) { 344 final int line = getLineNumber(); 345 final int col = getColumnNumber(); 346 347 String token = null; 348 if (hasMoreCharacters()) { 349 final StringCollector collector = new StringCollector(line, col, pred); 350 351 consume(collector, collector); 352 353 token = collector.getString(); 354 } 355 356 setToken(line, col, token); 357 358 return this; 359 } 360 361 /** Read characters from the stream while the given predicate returns true and set the result 362 * as the current token. This is similar to {@link #next(IntPredicate)} but with the exception 363 * that new line sequences prefixed with {@code lineContinuationChar} are skipped. 364 * @param lineContinuationChar character used to indicate skipped new line sequences 365 * @param pred predicate function passed characters read from the input; reading continues 366 * until the predicate returns false 367 * @return this instance 368 * @throws IllegalStateException if the length of the produced string exceeds the configured 369 * {@link #getMaxStringLength() maximum string length} 370 * @throws java.io.UncheckedIOException if an I/O error occurs 371 * @see #getCurrentToken() 372 * @see #consume(IntPredicate, IntConsumer) 373 */ 374 public SimpleTextParser nextWithLineContinuation(final char lineContinuationChar, final IntPredicate pred) { 375 final int line = getLineNumber(); 376 final int col = getColumnNumber(); 377 378 String token = null; 379 if (hasMoreCharacters()) { 380 final StringCollector collector = new StringCollector(line, col, pred); 381 382 consumeWithLineContinuation(lineContinuationChar, collector, collector); 383 384 token = collector.getString(); 385 } 386 387 setToken(line, col, token); 388 389 return this; 390 } 391 392 /** Read characters from the current parser position to the next new line sequence and 393 * set the result as the current token . The newline character sequence 394 * ('\r', '\n', or '\r\n') at the end of the line is consumed but is not included in the token. 395 * The token will be null if the end of the stream has been reached prior to the method call. 396 * @return this instance 397 * @throws IllegalStateException if the length of the produced string exceeds the configured 398 * {@link #getMaxStringLength() maximum string length} 399 * @throws java.io.UncheckedIOException if an I/O error occurs 400 * @see #getCurrentToken() 401 */ 402 public SimpleTextParser nextLine() { 403 next(SimpleTextParser::isNotNewLinePart); 404 405 discardNewLineSequence(); 406 407 return this; 408 } 409 410 /** Read a sequence of alphanumeric characters starting from the current parser position 411 * and set the result as the current token. The token will be the empty string if the next 412 * character in the stream is not alphanumeric and will be null if the end of the stream has 413 * been reached prior to the method call. 414 * @return this instance 415 * @throws IllegalStateException if the length of the produced string exceeds the configured 416 * {@link #getMaxStringLength() maximum string length} 417 * @throws java.io.UncheckedIOException if an I/O error occurs 418 * @see #getCurrentToken() 419 */ 420 public SimpleTextParser nextAlphanumeric() { 421 return next(SimpleTextParser::isAlphanumeric); 422 } 423 424 /** Discard {@code len} number of characters from the character stream. The 425 * parser position is updated but the current token is not changed. 426 * @param len number of characters to discard 427 * @return this instance 428 * @throws java.io.UncheckedIOException if an I/O error occurs 429 */ 430 public SimpleTextParser discard(final int len) { 431 return consume(len, NOOP_CONSUMER); 432 } 433 434 /** Discard {@code len} number of characters from the character stream. The 435 * parser position is updated but the current token is not changed. Lines beginning 436 * with {@code lineContinuationChar} are skipped. 437 * @param lineContinuationChar character used to indicate skipped new line sequences 438 * @param len number of characters to discard 439 * @return this instance 440 * @throws java.io.UncheckedIOException if an I/O error occurs 441 */ 442 public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar, 443 final int len) { 444 return consumeWithLineContinuation(lineContinuationChar, len, NOOP_CONSUMER); 445 } 446 447 /** Discard characters from the stream while the given predicate returns true. The next call 448 * to {@link #readChar()} will return either a character that fails the predicate test or -1 449 * if the end of the stream has been reached. The parser position is updated but the current 450 * token is not changed. 451 * @param pred predicate test for characters to discard 452 * @return this instance 453 * @throws java.io.UncheckedIOException if an I/O error occurs 454 */ 455 public SimpleTextParser discard(final IntPredicate pred) { 456 return consume(pred, NOOP_CONSUMER); 457 } 458 459 /** Discard characters from the stream while the given predicate returns true. New line sequences 460 * beginning with {@code lineContinuationChar} are skipped. The next call o {@link #readChar()} 461 * will return either a character that fails the predicate test or -1 if the end of the stream 462 * has been reached. The parser position is updated but the current token is not changed. 463 * @param lineContinuationChar character used to indicate skipped new line sequences 464 * @param pred predicate test for characters to discard 465 * @return this instance 466 * @throws java.io.UncheckedIOException if an I/O error occurs 467 */ 468 public SimpleTextParser discardWithLineContinuation(final char lineContinuationChar, 469 final IntPredicate pred) { 470 return consumeWithLineContinuation(lineContinuationChar, pred, NOOP_CONSUMER); 471 } 472 473 /** Discard a sequence of whitespace characters from the character stream starting from the 474 * current parser position. The next call to {@link #readChar()} will return either a non-whitespace 475 * character or -1 if the end of the stream has been reached. The parser position is updated 476 * but the current token is not changed. 477 * @return this instance 478 * @throws java.io.UncheckedIOException if an I/O error occurs 479 */ 480 public SimpleTextParser discardWhitespace() { 481 return discard(SimpleTextParser::isWhitespace); 482 } 483 484 /** Discard the next whitespace characters on the current line. The next call to 485 * {@link #readChar()} will return either a non-whitespace character on the current line, 486 * the newline character sequence (indicating the end of the line), or -1 (indicating the 487 * end of the stream). The parser position is updated but the current token is not changed. 488 * @return this instance 489 * @throws java.io.UncheckedIOException if an I/O error occurs 490 */ 491 public SimpleTextParser discardLineWhitespace() { 492 return discard(SimpleTextParser::isLineWhitespace); 493 } 494 495 /** Discard the newline character sequence at the current reader position. The sequence 496 * is defined as one of "\r", "\n", or "\r\n". Does nothing if the reader is not positioned 497 * at a newline sequence. The parser position is updated but the current token is not changed. 498 * @return this instance 499 * @throws java.io.UncheckedIOException if an I/O error occurs 500 */ 501 public SimpleTextParser discardNewLineSequence() { 502 final int value = peekChar(); 503 if (value == LF) { 504 readChar(); 505 } else if (value == CR) { 506 readChar(); 507 508 if (peekChar() == LF) { 509 readChar(); 510 } 511 } 512 513 return this; 514 } 515 516 /** Discard all remaining characters on the current line, including the terminating 517 * newline character sequence. The next call to {@link #readChar()} will return either the 518 * first character on the next line or -1 if the end of the stream has been reached. 519 * The parser position is updated but the current token is not changed. 520 * @return this instance 521 * @throws java.io.UncheckedIOException if an I/O error occurs 522 */ 523 public SimpleTextParser discardLine() { 524 discard(SimpleTextParser::isNotNewLinePart); 525 526 discardNewLineSequence(); 527 528 return this; 529 } 530 531 /** Consume characters from the stream and pass them to {@code consumer} while the given predicate 532 * returns true. The operation ends when the predicate returns false or the end of the stream is 533 * reached. 534 * @param pred predicate test for characters to consume 535 * @param consumer object to be passed each consumed character 536 * @return this instance 537 * @throws java.io.UncheckedIOException if an I/O error occurs 538 */ 539 public SimpleTextParser consume(final IntPredicate pred, final IntConsumer consumer) { 540 int ch; 541 while ((ch = peekChar()) != EOF && pred.test(ch)) { 542 consumer.accept(readChar()); 543 } 544 545 return this; 546 } 547 548 /** Consume at most {@code len} characters from the stream, passing each to the given consumer. 549 * This method is similar to {@link #consume(int, IntConsumer)} with the exception that new line 550 * sequences prefixed with {@code lineContinuationChar} are skipped. 551 * @param lineContinuationChar character used to indicate skipped new line sequences 552 * @param len number of characters to consume 553 * @param consumer function to be passed each consumed character 554 * @return this instance 555 * @throws java.io.UncheckedIOException if an I/O error occurs 556 */ 557 public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar, 558 final int len, final IntConsumer consumer) { 559 int i = -1; 560 int ch; 561 while (++i < len && (ch = readChar()) != EOF) { 562 if (ch == lineContinuationChar && isNewLinePart(peekChar())) { 563 --i; // don't count the continuation char toward the total length 564 discardNewLineSequence(); 565 } else { 566 consumer.accept(ch); 567 } 568 } 569 570 return this; 571 } 572 573 /** Consume at most {@code len} characters from the stream, passing each to the given consumer. 574 * The operation continues until {@code len} number of characters have been read or the end of 575 * the stream has been reached. 576 * @param len number of characters to consume 577 * @param consumer object to be passed each consumed character 578 * @return this instance 579 * @throws java.io.UncheckedIOException if an I/O error occurs 580 */ 581 public SimpleTextParser consume(final int len, final IntConsumer consumer) { 582 int ch; 583 for (int i = 0; i < len; ++i) { 584 ch = readChar(); 585 if (ch != EOF) { 586 consumer.accept(ch); 587 } else { 588 break; 589 } 590 } 591 592 return this; 593 } 594 595 /** Consume characters from the stream and pass them to {@code consumer} while the given predicate 596 * returns true. This method is similar to {@link #consume(IntPredicate, IntConsumer)} with the 597 * exception that new lines sequences beginning with {@code lineContinuationChar} are skipped. 598 * @param lineContinuationChar character used to indicate skipped new line sequences 599 * @param pred predicate test for characters to consume 600 * @param consumer object to be passed each consumed character 601 * @return this instance 602 * @throws java.io.UncheckedIOException if an I/O error occurs 603 */ 604 public SimpleTextParser consumeWithLineContinuation(final char lineContinuationChar, 605 final IntPredicate pred, final IntConsumer consumer) { 606 int ch; 607 while ((ch = peekChar()) != EOF) { 608 if (ch == lineContinuationChar && isNewLinePart(buffer.charAt(1))) { 609 readChar(); 610 discardNewLineSequence(); 611 } else if (pred.test(ch)) { 612 consumer.accept(readChar()); 613 } else { 614 break; 615 } 616 } 617 618 return this; 619 } 620 621 /** Return the next character in the stream but do not advance the parser position. 622 * @return the next character in the stream or -1 if the end of the stream has been 623 * reached 624 * @throws java.io.UncheckedIOException if an I/O error occurs 625 * @see #readChar() 626 */ 627 public int peekChar() { 628 return buffer.peek(); 629 } 630 631 /** Return a string containing containing at most {@code len} characters from the stream but 632 * without changing the parser position. Characters are added to the string until the 633 * string has the specified length or the end of the stream is reached. 634 * @param len the maximum length of the returned string 635 * @return a string containing containing at most {@code len} characters from the stream 636 * or null if the parser has already reached the end of the stream 637 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than the 638 * configured {@link #getMaxStringLength() maximum string length} 639 * @throws java.io.UncheckedIOException if an I/O error occurs 640 * @see #next(int) 641 */ 642 public String peek(final int len) { 643 validateRequestedStringLength(len); 644 645 return buffer.peekString(len); 646 } 647 648 /** Read characters from the stream while the given predicate returns true but do not 649 * change the current token or advance the parser position. 650 * @param pred predicate function passed characters read from the input; reading continues 651 * until the predicate returns false 652 * @return string containing characters matching {@code pred} or null if the parser has already 653 * reached the end of the stream 654 * @throws IllegalStateException if the length of the produced string exceeds the configured 655 * {@link #getMaxStringLength() maximum string length} 656 * @throws java.io.UncheckedIOException if an I/O error occurs 657 * @see #getCurrentToken() 658 */ 659 public String peek(final IntPredicate pred) { 660 String token = null; 661 662 if (hasMoreCharacters()) { 663 final StringCollector collector = new StringCollector(lineNumber, columnNumber, pred); 664 665 int i = -1; 666 int ch = buffer.charAt(++i); 667 while (ch != EOF && collector.test(ch)) { 668 collector.accept(ch); 669 670 ch = buffer.charAt(++i); 671 } 672 673 token = collector.getString(); 674 } 675 676 return token; 677 } 678 679 /** Compare the {@link #getCurrentToken() current token} with the argument and throw an 680 * exception if they are not equal. The comparison is case-sensitive. 681 * @param expected expected token 682 * @return this instance 683 * @throws IllegalStateException if no token has been read or {@code expected} does not exactly 684 * equal the current token 685 */ 686 public SimpleTextParser match(final String expected) { 687 matchInternal(expected, true, true); 688 return this; 689 } 690 691 /** Compare the {@link #getCurrentToken() current token} with the argument and throw an 692 * exception if they are not equal. The comparison is <em>not</em> case-sensitive. 693 * @param expected expected token 694 * @return this instance 695 * @throws IllegalStateException if no token has been read or {@code expected} does not equal 696 * the current token (ignoring case) 697 */ 698 public SimpleTextParser matchIgnoreCase(final String expected) { 699 matchInternal(expected, false, true); 700 return this; 701 } 702 703 /** Return true if the {@link #getCurrentToken() current token} is equal to the argument. 704 * The comparison is case-sensitive. 705 * @param expected expected token 706 * @return true if the argument exactly equals the current token 707 * @throws IllegalStateException if no token has been read 708 * @throws java.io.UncheckedIOException if an I/O error occurs 709 */ 710 public boolean tryMatch(final String expected) { 711 return matchInternal(expected, true, false); 712 } 713 714 /** Return true if the {@link #getCurrentToken() current token} is equal to the argument. 715 * The comparison is <em>not</em> case-sensitive. 716 * @param expected expected token 717 * @return true if the argument equals the current token (ignoring case) 718 * @throws IllegalStateException if no token has been read 719 */ 720 public boolean tryMatchIgnoreCase(final String expected) { 721 return matchInternal(expected, false, false); 722 } 723 724 /** Internal method to compare the current token with the argument. 725 * @param expected expected token 726 * @param caseSensitive if the comparison should be case-sensitive 727 * @param throwOnFailure if an exception should be thrown if the argument is not 728 * equal to the current token 729 * @return true if the argument is equal to the current token 730 * @throws IllegalStateException if no token has been read or {@code expected} does not match the 731 * current token and {@code throwOnFailure} is true 732 */ 733 private boolean matchInternal(final String expected, final boolean caseSensitive, 734 final boolean throwOnFailure) { 735 ensureHasSetToken(); 736 737 if (!stringsEqual(expected, currentToken, caseSensitive)) { 738 if (throwOnFailure) { 739 throw unexpectedToken("[" + expected + "]"); 740 } 741 742 return false; 743 } 744 745 return true; 746 } 747 748 /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}. 749 * An exception is thrown if no match is found. String comparisons are case-sensitive. 750 * @param expected strings to compare with the current token 751 * @return index of the argument that exactly matches the current token 752 * @throws IllegalStateException if no token has been read or no match is found among the arguments 753 */ 754 public int choose(final String... expected) { 755 return choose(Arrays.asList(expected)); 756 } 757 758 /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token}. 759 * An exception is thrown if no match is found. String comparisons are case-sensitive. 760 * @param expected strings to compare with the current token 761 * @return index of the argument that exactly matches the current token 762 * @throws IllegalStateException if no token has been read or no match is found among the arguments 763 */ 764 public int choose(final List<String> expected) { 765 return chooseInternal(expected, true, true); 766 } 767 768 /** Return the index of the argument that matches the {@link #getCurrentToken() current token}, 769 * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em> 770 * case-sensitive. 771 * @param expected strings to compare with the current token 772 * @return index of the argument that matches the current token (ignoring case) 773 * @throws IllegalStateException if no token has been read or no match is found among the arguments 774 */ 775 public int chooseIgnoreCase(final String... expected) { 776 return chooseIgnoreCase(Arrays.asList(expected)); 777 } 778 779 /** Return the index of the argument that matches the {@link #getCurrentToken() current token}, 780 * ignoring case. An exception is thrown if no match is found. String comparisons are <em>not</em> 781 * case-sensitive. 782 * @param expected strings to compare with the current token 783 * @return index of the argument that matches the current token (ignoring case) 784 * @throws IllegalStateException if no token has been read or no match is found among the arguments 785 */ 786 public int chooseIgnoreCase(final List<String> expected) { 787 return chooseInternal(expected, false, true); 788 } 789 790 /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token} 791 * or -1 if no match is found. String comparisons are case-sensitive. 792 * @param expected strings to compare with the current token 793 * @return index of the argument that exactly matches the current token or -1 if 794 * no match is found 795 * @throws IllegalStateException if no token has been read 796 */ 797 public int tryChoose(final String... expected) { 798 return tryChoose(Arrays.asList(expected)); 799 } 800 801 /** Return the index of the argument that exactly matches the {@link #getCurrentToken() current token} 802 * or -1 if no match is found. String comparisons are case-sensitive. 803 * @param expected strings to compare with the current token 804 * @return index of the argument that exactly matches the current token or -1 if 805 * no match is found 806 * @throws IllegalStateException if no token has been read 807 */ 808 public int tryChoose(final List<String> expected) { 809 return chooseInternal(expected, true, false); 810 } 811 812 /** Return the index of the argument that matches the {@link #getCurrentToken() current token} 813 * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive. 814 * @param expected strings to compare with the current token 815 * @return index of the argument that matches the current token (ignoring case) or -1 if 816 * no match is found 817 * @throws IllegalStateException if no token has been read 818 */ 819 public int tryChooseIgnoreCase(final String... expected) { 820 return tryChooseIgnoreCase(Arrays.asList(expected)); 821 } 822 823 /** Return the index of the argument that matches the {@link #getCurrentToken() current token} 824 * or -1 if no match is found. String comparisons are <em>not</em> case-sensitive. 825 * @param expected strings to compare with the current token 826 * @return index of the argument that matches the current token (ignoring case) or -1 if 827 * no match is found 828 * @throws IllegalStateException if no token has been read 829 */ 830 public int tryChooseIgnoreCase(final List<String> expected) { 831 return chooseInternal(expected, false, false); 832 } 833 834 /** Internal method to compare the current token with a list of possible strings. The index of 835 * the matching argument is returned. 836 * @param expected strings to compare with the current token 837 * @param caseSensitive if the comparisons should be case-sensitive 838 * @param throwOnFailure if an exception should be thrown if no match is found 839 * @return the index of the matching argument or -1 if no match is found 840 * @throws IllegalStateException if no token has been read or no match is found and 841 * {@code throwOnFailure} is true 842 */ 843 private int chooseInternal(final List<String> expected, final boolean caseSensitive, 844 final boolean throwOnFailure) { 845 ensureHasSetToken(); 846 847 int i = 0; 848 for (final String str : expected) { 849 if (stringsEqual(str, currentToken, caseSensitive)) { 850 return i; 851 } 852 853 ++i; 854 } 855 856 if (throwOnFailure) { 857 throw unexpectedToken("one of " + expected); 858 } 859 860 return -1; 861 } 862 863 /** Get an exception indicating that the current token was unexpected. The returned 864 * exception contains a message with the line number and column of the current token and 865 * a description of its value. 866 * @param expected string describing what was expected 867 * @return exception indicating that the current token was unexpected 868 */ 869 public IllegalStateException unexpectedToken(final String expected) { 870 return unexpectedToken(expected, null); 871 } 872 873 /** Get an exception indicating that the current token was unexpected. The returned 874 * exception contains a message with the line number and column of the current token and 875 * a description of its value. 876 * @param expected string describing what was expected 877 * @param cause cause of the error 878 * @return exception indicating that the current token was unexpected 879 */ 880 public IllegalStateException unexpectedToken(final String expected, final Throwable cause) { 881 882 StringBuilder msg = new StringBuilder(); 883 msg.append("expected ") 884 .append(expected) 885 .append(" but found ") 886 .append(getCurrentTokenDescription()); 887 888 final int line = hasSetToken ? currentTokenLineNumber : lineNumber; 889 final int col = hasSetToken ? currentTokenColumnNumber : columnNumber; 890 891 return parseError(line, col, msg.toString(), cause); 892 } 893 894 /** Get an exception indicating an error during parsing at the current token position. 895 * @param msg error message 896 * @return an exception indicating an error during parsing at the current token position 897 */ 898 public IllegalStateException tokenError(final String msg) { 899 return tokenError(msg, null); 900 } 901 902 /** Get an exception indicating an error during parsing at the current token position. 903 * @param msg error message 904 * @param cause the cause of the error; may be null 905 * @return an exception indicating an error during parsing at the current token position 906 */ 907 public IllegalStateException tokenError(final String msg, final Throwable cause) { 908 final int line = hasSetToken ? currentTokenLineNumber : lineNumber; 909 final int col = hasSetToken ? currentTokenColumnNumber : columnNumber; 910 911 return parseError(line, col, msg, cause); 912 } 913 914 /** Return an exception indicating an error occurring at the current parser position. 915 * @param msg error message 916 * @return an exception indicating an error during parsing 917 */ 918 public IllegalStateException parseError(final String msg) { 919 return parseError(msg, null); 920 } 921 922 /** Return an exception indicating an error occurring at the current parser position. 923 * @param msg error message 924 * @param cause the cause of the error; may be null 925 * @return an exception indicating an error during parsing 926 */ 927 public IllegalStateException parseError(final String msg, final Throwable cause) { 928 return parseError(lineNumber, columnNumber, msg, cause); 929 } 930 931 /** Return an exception indicating an error during parsing. 932 * @param line line number of the error 933 * @param col column number of the error 934 * @param msg error message 935 * @return an exception indicating an error during parsing 936 */ 937 public IllegalStateException parseError(final int line, final int col, final String msg) { 938 return parseError(line, col, msg, null); 939 } 940 941 /** Return an exception indicating an error during parsing. 942 * @param line line number of the error 943 * @param col column number of the error 944 * @param msg error message 945 * @param cause the cause of the error 946 * @return an exception indicating an error during parsing 947 */ 948 public IllegalStateException parseError(final int line, final int col, final String msg, 949 final Throwable cause) { 950 final String fullMsg = String.format("Parsing failed at line %d, column %d: %s", 951 line, col, msg); 952 return GeometryIOUtils.parseError(fullMsg, cause); 953 } 954 955 /** Set the current token string and position. 956 * @param line line number for the start of the token 957 * @param col column number for the start of the token 958 * @param token token to set 959 */ 960 private void setToken(final int line, final int col, final String token) { 961 currentTokenLineNumber = line; 962 currentTokenColumnNumber = col; 963 currentToken = token; 964 965 hasSetToken = true; 966 } 967 968 /** Get a user-friendly description of the current token. 969 * @return a user-friendly description of the current token. 970 */ 971 private String getCurrentTokenDescription() { 972 if (currentToken == null || currentToken.isEmpty()) { 973 // attempt to return a more helpful message about the location 974 // of empty tokens by checking the buffer content; if this fails 975 // we'll ignore the error and continue with a more generic message 976 try { 977 if (!hasMoreCharacters()) { 978 return "end of content"; 979 } else if (currentToken != null) { 980 if (!hasMoreCharactersOnLine()) { 981 return "end of line"; 982 } 983 return "empty token followed by [" + peek(1) + "]"; 984 } 985 } catch (IllegalStateException exc) { 986 // ignore 987 } 988 } 989 990 if (currentToken == null) { 991 return "no current token"; 992 } else if (currentToken.isEmpty()) { 993 return "empty token"; 994 } 995 996 return "[" + currentToken + "]"; 997 } 998 999 /** Validate the requested string length. 1000 * @param len requested string length 1001 * @throws IllegalArgumentException if {@code len} is less than 0 or greater than {@code maxStringLength} 1002 */ 1003 private void validateRequestedStringLength(final int len) { 1004 if (len < 0) { 1005 throw new IllegalArgumentException("Requested string length cannot be negative; was " + len); 1006 } else if (len > maxStringLength) { 1007 throw new IllegalArgumentException("Requested string length of " + len + " exceeds maximum value of " + 1008 maxStringLength); 1009 } 1010 } 1011 1012 /** Ensure that a token read operation has been performed, throwing an exception if not. 1013 * @throws IllegalStateException if no token read operation has been performed 1014 */ 1015 private void ensureHasSetToken() { 1016 if (!hasSetToken) { 1017 throw new IllegalStateException("No token has been read from the character stream"); 1018 } 1019 } 1020 1021 /** Return true if the given character (Unicode code point) is whitespace. 1022 * @param ch character (Unicode code point) to test 1023 * @return true if the given character is whitespace 1024 * @see Character#isWhitespace(int) 1025 */ 1026 public static boolean isWhitespace(final int ch) { 1027 return Character.isWhitespace(ch); 1028 } 1029 1030 /** Return true if the given character (Unicode code point) is not whitespace. 1031 * @param ch character (Unicode code point) to test 1032 * @return true if the given character is not whitespace 1033 * @see #isWhitespace(int) 1034 */ 1035 public static boolean isNotWhitespace(final int ch) { 1036 return !isWhitespace(ch); 1037 } 1038 1039 /** Return true if the given character (Unicode code point) is whitespace 1040 * that is not used in newline sequences (ie, not '\r' or '\n'). 1041 * @param ch character (Unicode code point) to test 1042 * @return true if the given character is a whitespace character not used in newline 1043 * sequences 1044 */ 1045 public static boolean isLineWhitespace(final int ch) { 1046 return isWhitespace(ch) && isNotNewLinePart(ch); 1047 } 1048 1049 /** Return true if the given character (Unicode code point) is used 1050 * as part of newline sequences (ie, is either '\r' or '\n'). 1051 * @param ch character (Unicode code point) to test 1052 * @return true if the given character is used as part of newline sequences 1053 */ 1054 public static boolean isNewLinePart(final int ch) { 1055 return ch == CR || ch == LF; 1056 } 1057 1058 /** Return true if the given character (Unicode code point) is not used as 1059 * part of newline sequences (ie, not '\r' or '\n'). 1060 * @param ch character (Unicode code point) to test 1061 * @return true if the given character is not used as part of newline sequences 1062 * @see #isNewLinePart(int) 1063 */ 1064 public static boolean isNotNewLinePart(final int ch) { 1065 return !isNewLinePart(ch); 1066 } 1067 1068 /** Return true if the given character (Unicode code point) is alphanumeric. 1069 * @param ch character (Unicode code point) to test 1070 * @return true if the argument is alphanumeric 1071 * @see Character#isAlphabetic(int) 1072 * @see Character#isDigit(int) 1073 */ 1074 public static boolean isAlphanumeric(final int ch) { 1075 return Character.isAlphabetic(ch) || 1076 Character.isDigit(ch); 1077 } 1078 1079 /** Return true if the given character (Unicode code point) is not alphanumeric. 1080 * @param ch character (Unicode code point) to test 1081 * @return true if the argument is not alphanumeric 1082 * @see #isAlphanumeric(int) 1083 */ 1084 public static boolean isNotAlphanumeric(final int ch) { 1085 return !isAlphanumeric(ch); 1086 } 1087 1088 /** Return true if the given character (Unicode code point) can be used as part of 1089 * the string representation of an integer. This will be true for the following types 1090 * of characters: 1091 * <ul> 1092 * <li>{@link Character#isDigit(int) digits}</li> 1093 * <li>the '-' (minus) character</li> 1094 * <li>the '+' (plus) character</li> 1095 * </ul> 1096 * @param ch character (Unicode code point) to test 1097 * @return true if the given character can be used as part of an integer string 1098 */ 1099 public static boolean isIntegerPart(final int ch) { 1100 return Character.isDigit(ch) || 1101 ch == '-' || 1102 ch == '+'; 1103 } 1104 1105 /** Return true if the given character (Unicode code point) can be used as part of 1106 * the string representation of a decimal number. This will be true for the following types 1107 * of characters: 1108 * <ul> 1109 * <li>{@link Character#isDigit(int) digits}</li> 1110 * <li>the '-' (minus) character</li> 1111 * <li>the '+' (plus) character</li> 1112 * <li>the '.' (period) character</li> 1113 * <li>the 'e' character</li> 1114 * <li>the 'E' character</li> 1115 * </ul> 1116 * @param ch character (Unicode code point) to test 1117 * @return true if the given character can be used as part of a decimal number string 1118 */ 1119 public static boolean isDecimalPart(final int ch) { 1120 return Character.isDigit(ch) || 1121 ch == '-' || 1122 ch == '+' || 1123 ch == '.' || 1124 ch == 'e' || 1125 ch == 'E'; 1126 } 1127 1128 /** Test two strings for equality. One or both arguments may be null. 1129 * @param a first string 1130 * @param b second string 1131 * @param caseSensitive comparison is case-sensitive if set to true 1132 * @return true if the string arguments are considered equal 1133 */ 1134 private static boolean stringsEqual(final String a, final String b, final boolean caseSensitive) { 1135 if (a == null) { 1136 return b == null; 1137 } 1138 1139 return caseSensitive ? 1140 a.equals(b) : 1141 a.equalsIgnoreCase(b); 1142 } 1143 1144 /** Internal class used to collect strings from the character stream while ensuring that the 1145 * collected strings do not exceed the maximum configured string length. 1146 */ 1147 private final class StringCollector implements IntPredicate, IntConsumer { 1148 1149 /** String builder instance. */ 1150 private final StringBuilder sb = new StringBuilder(); 1151 1152 /** Start position line. */ 1153 private final int line; 1154 1155 /** Start position column. */ 1156 private final int col; 1157 1158 /** Character predicate. */ 1159 private final IntPredicate pred; 1160 1161 /** Construct a new instance with the given start position and character predicate. 1162 * @param line start position line 1163 * @param col start position col 1164 * @param pred character predicate 1165 */ 1166 StringCollector(final int line, final int col, final IntPredicate pred) { 1167 this.line = line; 1168 this.col = col; 1169 this.pred = pred; 1170 } 1171 1172 /** {@inheritDoc} */ 1173 @Override 1174 public boolean test(final int value) { 1175 return pred.test(value) && !hasExceededMaxStringLength(); 1176 } 1177 1178 /** {@inheritDoc} */ 1179 @Override 1180 public void accept(final int value) { 1181 sb.append((char) value); 1182 } 1183 1184 /** Get the string collected by this instance. 1185 * @return the string collected by this instance 1186 * @throws IllegalStateException if the string exceeds the maximum configured length 1187 */ 1188 public String getString() { 1189 if (hasExceededMaxStringLength()) { 1190 throw parseError(line, col, STRING_LENGTH_ERR_MSG + maxStringLength); 1191 } 1192 1193 return sb.toString(); 1194 } 1195 1196 /** Return true if this collector has exceeded the maximum configured string length. 1197 * @return true if this collector has exceeded the maximum string length 1198 */ 1199 private boolean hasExceededMaxStringLength() { 1200 return sb.length() > maxStringLength; 1201 } 1202 } 1203 }