TextFacetDefinitionReader.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.geometry.io.euclidean.threed.txt;

import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.commons.geometry.euclidean.threed.Vector3D;
import org.apache.commons.geometry.io.core.internal.GeometryIOUtils;
import org.apache.commons.geometry.io.core.internal.SimpleTextParser;
import org.apache.commons.geometry.io.euclidean.threed.FacetDefinition;
import org.apache.commons.geometry.io.euclidean.threed.FacetDefinitionReader;
import org.apache.commons.geometry.io.euclidean.threed.SimpleFacetDefinition;

/** Facet definition reader implementation that reads an extremely simple
 * text format. The format consists of sequences of decimal numbers defining
 * the vertices of each facet, with exactly one facet defined per line.
 * Each vertex is given by listing its {@code x}, {@code y}, and {@code z}
 * components in that order. The format can be described as follows:
 * <p>
 * <code>
 *      p1<sub>x</sub> p1<sub>y</sub> p1<sub>z</sub> p2<sub>x</sub> p2<sub>y</sub> p2<sub>z</sub> p3<sub>x</sub> p3<sub>y</sub> p3<sub>z</sub> ...
 * </code>
 * </p>
 * <p>where the <em>p1</em> elements contain the coordinates of the first facet vertex,
 * <em>p2</em> those of the second, and so on. At least 3 vertices are required for each
 * facet but more can be specified as long as all {@code x, y, z} components are provided
 * for each vertex. The facet normal is defined implicitly from the facet vertices using
 * the right-hand rule (i.e. vertices are arranged counter-clockwise).</p>
 *
 * <p><strong>Delimiters</strong></p>
 * <p>Vertex coordinate values may be separated by any character that is
 * not a digit, alphabetic, '-' (minus), or '+' (plus). The separator does
 * not need to be consistent between (or even within) lines and does not
 * need to be configured in the reader. This design provides configuration-free
 * support for common formats such as CSV as well as other formats designed
 * for human readability.</p>
 *
 * <p><strong>Comments</strong></p>
 * <p>Comments are supported through use of the {@link #getCommentToken() comment token}
 * property. Characters from the comment token through the end of the current line are
 * discarded. Setting the comment token to null or the empty string disables comment parsing.
 * The default comment token is {@value #DEFAULT_COMMENT_TOKEN}.</p>
 *
 * <p><strong>Examples</strong></p>
 * <p>The following examples demonstrate the definition of two facets,
 * one with 3 vertices and one with 4 vertices, in different formats.</p>
 * <p><em>CSV</em></p>
 * <pre>
 *  0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0
 *  1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0
 * </pre>
 * <p><em>Whitespace and semicolons</em></p>
 * <pre>
 *  # line comment
 *  0 0 0; 1 0 0; 1 1 0 # 3 vertices
 *  1 0 0; 1 1 0; 1 1 1; 1 0 1 # 4 vertices
 * </pre>
 *
 * @see TextFacetDefinitionWriter
 */
public class TextFacetDefinitionReader implements FacetDefinitionReader {

    /** Comment token used when none is supplied to the constructor. */
    public static final String DEFAULT_COMMENT_TOKEN = "#";

    /** Underlying character source; closed by {@link #close()}. */
    private final Reader reader;

    /** Text parser wrapping the underlying reader. */
    private final SimpleTextParser parser;

    /** Currently configured comment token; may be null or empty. */
    private String commentToken;

    /** Flag set when the comment token is neither null nor empty. */
    private boolean hasCommentToken;

    /** First character of the comment token, or -1 when comment parsing is disabled. */
    private int commentStartChar;

    /** Construct a new instance that reads characters from the argument and uses
     * the default comment token value of {@value TextFacetDefinitionReader#DEFAULT_COMMENT_TOKEN}.
     * @param reader reader to read characters from
     */
    public TextFacetDefinitionReader(final Reader reader) {
        this(reader, DEFAULT_COMMENT_TOKEN);
    }

    /** Construct a new instance that reads from the given reader and recognizes
     * the given comment token.
     * @param reader reader to read characters from
     * @param commentToken comment token string; set to null to disable comment parsing
     * @throws IllegalArgumentException if {@code commentToken} is non-null and contains whitespace
     */
    public TextFacetDefinitionReader(final Reader reader, final String commentToken) {
        this.reader = reader;
        this.parser = new SimpleTextParser(reader);

        applyCommentToken(commentToken);
    }

    /** Get the comment token string. When the token is neither null nor empty, all
     * characters from an occurrence of the token to the end of that line are
     * discarded during parsing.
     * @return comment token string; may be null
     */
    public String getCommentToken() {
        return commentToken;
    }

    /** Set the comment token string. When the token is neither null nor empty, all
     * characters from an occurrence of the token to the end of that line are
     * discarded during parsing. Passing null or the empty string disables comment
     * parsing. Comment tokens may not contain whitespace.
     * @param commentToken token to set
     * @throws IllegalArgumentException if the argument is non-null and contains whitespace
     */
    public void setCommentToken(final String commentToken) {
        applyCommentToken(commentToken);
    }

    /** {@inheritDoc} */
    @Override
    public FacetDefinition readFacet() {
        skipNonDataLines();
        if (!parser.hasMoreCharacters()) {
            // nothing but blank/comment lines remained
            return null;
        }

        try {
            return parseFacetLine();
        } finally {
            // always move past the current line, even when it failed to
            // parse, so that a later call starts on fresh input
            parser.discardLine();
        }
    }

    /** {@inheritDoc} */
    @Override
    public void close() {
        GeometryIOUtils.closeUnchecked(reader);
    }

    /** Parse a single facet definition from the current line, beginning at the current
     * parser position. Three vertices are always read; additional vertices are read for
     * as long as characters remain on the line after separators and any trailing comment
     * have been skipped. The caller is responsible for positioning the parser on a data
     * line and for advancing to the next line afterwards.
     * @return facet definition built from the vertices on the current line
     * @throws IllegalStateException if a data format error occurs
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    private FacetDefinition parseFacetLine() {
        final Vector3D first = parseVertex();
        skipSeparatorsAndComment();
        final Vector3D second = parseVertex();
        skipSeparatorsAndComment();
        final Vector3D third = parseVertex();
        skipSeparatorsAndComment();

        if (!parser.hasMoreCharactersOnLine()) {
            // common case: a plain triangle
            return new SimpleFacetDefinition(Arrays.asList(first, second, third));
        }

        final List<Vector3D> vertices = new ArrayList<>(Arrays.asList(first, second, third));
        do {
            vertices.add(parseVertex());
            skipSeparatorsAndComment();
        } while (parser.hasMoreCharactersOnLine());

        return new SimpleFacetDefinition(vertices);
    }

    /** Parse the three components of a vertex, beginning at the current parser position.
     * Separators (and any comment) between the components are skipped.
     * @return vertex parsed from the input
     * @throws IllegalStateException if a data format error occurs
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    private Vector3D parseVertex() {
        final double[] xyz = new double[3];

        xyz[0] = parseCoordinate();
        for (int i = 1; i < xyz.length; ++i) {
            skipSeparatorsAndComment();
            xyz[i] = parseCoordinate();
        }

        return Vector3D.of(xyz[0], xyz[1], xyz[2]);
    }

    /** Parse a single floating point value from the token beginning at the current
     * parser position.
     * @return value of the next data token
     * @throws IllegalStateException if a data format error occurs
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    private double parseCoordinate() {
        return parser
                .next(TextFacetDefinitionReader::isTokenChar)
                .getCurrentTokenAsDouble();
    }

    /** Advance the parser past all lines that hold no data, i.e. lines that are
     * empty, contain only whitespace, or contain only a comment.
     * @throws IllegalStateException if a data format error occurs
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    private void skipNonDataLines() {
        parser.discardLineWhitespace();
        while (atNonDataLine()) {
            parser
                .discardLine()
                .discardLineWhitespace();
        }
    }

    /** Return true if input remains and the parser is positioned either at the end
     * of the current line or at the start of a comment.
     * @return true if the remainder of the current line holds no data
     * @throws IllegalStateException if a data format error occurs
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    private boolean atNonDataLine() {
        if (!parser.hasMoreCharacters()) {
            return false;
        }

        return !parser.hasMoreCharactersOnLine() || atCommentStart();
    }

    /** Skip separator characters on the current line, beginning at the current parser
     * position. Skipping stops at a new line character, a data token character, or the
     * first character of the comment token. If the parser then sits on a comment, the
     * remainder of the line is skipped as well, leaving the new line sequence unread.
     * @throws IllegalStateException if a data format error occurs
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    private void skipSeparatorsAndComment() {
        parser.discard(ch ->
            !(SimpleTextParser.isNewLinePart(ch) ||
                    isTokenChar(ch) ||
                    ch == commentStartChar));

        if (atCommentStart()) {
            // consume the comment text only; the line terminator is
            // left in place for the caller
            parser.discard(SimpleTextParser::isNotNewLinePart);
        }
    }

    /** Return true if comment parsing is enabled and the upcoming characters match
     * the comment token.
     * @return true if the parser is positioned at the start of the comment token
     * @throws IllegalStateException if a data format error occurs
     * @throws java.io.UncheckedIOException if an I/O error occurs
     */
    private boolean atCommentStart() {
        if (!hasCommentToken) {
            return false;
        }

        return commentToken.equals(parser.peek(commentToken.length()));
    }

    /** Validate the given comment token and update all comment-related state. This is
     * kept private so that it can be safely invoked from the constructor.
     * @param token comment token to set; may be null or empty to disable comments
     * @throws IllegalArgumentException if the argument is non-null and contains whitespace
     */
    private void applyCommentToken(final String token) {
        final boolean enabled = token != null && !token.isEmpty();

        if (enabled && hasWhitespace(token)) {
            throw new IllegalArgumentException("Comment token cannot contain whitespace; was [" +
                    token + "]");
        }

        this.commentToken = token;
        this.hasCommentToken = enabled;
        if (enabled) {
            this.commentStartChar = token.charAt(0);
        } else {
            // no character read from the input can equal -1
            this.commentStartChar = -1;
        }
    }

    /** Return true if the given character belongs to a data token for this reader.
     * @param ch character to test
     * @return true if {@code ch} is part of a data token
     */
    private static boolean isTokenChar(final int ch) {
        // alphabetic characters are deliberately treated as token content so
        // that malformed input is split into natural-looking tokens, giving
        // more helpful error messages
        if (Character.isAlphabetic(ch)) {
            return true;
        }

        return SimpleTextParser.isDecimalPart(ch);
    }

    /** Return true if at least one character in the given string is whitespace.
     * @param str string to test
     * @return true if {@code str} contains any whitespace characters
     */
    private static boolean hasWhitespace(final String str) {
        return str.chars().anyMatch(Character::isWhitespace);
    }
}