001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     * 
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     * 
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.io.input;
018    
019    import java.io.IOException;
020    import java.io.InputStream;
021    import java.io.Reader;
022    import java.nio.ByteBuffer;
023    import java.nio.CharBuffer;
024    import java.nio.charset.Charset;
025    import java.nio.charset.CharsetEncoder;
026    import java.nio.charset.CoderResult;
027    
028    /**
029     * {@link InputStream} implementation that reads a character stream from a {@link Reader}
030     * and transforms it to a byte stream using a specified charset encoding. The stream
031     * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset
032     * encodings supported by the JRE are handled correctly. In particular for charsets such as
033     * UTF-16, the implementation ensures that one and only one byte order marker
034     * is produced.
035     * <p>
036     * Since in general it is not possible to predict the number of characters to be read from the
037     * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from
038     * the {@link Reader} are buffered. There is therefore no well defined correlation
039     * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}.
040     * This also implies that in general there is no need to wrap the underlying {@link Reader}
041     * in a {@link java.io.BufferedReader}.
042     * <p>
043     * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader};
044     * in the following example, reading from <tt>in2</tt> would return the same byte
045     * sequence as reading from <tt>in</tt> (provided that the initial byte sequence is legal
046     * with respect to the charset encoding):
047     * <pre>
048     * InputStream in = ...
049     * Charset cs = ...
050     * InputStreamReader reader = new InputStreamReader(in, cs);
051     * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre>
052     * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter},
053     * except that the control flow is reversed: both classes transform a character stream
054     * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream,
055     * while {@link ReaderInputStream} pulls it from the underlying stream.
056     * <p>
057     * Note that while there are use cases where there is no alternative to using
058     * this class, very often the need to use this class is an indication of a flaw
059     * in the design of the code. This class is typically used in situations where an existing
060     * API only accepts an {@link InputStream}, but where the most natural way to produce the data
061     * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation
062     * where this problem may appear is when implementing the {@link javax.activation.DataSource}
063     * interface from the Java Activation Framework.
064     * <p>
065     * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next
066     * read operation will block or not, it is not possible to provide a meaningful
067     * implementation of the {@link InputStream#available()} method. A call to this method
068     * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}.
069     * <p>
070     * Instances of {@link ReaderInputStream} are not thread safe.
071     * 
072     * @see org.apache.commons.io.output.WriterOutputStream
073     * 
074     * @author <a href="mailto:veithen@apache.org">Andreas Veithen</a>
075     * @since Commons IO 2.0
076     */
public class ReaderInputStream extends InputStream {
    private static final int DEFAULT_BUFFER_SIZE = 1024;

    /**
     * Smallest capacity ever used for the input buffer. Two characters are
     * required so that a complete surrogate pair can be presented to the
     * encoder; with a single slot a pending high surrogate would leave no
     * room to read its low surrogate and the stream would never progress.
     */
    private static final int MIN_BUFFER_SIZE = 2;

    /** Value returned by the read methods once the stream is exhausted. */
    private static final int EOF = -1;

    private final Reader reader;
    private final CharsetEncoder encoder;

    /**
     * CharBuffer used as input for the encoder. It should be reasonably
     * large as we read data from the underlying Reader into this buffer.
     * Outside of {@link #fillBuffer()} it is kept in "drain" mode, i.e.
     * position..limit delimits the characters not yet consumed by the encoder.
     */
    private final CharBuffer encoderIn;

    /**
     * ByteBuffer used as output for the encoder. This buffer can be small
     * as it is only used to transfer data from the encoder to the
     * buffer provided by the caller. It is kept in "fill" mode, i.e. its
     * position is the number of encoded bytes waiting to be handed out.
     */
    private final ByteBuffer encoderOut = ByteBuffer.allocate(128);

    /** Result of the most recent encode/flush call, or null before the first one. */
    private CoderResult lastCoderResult;

    /** Set once the underlying {@link Reader} has reported end of stream. */
    private boolean endOfInput;

    /**
     * Set once the encoder has consumed all input and has been flushed
     * successfully. After that point the encoder must not be invoked again.
     */
    private boolean encoderFlushed;

    /**
     * Construct a new {@link ReaderInputStream}.
     * <p>
     * Characters that are malformed (e.g. unpaired surrogates) or that cannot be
     * mapped to the given charset are replaced by the encoder's replacement byte
     * sequence instead of being reported as errors.
     * 
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     * @param bufferSize the size of the input buffer in number of characters;
     *        must be positive. A size of 1 is silently rounded up to 2 so that a
     *        surrogate pair always fits into the buffer.
     * @throws IllegalArgumentException if <code>bufferSize</code> is not positive
     */
    public ReaderInputStream(Reader reader, Charset charset, int bufferSize) {
        if (bufferSize < 1) {
            throw new IllegalArgumentException("Buffer size must be positive: " + bufferSize);
        }
        this.reader = reader;
        // With the default REPORT action the encoder returns an error result without
        // consuming the offending character, which would stall the read loop forever.
        encoder = charset.newEncoder()
                .onMalformedInput(java.nio.charset.CodingErrorAction.REPLACE)
                .onUnmappableCharacter(java.nio.charset.CodingErrorAction.REPLACE);
        encoderIn = CharBuffer.allocate(Math.max(bufferSize, MIN_BUFFER_SIZE));
        // Start with an empty buffer in "drain" mode.
        encoderIn.flip();
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     * 
     * @param reader the target {@link Reader}
     * @param charset the charset encoding
     */
    public ReaderInputStream(Reader reader, Charset charset) {
        this(reader, charset, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream}.
     * 
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     * @param bufferSize the size of the input buffer in number of characters;
     *        must be positive
     * @throws IllegalArgumentException if <code>bufferSize</code> is not positive
     */
    public ReaderInputStream(Reader reader, String charsetName, int bufferSize) {
        this(reader, Charset.forName(charsetName), bufferSize);
    }

    /**
     * Construct a new {@link ReaderInputStream} with a default input buffer size of
     * 1024 characters.
     * 
     * @param reader the target {@link Reader}
     * @param charsetName the name of the charset encoding
     */
    public ReaderInputStream(Reader reader, String charsetName) {
        this(reader, charsetName, DEFAULT_BUFFER_SIZE);
    }

    /**
     * Construct a new {@link ReaderInputStream} that uses the default character encoding
     * with a default input buffer size of 1024 characters.
     * 
     * @param reader the target {@link Reader}
     */
    public ReaderInputStream(Reader reader) {
        this(reader, Charset.defaultCharset());
    }

    /**
     * Refill {@link #encoderOut} with encoded bytes. Reads more characters from the
     * underlying {@link Reader} when the encoder has run out of input, encodes them,
     * and flushes the encoder once all input has been consumed.
     * <p>
     * Must only be called while {@link #encoderOut} is empty and the encoder has
     * not been flushed yet.
     * 
     * @throws IOException if the reader fails or the encoder reports an error
     */
    private void fillBuffer() throws IOException {
        if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) {
            encoderIn.compact();
            int position = encoderIn.position();
            // We don't use Reader#read(CharBuffer) here because it is more efficient
            // to write directly to the underlying char array (the default implementation
            // copies data to a temporary char array).
            int c = reader.read(encoderIn.array(), position, encoderIn.remaining());
            if (c == EOF) {
                endOfInput = true;
            } else {
                encoderIn.position(position + c);
            }
            encoderIn.flip();
        }
        lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput);
        if (endOfInput && lastCoderResult.isUnderflow()) {
            // All characters have been consumed: let the encoder emit any trailing
            // bytes (stateful charsets may need to write a final escape sequence).
            lastCoderResult = encoder.flush(encoderOut);
            // An overflow here means the flush has to be retried once the caller
            // has drained encoderOut; only an underflow completes the stream.
            encoderFlushed = lastCoderResult.isUnderflow();
        }
        if (lastCoderResult.isError()) {
            // Not expected with the REPLACE actions configured in the constructor,
            // but failing loudly is preferable to looping without progress.
            lastCoderResult.throwException();
        }
    }

    /**
     * Read the specified number of bytes into an array. The call keeps reading
     * from the underlying {@link Reader} until <code>len</code> bytes have been
     * produced or the end of the stream is reached.
     * 
     * @param b the byte array to read into
     * @param off the offset to start reading bytes into
     * @param len the number of bytes to read
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached; <code>0</code> if
     *         <code>len</code> is zero
     * @throws NullPointerException if <code>b</code> is null
     * @throws IndexOutOfBoundsException if <code>off</code> or <code>len</code> is
     *         negative, or <code>len</code> exceeds <code>b.length - off</code>
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("Byte array must not be null");
        }
        // Written as a subtraction so that off + len cannot overflow.
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException(
                    "Array size=" + b.length + ", offset=" + off + ", length=" + len);
        }
        if (len == 0) {
            // Required by the InputStream contract, even at end of stream.
            return 0;
        }
        int read = 0;
        while (len > 0) {
            if (encoderOut.position() > 0) {
                // Hand out bytes left over from the last encode call.
                encoderOut.flip();
                int c = Math.min(encoderOut.remaining(), len);
                encoderOut.get(b, off, c);
                off += c;
                len -= c;
                read += c;
                encoderOut.compact();
            } else if (encoderFlushed) {
                break;
            } else {
                fillBuffer();
            }
        }
        // read can only be 0 here if the loop was left because the stream is exhausted.
        return read == 0 ? EOF : read;
    }

    /**
     * Read bytes into an array, filling it completely unless the end of the
     * stream is reached first.
     * 
     * @param b the byte array to read into
     * @return the number of bytes read or <code>-1</code>
     *         if the end of the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Read a single byte.
     *
     * @return either the byte read (as a value between 0 and 255) or
     *         <code>-1</code> if the end of the stream has been reached
     * @throws IOException if an I/O error occurs
     */
    @Override
    public int read() throws IOException {
        byte[] b = new byte[1];
        return read(b) == EOF ? EOF : b[0] & 0xFF;
    }

    /**
     * Close the stream. This method will cause the underlying {@link Reader}
     * to be closed.
     * @throws IOException if an I/O error occurs
     */
    @Override
    public void close() throws IOException {
        reader.close();
    }
}