001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.io.input; 018 019 import java.io.IOException; 020 import java.io.InputStream; 021 import java.io.Reader; 022 import java.nio.ByteBuffer; 023 import java.nio.CharBuffer; 024 import java.nio.charset.Charset; 025 import java.nio.charset.CharsetEncoder; 026 import java.nio.charset.CoderResult; 027 028 /** 029 * {@link InputStream} implementation that reads a character stream from a {@link Reader} 030 * and transforms it to a byte stream using a specified charset encoding. The stream 031 * is transformed using a {@link CharsetEncoder} object, guaranteeing that all charset 032 * encodings supported by the JRE are handled correctly. In particular for charsets such as 033 * UTF-16, the implementation ensures that one and only one byte order marker 034 * is produced. 035 * <p> 036 * Since in general it is not possible to predict the number of characters to be read from the 037 * {@link Reader} to satisfy a read request on the {@link ReaderInputStream}, all reads from 038 * the {@link Reader} are buffered. There is therefore no well defined correlation 039 * between the current position of the {@link Reader} and that of the {@link ReaderInputStream}. 040 * This also implies that in general there is no need to wrap the underlying {@link Reader} 041 * in a {@link java.io.BufferedReader}. 042 * <p> 043 * {@link ReaderInputStream} implements the inverse transformation of {@link java.io.InputStreamReader}; 044 * in the following example, reading from <tt>in2</tt> would return the same byte 045 * sequence as reading from <tt>in</tt> (provided that the initial byte sequence is legal 046 * with respect to the charset encoding): 047 * <pre> 048 * InputStream in = ... 049 * Charset cs = ... 050 * InputStreamReader reader = new InputStreamReader(in, cs); 051 * ReaderInputStream in2 = new ReaderInputStream(reader, cs);</pre> 052 * {@link ReaderInputStream} implements the same transformation as {@link java.io.OutputStreamWriter}, 053 * except that the control flow is reversed: both classes transform a character stream 054 * into a byte stream, but {@link java.io.OutputStreamWriter} pushes data to the underlying stream, 055 * while {@link ReaderInputStream} pulls it from the underlying stream. 056 * <p> 057 * Note that while there are use cases where there is no alternative to using 058 * this class, very often the need to use this class is an indication of a flaw 059 * in the design of the code. This class is typically used in situations where an existing 060 * API only accepts an {@link InputStream}, but where the most natural way to produce the data 061 * is as a character stream, i.e. by providing a {@link Reader} instance. An example of a situation 062 * where this problem may appear is when implementing the {@link javax.activation.DataSource} 063 * interface from the Java Activation Framework. 064 * <p> 065 * Given the fact that the {@link Reader} class doesn't provide any way to predict whether the next 066 * read operation will block or not, it is not possible to provide a meaningful 067 * implementation of the {@link InputStream#available()} method. A call to this method 068 * will always return 0. Also, this class doesn't support {@link InputStream#mark(int)}. 069 * <p> 070 * Instances of {@link ReaderInputStream} are not thread safe. 071 * 072 * @see org.apache.commons.io.output.WriterOutputStream 073 * 074 * @author <a href="mailto:veithen@apache.org">Andreas Veithen</a> 075 * @since Commons IO 2.0 076 */ 077 public class ReaderInputStream extends InputStream { 078 private static final int DEFAULT_BUFFER_SIZE = 1024; 079 080 private final Reader reader; 081 private final CharsetEncoder encoder; 082 083 /** 084 * CharBuffer used as input for the decoder. It should be reasonably 085 * large as we read data from the underlying Reader into this buffer. 086 */ 087 private final CharBuffer encoderIn; 088 089 /** 090 * ByteBuffer used as output for the decoder. This buffer can be small 091 * as it is only used to transfer data from the decoder to the 092 * buffer provided by the caller. 093 */ 094 private final ByteBuffer encoderOut = ByteBuffer.allocate(128); 095 096 private CoderResult lastCoderResult; 097 private boolean endOfInput; 098 099 /** 100 * Construct a new {@link ReaderInputStream}. 101 * 102 * @param reader the target {@link Reader} 103 * @param charset the charset encoding 104 * @param bufferSize the size of the input buffer in number of characters 105 */ 106 public ReaderInputStream(Reader reader, Charset charset, int bufferSize) { 107 this.reader = reader; 108 encoder = charset.newEncoder(); 109 encoderIn = CharBuffer.allocate(bufferSize); 110 encoderIn.flip(); 111 } 112 113 /** 114 * Construct a new {@link ReaderInputStream} with a default input buffer size of 115 * 1024 characters. 116 * 117 * @param reader the target {@link Reader} 118 * @param charset the charset encoding 119 */ 120 public ReaderInputStream(Reader reader, Charset charset) { 121 this(reader, charset, DEFAULT_BUFFER_SIZE); 122 } 123 124 /** 125 * Construct a new {@link ReaderInputStream}. 126 * 127 * @param reader the target {@link Reader} 128 * @param charsetName the name of the charset encoding 129 * @param bufferSize the size of the input buffer in number of characters 130 */ 131 public ReaderInputStream(Reader reader, String charsetName, int bufferSize) { 132 this(reader, Charset.forName(charsetName), bufferSize); 133 } 134 135 /** 136 * Construct a new {@link ReaderInputStream} with a default input buffer size of 137 * 1024 characters. 138 * 139 * @param reader the target {@link Reader} 140 * @param charsetName the name of the charset encoding 141 */ 142 public ReaderInputStream(Reader reader, String charsetName) { 143 this(reader, charsetName, DEFAULT_BUFFER_SIZE); 144 } 145 146 /** 147 * Construct a new {@link ReaderInputStream} that uses the default character encoding 148 * with a default input buffer size of 1024 characters. 149 * 150 * @param reader the target {@link Reader} 151 */ 152 public ReaderInputStream(Reader reader) { 153 this(reader, Charset.defaultCharset()); 154 } 155 156 /** 157 * Read the specified number of bytes into an array. 158 * 159 * @param b the byte array to read into 160 * @param off the offset to start reading bytes into 161 * @param len the number of bytes to read 162 * @return the number of bytes read or <code>-1</code> 163 * if the end of the stream has been reached 164 * @throws IOException if an I/O error occurs 165 */ 166 @Override 167 public int read(byte[] b, int off, int len) throws IOException { 168 int read = 0; 169 while (len > 0) { 170 if (encoderOut.position() > 0) { 171 encoderOut.flip(); 172 int c = Math.min(encoderOut.remaining(), len); 173 encoderOut.get(b, off, c); 174 off += c; 175 len -= c; 176 read += c; 177 encoderOut.compact(); 178 } else { 179 if (!endOfInput && (lastCoderResult == null || lastCoderResult.isUnderflow())) { 180 encoderIn.compact(); 181 int position = encoderIn.position(); 182 // We don't use Reader#read(CharBuffer) here because it is more efficient 183 // to write directly to the underlying char array (the default implementation 184 // copies data to a temporary char array). 185 int c = reader.read(encoderIn.array(), position, encoderIn.remaining()); 186 if (c == -1) { 187 endOfInput = true; 188 } else { 189 encoderIn.position(position+c); 190 } 191 encoderIn.flip(); 192 } 193 lastCoderResult = encoder.encode(encoderIn, encoderOut, endOfInput); 194 if (endOfInput && encoderOut.position() == 0) { 195 break; 196 } 197 } 198 } 199 return read == 0 && endOfInput ? -1 : read; 200 } 201 202 /** 203 * Read the specified number of bytes into an array. 204 * 205 * @param b the byte array to read into 206 * @return the number of bytes read or <code>-1</code> 207 * if the end of the stream has been reached 208 * @throws IOException if an I/O error occurs 209 */ 210 @Override 211 public int read(byte[] b) throws IOException { 212 return read(b, 0, b.length); 213 } 214 215 /** 216 * Read a single byte. 217 * 218 * @return either the byte read or <code>-1</code> if the end of the stream 219 * has been reached 220 * @throws IOException if an I/O error occurs 221 */ 222 @Override 223 public int read() throws IOException { 224 byte[] b = new byte[1]; 225 return read(b) == -1 ? -1 : b[0] & 0xFF; 226 } 227 228 /** 229 * Close the stream. This method will cause the underlying {@link Reader} 230 * to be closed. 231 * @throws IOException if an I/O error occurs 232 */ 233 @Override 234 public void close() throws IOException { 235 reader.close(); 236 } 237 }