public class ReaderImpl extends Object implements Reader
Modifier and Type | Class and Description |
---|---|
static class |
ReaderImpl.StripeInformationImpl |
Reader.Options
Modifier and Type | Field and Description |
---|---|
protected int |
bufferSize |
protected CompressionCodec |
codec |
protected CompressionKind |
compressionKind |
protected org.apache.hadoop.conf.Configuration |
conf |
protected org.apache.hadoop.fs.FileSystem |
fileSystem |
protected OrcProto.Metadata |
metadata |
protected org.apache.hadoop.fs.Path |
path |
protected int |
rowIndexStride |
protected OrcTail |
tail |
protected List<OrcProto.Type> |
types |
Constructor and Description |
---|
ReaderImpl(org.apache.hadoop.fs.Path path,
OrcFile.ReaderOptions options)
Constructor that lets the user specify additional options.
|
Modifier and Type | Method and Description |
---|---|
protected static void |
checkOrcVersion(org.slf4j.Logger log,
org.apache.hadoop.fs.Path path,
List<Integer> version)
Check to see if this ORC file is from a future version and if so,
warn the user that we may not be able to read all of the column encodings.
|
protected static void |
ensureOrcFooter(ByteBuffer buffer,
int psLen)
Ensure this is an ORC file to prevent users from trying to read text
files or RC files as ORC files.
|
protected static void |
ensureOrcFooter(org.apache.hadoop.fs.FSDataInputStream in,
org.apache.hadoop.fs.Path path,
int psLen,
ByteBuffer buffer)
Ensure this is an ORC file to prevent users from trying to read text
files or RC files as ORC files.
|
static OrcTail |
extractFileTail(ByteBuffer buffer) |
static OrcTail |
extractFileTail(ByteBuffer buffer,
long fileLength,
long modificationTime) |
protected OrcTail |
extractFileTail(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path path,
long maxFileLength) |
static OrcProto.Metadata |
extractMetadata(ByteBuffer bb,
int metadataAbsPos,
int metadataSize,
CompressionCodec codec,
int bufferSize) |
CompressionKind |
getCompressionKind()
Get the compression kind.
|
int |
getCompressionSize()
Get the buffer size for the compression.
|
long |
getContentLength()
Get the length of the file.
|
OrcProto.FileTail |
getFileTail()
Get the file tail (footer + postscript)
|
OrcFile.Version |
getFileVersion()
Get the file format version.
|
List<String> |
getMetadataKeys()
Get the user metadata keys.
|
int |
getMetadataSize() |
ByteBuffer |
getMetadataValue(String key)
Get a user metadata value.
|
long |
getNumberOfRows()
Get the number of rows in the file.
|
List<OrcProto.ColumnStatistics> |
getOrcProtoFileStatistics() |
List<OrcProto.StripeStatistics> |
getOrcProtoStripeStatistics() |
List<OrcProto.UserMetadataItem> |
getOrcProtoUserMetadata() |
long |
getRawDataSize()
Get the deserialized data size of the file
|
long |
getRawDataSizeFromColIndices(List<Integer> colIndices)
Get the deserialized data size of the specified column ids
|
static long |
getRawDataSizeFromColIndices(List<Integer> colIndices,
List<OrcProto.Type> types,
List<OrcProto.ColumnStatistics> stats) |
long |
getRawDataSizeOfColumns(List<String> colNames)
Get the deserialized data size of the specified columns
|
int |
getRowIndexStride()
Get the number of rows per entry in the row index.
|
TypeDescription |
getSchema()
Get the type of rows in this ORC file.
|
ByteBuffer |
getSerializedFileFooter() |
ColumnStatistics[] |
getStatistics()
Get the statistics about the columns in the file.
|
List<StripeInformation> |
getStripes()
Get the list of stripes.
|
List<StripeStatistics> |
getStripeStatistics() |
List<OrcProto.Type> |
getTypes()
Get the list of types contained in the file.
|
List<Integer> |
getVersionList() |
OrcFile.WriterVersion |
getWriterVersion()
Get the version of the writer of this file.
|
static OrcFile.WriterVersion |
getWriterVersion(int writerVersion)
Get the WriterVersion based on the ORC file postscript.
|
boolean |
hasMetadataValue(String key)
Did the user set the given metadata value?
|
RecordReader |
rows()
Create a RecordReader that reads everything with the default options.
|
RecordReader |
rows(Reader.Options options)
Create a RecordReader that uses the options given.
|
String |
toString() |
protected final org.apache.hadoop.fs.FileSystem fileSystem
protected final org.apache.hadoop.fs.Path path
protected final CompressionKind compressionKind
protected CompressionCodec codec
protected int bufferSize
protected OrcProto.Metadata metadata
protected final List<OrcProto.Type> types
protected final int rowIndexStride
protected final org.apache.hadoop.conf.Configuration conf
protected OrcTail tail
public ReaderImpl(org.apache.hadoop.fs.Path path, OrcFile.ReaderOptions options) throws IOException
path
- pathname for file
options
- options for reading
IOException
public long getNumberOfRows()
Reader
getNumberOfRows
in interface Reader
public List<String> getMetadataKeys()
Reader
getMetadataKeys
in interface Reader
public ByteBuffer getMetadataValue(String key)
Reader
getMetadataValue
in interface Reader
key
- a key given by the user
public boolean hasMetadataValue(String key)
Reader
hasMetadataValue
in interface Reader
key
- the key to check
public CompressionKind getCompressionKind()
Reader
getCompressionKind
in interface Reader
public int getCompressionSize()
Reader
getCompressionSize
in interface Reader
public List<StripeInformation> getStripes()
Reader
getStripes
in interface Reader
public long getContentLength()
Reader
getContentLength
in interface Reader
public List<OrcProto.Type> getTypes()
Reader
public OrcFile.Version getFileVersion()
Reader
getFileVersion
in interface Reader
public OrcFile.WriterVersion getWriterVersion()
Reader
getWriterVersion
in interface Reader
public OrcProto.FileTail getFileTail()
Reader
getFileTail
in interface Reader
public int getRowIndexStride()
Reader
getRowIndexStride
in interface Reader
public ColumnStatistics[] getStatistics()
Reader
getStatistics
in interface Reader
public TypeDescription getSchema()
Reader
protected static void ensureOrcFooter(org.apache.hadoop.fs.FSDataInputStream in, org.apache.hadoop.fs.Path path, int psLen, ByteBuffer buffer) throws IOException
in
- the file being read
path
- the filename for error messages
psLen
- the postscript length
buffer
- the tail of the file
IOException
protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException
psLen
- the postscript length
buffer
- the tail of the file
IOException
protected static void checkOrcVersion(org.slf4j.Logger log, org.apache.hadoop.fs.Path path, List<Integer> version)
log
- the logger to write any error message to
path
- the data source path for error messages
version
- the version of hive that wrote the file.
public static OrcFile.WriterVersion getWriterVersion(int writerVersion)
writerVersion
- the integer writer version
public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos, int metadataSize, CompressionCodec codec, int bufferSize) throws IOException
IOException
public static OrcTail extractFileTail(ByteBuffer buffer) throws IOException
IOException
public static OrcTail extractFileTail(ByteBuffer buffer, long fileLength, long modificationTime) throws IOException
IOException
protected OrcTail extractFileTail(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path path, long maxFileLength) throws IOException
IOException
public ByteBuffer getSerializedFileFooter()
getSerializedFileFooter
in interface Reader
public RecordReader rows() throws IOException
Reader
rows
in interface Reader
IOException
public RecordReader rows(Reader.Options options) throws IOException
Reader
rows
in interface Reader
options
- the options to read with
IOException
public long getRawDataSize()
Reader
getRawDataSize
in interface Reader
public long getRawDataSizeFromColIndices(List<Integer> colIndices)
Reader
getRawDataSizeFromColIndices
in interface Reader
colIndices
- internal column id (check orcfiledump for column ids)
public static long getRawDataSizeFromColIndices(List<Integer> colIndices, List<OrcProto.Type> types, List<OrcProto.ColumnStatistics> stats)
public long getRawDataSizeOfColumns(List<String> colNames)
Reader
getRawDataSizeOfColumns
in interface Reader
public List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics()
getOrcProtoStripeStatistics
in interface Reader
public List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics()
getOrcProtoFileStatistics
in interface Reader
public List<StripeStatistics> getStripeStatistics() throws IOException
getStripeStatistics
in interface Reader
IOException
public List<OrcProto.UserMetadataItem> getOrcProtoUserMetadata()
public List<Integer> getVersionList()
getVersionList
in interface Reader
public int getMetadataSize()
getMetadataSize
in interface Reader
Copyright © 2016 The Apache Software Foundation. All rights reserved.