Reader.Options
Modifier and Type | Field and Description |
---|---|
protected int |
bufferSize |
protected CompressionCodec |
codec |
protected CompressionKind |
compressionKind |
protected org.apache.hadoop.conf.Configuration |
conf |
protected org.apache.hadoop.fs.FileSystem |
fileSystem |
protected OrcProto.Footer |
footer |
protected org.apache.hadoop.fs.Path |
path |
Constructor and Description |
---|
ReaderImpl(org.apache.hadoop.fs.Path path,
OrcFile.ReaderOptions options)
Constructor that lets the user specify additional options.
|
Modifier and Type | Method and Description |
---|---|
CompressionKind |
getCompression()
Get the compression kind.
|
int |
getCompressionSize()
Get the buffer size for the compression.
|
long |
getContentLength()
Get the length of the file.
|
org.apache.hadoop.hive.ql.io.orc.ReaderImpl.FileMetaInfo |
getFileMetaInfo() |
OrcFile.Version |
getFileVersion()
Get the file format version.
|
Metadata |
getMetadata()
Get the metadata information like stripe level column statistics etc.
|
List<String> |
getMetadataKeys()
Get the user metadata keys.
|
ByteBuffer |
getMetadataValue(String key)
Get a user metadata value.
|
long |
getNumberOfRows()
Get the number of rows in the file.
|
ObjectInspector |
getObjectInspector()
Get the object inspector for looking at the objects.
|
List<OrcProto.UserMetadataItem> |
getOrcProtoUserMetadata() |
long |
getRawDataSize()
Get the deserialized data size of the file.
|
long |
getRawDataSizeOfColumns(List<String> colNames)
Get the deserialized data size of the specified columns.
|
int |
getRowIndexStride()
Get the number of rows per entry in the row index.
|
ColumnStatistics[] |
getStatistics()
Get the statistics about the columns in the file.
|
List<StripeInformation> |
getStripes()
Get the list of stripes.
|
List<OrcProto.Type> |
getTypes()
Get the list of types contained in the file.
|
OrcFile.WriterVersion |
getWriterVersion()
Get the version of the writer of this file.
|
boolean |
hasMetadataValue(String key)
Did the user set the given metadata value?
|
MetadataReader |
metadata() |
RecordReader |
rows()
Create a RecordReader that reads everything with the default options.
|
RecordReader |
rows(boolean[] include)
Create a RecordReader that will scan the entire file.
|
RecordReader |
rows(long offset,
long length,
boolean[] include)
Create a RecordReader that will start reading at the first stripe after
offset up to the stripe that starts at offset + length.
|
RecordReader |
rows(long offset,
long length,
boolean[] include,
SearchArgument sarg,
String[] columnNames)
Create a RecordReader that will read a section of a file.
|
RecordReader |
rowsOptions(Reader.Options options)
Create a RecordReader that uses the options given.
|
protected final org.apache.hadoop.fs.FileSystem fileSystem
protected final org.apache.hadoop.fs.Path path
protected final CompressionKind compressionKind
protected final CompressionCodec codec
protected final int bufferSize
protected final OrcProto.Footer footer
protected final org.apache.hadoop.conf.Configuration conf
public ReaderImpl(org.apache.hadoop.fs.Path path, OrcFile.ReaderOptions options) throws IOException
path
- pathname for file
options
- options for reading
IOException
public long getNumberOfRows()
Reader
getNumberOfRows
in interface Reader
public List<String> getMetadataKeys()
Reader
getMetadataKeys
in interface Reader
public ByteBuffer getMetadataValue(String key)
Reader
getMetadataValue
in interface Reader
key
- a key given by the user
public boolean hasMetadataValue(String key)
Reader
hasMetadataValue
in interface Reader
key
- the key to check
public CompressionKind getCompression()
Reader
getCompression
in interface Reader
public int getCompressionSize()
Reader
getCompressionSize
in interface Reader
public List<StripeInformation> getStripes()
Reader
getStripes
in interface Reader
public ObjectInspector getObjectInspector()
Reader
getObjectInspector
in interface Reader
public long getContentLength()
Reader
getContentLength
in interface Reader
public List<OrcProto.Type> getTypes()
Reader
public OrcFile.Version getFileVersion()
Reader
getFileVersion
in interface Reader
public OrcFile.WriterVersion getWriterVersion()
Reader
getWriterVersion
in interface Reader
public int getRowIndexStride()
Reader
getRowIndexStride
in interface Reader
public ColumnStatistics[] getStatistics()
Reader
getStatistics
in interface Reader
public org.apache.hadoop.hive.ql.io.orc.ReaderImpl.FileMetaInfo getFileMetaInfo()
public RecordReader rows() throws IOException
Reader
rows
in interface Reader
IOException
public RecordReader rowsOptions(Reader.Options options) throws IOException
Reader
rowsOptions
in interface Reader
options
- the options to read with
IOException
public RecordReader rows(boolean[] include) throws IOException
Reader
rows
in interface Reader
include
- true for each column that should be included
IOException
public RecordReader rows(long offset, long length, boolean[] include) throws IOException
Reader
rows
in interface Reader
offset
- a byte offset in the file
length
- a number of bytes in the file
include
- true for each column that should be included
IOException
public RecordReader rows(long offset, long length, boolean[] include, SearchArgument sarg, String[] columnNames) throws IOException
Reader
rows
in interface Reader
offset
- the minimum offset of the first stripe to read
length
- the distance from offset of the first address to stop reading at
include
- true for each column that should be included
sarg
- a search argument that limits the rows that should be read.
columnNames
- the names of the included columns
IOException
public long getRawDataSize()
Reader
getRawDataSize
in interface Reader
public long getRawDataSizeOfColumns(List<String> colNames)
Reader
getRawDataSizeOfColumns
in interface Reader
public Metadata getMetadata() throws IOException
Reader
getMetadata
in interface Reader
IOException
public List<OrcProto.UserMetadataItem> getOrcProtoUserMetadata()
public MetadataReader metadata() throws IOException
metadata
in interface Reader
IOException
Copyright © 2017 The Apache Software Foundation. All rights reserved.