public class VectorizedParquetRecordReader extends ParquetRecordReaderBase implements org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>, RowPositionAwareVectorizedRecordReader
Modifier and Type | Field and Description |
---|---|
protected org.apache.parquet.schema.MessageType |
fileSchema |
static org.slf4j.Logger |
LOG |
protected org.apache.parquet.schema.MessageType |
requestedSchema |
protected long |
totalRowCount
The total number of rows this RecordReader will eventually read. |
Fields inherited from class ParquetRecordReaderBase: filePath, fileSplit, filteredBlocks, jobConf, legacyConversionEnabled, parquetInputSplit, parquetMetadata, projectionPusher, reader, schemaSize, serDeStats, skipProlepticConversion, skipTimestampConversion
Constructor and Description |
---|
VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit,
org.apache.hadoop.mapred.JobConf conf) |
VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit,
org.apache.hadoop.mapred.JobConf conf,
FileMetadataCache metadataCache,
DataCache dataCache,
org.apache.hadoop.conf.Configuration cacheConf) |
VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit,
org.apache.hadoop.mapred.JobConf conf,
FileMetadataCache metadataCache,
DataCache dataCache,
org.apache.hadoop.conf.Configuration cacheConf,
org.apache.parquet.hadoop.metadata.ParquetMetadata parquetMetadata) |
Modifier and Type | Method and Description |
---|---|
static CacheTag |
cacheTagOfParquetFile(org.apache.hadoop.fs.Path path,
org.apache.hadoop.conf.Configuration cacheConf,
org.apache.hadoop.mapred.JobConf jobConf) |
void |
close() |
org.apache.hadoop.io.NullWritable |
createKey() |
VectorizedRowBatch |
createValue() |
protected org.apache.parquet.hadoop.metadata.ParquetMetadata |
getParquetMetadata(org.apache.hadoop.fs.Path path,
org.apache.hadoop.mapred.JobConf conf) |
long |
getPos() |
float |
getProgress() |
long |
getRowNumber()
Returns the row position (in the file) of the first row in the last returned batch. |
void |
initialize(org.apache.parquet.hadoop.ParquetInputSplit split,
org.apache.hadoop.mapred.JobConf configuration) |
boolean |
next(org.apache.hadoop.io.NullWritable nullWritable,
VectorizedRowBatch vectorizedRowBatch) |
Methods inherited from class ParquetRecordReaderBase: getFilteredBlocks, getSplit, getStats, setFilter, setupMetadataAndParquetSplit
public static final org.slf4j.Logger LOG
protected org.apache.parquet.schema.MessageType fileSchema
protected org.apache.parquet.schema.MessageType requestedSchema
protected long totalRowCount
public VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit, org.apache.hadoop.mapred.JobConf conf) throws IOException
IOException
public VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit, org.apache.hadoop.mapred.JobConf conf, FileMetadataCache metadataCache, DataCache dataCache, org.apache.hadoop.conf.Configuration cacheConf, org.apache.parquet.hadoop.metadata.ParquetMetadata parquetMetadata) throws IOException
IOException
public VectorizedParquetRecordReader(org.apache.hadoop.mapred.InputSplit oldInputSplit, org.apache.hadoop.mapred.JobConf conf, FileMetadataCache metadataCache, DataCache dataCache, org.apache.hadoop.conf.Configuration cacheConf) throws IOException
IOException
protected org.apache.parquet.hadoop.metadata.ParquetMetadata getParquetMetadata(org.apache.hadoop.fs.Path path, org.apache.hadoop.mapred.JobConf conf) throws IOException
Overrides: getParquetMetadata
in class ParquetRecordReaderBase
Throws: IOException
public void initialize(org.apache.parquet.hadoop.ParquetInputSplit split, org.apache.hadoop.mapred.JobConf configuration) throws IOException, InterruptedException, HiveException
public static CacheTag cacheTagOfParquetFile(org.apache.hadoop.fs.Path path, org.apache.hadoop.conf.Configuration cacheConf, org.apache.hadoop.mapred.JobConf jobConf)
public boolean next(org.apache.hadoop.io.NullWritable nullWritable, VectorizedRowBatch vectorizedRowBatch) throws IOException
next
in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
IOException
public org.apache.hadoop.io.NullWritable createKey()
createKey
in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
public VectorizedRowBatch createValue()
createValue
in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
public long getPos() throws IOException
getPos
in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
IOException
public void close() throws IOException
close
in interface Closeable
close
in interface AutoCloseable
close
in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
IOException
public float getProgress() throws IOException
getProgress
in interface org.apache.hadoop.mapred.RecordReader<org.apache.hadoop.io.NullWritable,VectorizedRowBatch>
IOException
public long getRowNumber() throws IOException
Description copied from interface: RowPositionAwareVectorizedRecordReader
getRowNumber
in interface RowPositionAwareVectorizedRecordReader
IOException
Copyright © 2023 The Apache Software Foundation. All rights reserved.