public abstract class AbstractRecordWriter extends Object implements RecordWriter
Modifier and Type | Class and Description |
---|---|
protected static class | AbstractRecordWriter.OrcMemoryPressureMonitor |
Constructor and Description |
---|
AbstractRecordWriter(String lineDelimiter) |
Modifier and Type | Method and Description |
---|---|
protected void | checkAutoFlush() |
void | close() - Close the RecordUpdater. |
protected RecordUpdater | createRecordUpdater(org.apache.hadoop.fs.Path partitionPath, int bucketId, Long minWriteId, Long maxWriteID) |
abstract AbstractSerDe | createSerde() - Create SerDe for the record writer. |
abstract Object | encode(byte[] record) - Encode a record as an Object that Hive can read with the ObjectInspector associated with the serde returned by createSerde(). |
void | flush() - Flush records from buffer. |
protected int | getBucket(Object row) |
protected List<Integer> | getBucketColIDs(List<String> bucketCols, List<FieldSchema> cols) |
protected Object[] | getBucketFields(Object row) |
protected static ObjectInspector[] | getObjectInspectorsForBucketedCols(List<Integer> bucketIds, StructObjectInspector recordObjInspector) |
protected Object[] | getPartitionFields(Object row) |
Set<String> | getPartitions() - Get the set of partitions that were added by the record writer. |
protected List<String> | getPartitionValues(Object row) |
protected RecordUpdater | getRecordUpdater(List<String> partitionValues, int bucketId) |
protected String | getWatermark(String partition) - Used to tag error messages with some breadcrumbs. |
void | init(StreamingConnection conn, long minWriteId, long maxWriteId) - Initialize record writer. |
protected List<RecordUpdater> | initializeBuckets() |
protected void | logStats(String prefix) |
protected void | prepareBucketingFields() |
protected void | preparePartitioningFields() |
protected void | setupMemoryMonitoring() |
void | write(long writeId, byte[] record) - Writes using a Hive RecordUpdater. |
void | write(long writeId, InputStream inputStream) - Writes using a Hive RecordUpdater. |
protected HiveConf conf
protected StreamingConnection conn
protected Table table
protected String fullyQualifiedTableName
protected Map<String,List<RecordUpdater>> updaters
protected StructObjectInspector inputRowObjectInspector
protected ObjectInspector outputRowObjectInspector
protected ObjectInspector[] partitionObjInspectors
protected StructField[] partitionStructFields
protected Object[] partitionFieldData
protected ObjectInspector[] bucketObjInspectors
protected StructField[] bucketStructFields
protected Object[] bucketFieldData
protected int totalBuckets
protected String defaultPartitionName
protected boolean isBucketed
protected AcidOutputFormat<?,?> acidOutputFormat
protected Long curBatchMinWriteId
protected Long curBatchMaxWriteId
protected final String lineDelimiter
protected HeapMemoryMonitor heapMemoryMonitor
protected AtomicBoolean lowMemoryCanary
protected long ingestSizeBytes
protected boolean autoFlush
protected float memoryUsageThreshold
protected long ingestSizeThreshold
protected org.apache.hadoop.fs.FileSystem fs
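Concrete writers extend this class by implementing createSerde() and encode(); the protected conf, table and fullyQualifiedTableName fields above are available to them. Below is a minimal, illustrative sketch of such a subclass. The class name SimpleDelimitedWriter, the choice of LazySimpleSerDe, and the two-argument serde.initialize(conf, props) call (Hive 3.x style) are assumptions made for the example, not part of this API.

```java
import java.util.Properties;

import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hive.streaming.AbstractRecordWriter;
import org.apache.hive.streaming.SerializationError;

// Hypothetical subclass, shown only to illustrate the two abstract extension points.
public class SimpleDelimitedWriter extends AbstractRecordWriter {

  private AbstractSerDe serde;

  public SimpleDelimitedWriter(String lineDelimiter) {
    super(lineDelimiter);
  }

  @Override
  public AbstractSerDe createSerde() throws SerializationError {
    try {
      // 'conf' and 'table' are protected fields inherited from AbstractRecordWriter.
      Properties tableProps = table.getMetadata();
      LazySimpleSerDe lazySerde = new LazySimpleSerDe();
      lazySerde.initialize(conf, tableProps); // assumes the Hive 3.x two-argument initialize
      this.serde = lazySerde;
      return lazySerde;
    } catch (SerDeException e) {
      throw new SerializationError("Failed to create serde for " + fullyQualifiedTableName, e);
    }
  }

  @Override
  public Object encode(byte[] record) throws SerializationError {
    try {
      // Deserialize the raw bytes into an Object readable by the serde's ObjectInspector.
      return serde.deserialize(new BytesWritable(record));
    } catch (SerDeException e) {
      throw new SerializationError("Unable to convert byte[] record into Object", e);
    }
  }
}
```

The writers shipped with the streaming package (for example StrictDelimitedInputWriter and StrictJsonWriter) follow this same pattern.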
public AbstractRecordWriter(String lineDelimiter)
public void init(StreamingConnection conn, long minWriteId, long maxWriteId) throws StreamingException
Initialize record writer.
Specified by: init in interface RecordWriter
Parameters:
conn - streaming connection
minWriteId - min write id
maxWriteId - max write id
Throws:
StreamingException - thrown when initialization failed

protected void setupMemoryMonitoring()
protected void prepareBucketingFields()
protected void preparePartitioningFields()
protected String getWatermark(String partition)
protected List<Integer> getBucketColIDs(List<String> bucketCols, List<FieldSchema> cols)
public abstract AbstractSerDe createSerde() throws SerializationError
Create SerDe for the record writer.
Throws:
SerializationError - if serde cannot be created.

public abstract Object encode(byte[] record) throws SerializationError
Encode a record as an Object that Hive can read with the ObjectInspector associated with the serde returned by createSerde(). This is public so that test frameworks can use it.
Parameters:
record - record to be deserialized
Throws:
SerializationError - any error during serialization or deserialization of record

protected int getBucket(Object row)
public void flush() throws StreamingIOFailure
Flush records from buffer.
Specified by: flush in interface RecordWriter
Throws:
StreamingIOFailure
public void close() throws StreamingIOFailure
Close the RecordUpdater.
Specified by: close in interface RecordWriter
Throws:
StreamingIOFailure
protected static ObjectInspector[] getObjectInspectorsForBucketedCols(List<Integer> bucketIds, StructObjectInspector recordObjInspector)
public void write(long writeId, InputStream inputStream) throws StreamingException
Writes using a Hive RecordUpdater.
Specified by: write in interface RecordWriter
Parameters:
writeId - the write ID of the table mapping to Txn in which the write occurs
inputStream - the record to be written
Throws:
StreamingException - thrown when write fails

public void write(long writeId, byte[] record) throws StreamingException
Writes using a Hive RecordUpdater.
Specified by: write in interface RecordWriter
Parameters:
writeId - the write ID of the table mapping to Txn in which the write occurs
record - the record to be written
Throws:
StreamingException - thrown when write fails

protected void checkAutoFlush() throws StreamingIOFailure
Throws:
StreamingIOFailure
public Set<String> getPartitions()
Get the set of partitions that were added by the record writer.
Specified by: getPartitions in interface RecordWriter
protected RecordUpdater createRecordUpdater(org.apache.hadoop.fs.Path partitionPath, int bucketId, Long minWriteId, Long maxWriteID) throws IOException
Throws:
IOException
protected RecordUpdater getRecordUpdater(List<String> partitionValues, int bucketId) throws StreamingIOFailure
Throws:
StreamingIOFailure
protected List<RecordUpdater> initializeBuckets()
protected void logStats(String prefix)
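For context on how the lifecycle methods above are driven, here is a minimal usage sketch that hands one of the shipped writers to a streaming connection; the database name, table name, and agent string are placeholder assumptions. The connection, not user code, invokes init(), write(), flush(), and close() on the writer.

```java
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingConnection;
import org.apache.hive.streaming.StrictDelimitedInputWriter;

public class StreamingWriteExample {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();

    // StrictDelimitedInputWriter is one of the AbstractRecordWriter subclasses shipped with Hive.
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
        .withFieldDelimiter(',')
        .build();

    StreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase("default")        // placeholder database
        .withTable("alerts")            // placeholder transactional table
        .withAgentInfo("example-agent") // placeholder agent info
        .withRecordWriter(writer)       // the connection drives the writer's lifecycle
        .withHiveConf(conf)
        .connect();

    connection.beginTransaction();
    connection.write("1,hello".getBytes());
    connection.write("2,world".getBytes());
    connection.commitTransaction();
    connection.close();
  }
}
```

Each connection.write(...) call is routed to the writer's write(writeId, ...) methods documented above, using the write id of the current transaction.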
Copyright © 2022 The Apache Software Foundation. All rights reserved.