public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements Serializable
Modifier and Type | Class and Description |
---|---|
static class |
FileSinkOperator.Counter
Counters.
|
class |
FileSinkOperator.FSPaths |
static interface |
FileSinkOperator.RecordWriter
RecordWriter.
|
Operator.OperatorFunc, Operator.State
Modifier and Type | Field and Description |
---|---|
protected boolean |
autoDelete |
protected boolean |
bDynParts |
protected Map<Integer,Integer> |
bucketMap |
protected String |
childSpecPathDynLinkedPartitions |
protected long |
cntr |
protected List<String> |
dpColNames |
protected DynamicPartitionCtx |
dpCtx |
protected int |
dpStartCol |
protected List<String> |
dpVals |
protected List<Object> |
dpWritables |
protected boolean |
filesCreated |
protected org.apache.hadoop.fs.FileSystem |
fs |
protected FileSinkOperator.FSPaths |
fsp |
protected HiveOutputFormat<?,?> |
hiveOutputFormat |
protected boolean |
isCollectRWStats |
protected boolean |
isCompressed |
protected boolean |
isSkewedStoredAsSubDirectories |
protected boolean |
isTemporary |
protected org.apache.hadoop.mapred.JobConf |
jc |
protected HiveKey |
key |
protected ListBucketingCtx |
lbCtx |
static org.apache.commons.logging.Log |
LOG |
protected long |
logEveryNRows |
protected int |
maxPartitions |
protected boolean |
multiFileSpray |
protected int |
numDynParts |
protected long |
numRows |
protected org.apache.hadoop.fs.Path |
parent |
protected HivePartitioner<HiveKey,Object> |
prtner |
protected org.apache.hadoop.io.Writable |
recordValue |
protected org.apache.hadoop.io.LongWritable |
row_count |
protected FileSinkOperator.RecordWriter[] |
rowOutWriters |
protected Serializer |
serializer |
protected org.apache.hadoop.fs.Path |
specPath |
protected boolean |
statsCollectRawDataSize |
protected boolean[] |
statsFromRecordWriter |
protected int |
totalFiles |
protected HashMap<String,FileSinkOperator.FSPaths> |
valToPaths |
alias, childOperators, childOperatorsArray, childOperatorsTag, colExprMap, conf, CONTEXT_NAME_KEY, done, groupKeyObject, HIVECOUNTERCREATEDFILES, HIVECOUNTERFATAL, id, inputObjInspectors, isLogDebugEnabled, isLogInfoEnabled, isLogTraceEnabled, operatorId, out, outputObjInspector, parentOperators, PLOG, reporter, state, statsMap
Constructor and Description |
---|
FileSinkOperator() |
Modifier and Type | Method and Description |
---|---|
protected boolean |
areAllTrue(boolean[] statsFromRW) |
void |
augmentPlan()
Called during semantic analysis as operators are being added
in order to give them a chance to compute any additional plan information
needed.
|
void |
checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored,
org.apache.hadoop.mapred.JobConf job) |
void |
closeOp(boolean abort)
Operator specific close routine.
|
protected void |
createBucketFiles(FileSinkOperator.FSPaths fsp) |
protected void |
createBucketForFileIdx(FileSinkOperator.FSPaths fsp,
int filesIdx) |
protected String |
generateListBucketingDirName(Object row)
Generate list bucketing directory name from a row.
|
protected FileSinkOperator.FSPaths |
getDynOutPaths(List<String> row,
String lbDirName) |
String |
getName()
Implements the getName function for the Node Interface.
|
static String |
getOperatorName() |
OperatorType |
getType()
Return the type of the specific operator among the
types in OperatorType.
|
protected Collection<Future<?>> |
initializeOp(org.apache.hadoop.conf.Configuration hconf)
Operator specific initialization.
|
void |
jobCloseOp(org.apache.hadoop.conf.Configuration hconf,
boolean success) |
protected FileSinkOperator.FSPaths |
lookupListBucketingPaths(String lbDirName)
Lookup list bucketing path.
|
void |
process(Object row,
int tag)
Process the row.
|
protected boolean |
updateProgress()
Report status to the JobTracker (JT) so that it won't kill this task if closing takes too long
because there are too many files to close and the NameNode (NN) is overloaded.
|
acceptLimitPushdown, allInitializedParentsAreClosed, areAllParentsInitialized, cleanUpInputFileChanged, cleanUpInputFileChangedOp, clone, cloneOp, cloneRecursiveChildren, close, columnNamesRowResolvedCanBeObtained, completeInitializationOp, createDummy, defaultEndGroup, defaultStartGroup, dump, dump, endGroup, flush, forward, getAdditionalCounters, getChildOperators, getChildren, getColumnExprMap, getConf, getConfiguration, getDone, getExecContext, getGroupKeyObject, getIdentifier, getInputObjInspectors, getNextCntr, getNumChild, getNumParent, getOperatorId, getOpTraits, getOutputObjInspector, getParentOperators, getSchema, getStatistics, getStats, initEvaluators, initEvaluators, initEvaluatorsAndReturnStruct, initialize, initialize, initializeChildren, initializeLocalWork, initOperatorId, isUseBucketizedHiveInputFormat, jobClose, logStats, opAllowedAfterMapJoin, opAllowedBeforeMapJoin, opAllowedBeforeSortMergeJoin, opAllowedConvertMapJoin, passExecContext, preorderMap, processGroup, removeChild, removeChildAndAdoptItsChildren, removeChildren, removeParent, replaceChild, replaceParent, reset, resetId, resetStats, setAlias, setChildOperators, setColumnExprMap, setConf, setDone, setExecContext, setGroupKeyObject, setId, setInputContext, setInputObjInspectors, setOperatorId, setOpTraits, setOutputCollector, setParentOperators, setReporter, setSchema, setStatistics, setUseBucketizedHiveInputFormat, startGroup, supportAutomaticSortMergeJoin, supportSkewJoinOptimization, supportUnionRemoveOptimization, toString, toString
public static final org.apache.commons.logging.Log LOG
protected transient HashMap<String,FileSinkOperator.FSPaths> valToPaths
protected transient int numDynParts
protected transient DynamicPartitionCtx dpCtx
protected transient boolean isCompressed
protected transient boolean isTemporary
protected transient org.apache.hadoop.fs.Path parent
protected transient HiveOutputFormat<?,?> hiveOutputFormat
protected transient org.apache.hadoop.fs.Path specPath
protected transient String childSpecPathDynLinkedPartitions
protected transient int dpStartCol
protected transient FileSinkOperator.RecordWriter[] rowOutWriters
protected transient int maxPartitions
protected transient ListBucketingCtx lbCtx
protected transient boolean isSkewedStoredAsSubDirectories
protected transient boolean statsCollectRawDataSize
protected transient boolean[] statsFromRecordWriter
protected transient boolean isCollectRWStats
protected transient long numRows
protected transient long cntr
protected transient long logEveryNRows
protected transient org.apache.hadoop.fs.FileSystem fs
protected transient Serializer serializer
protected final transient org.apache.hadoop.io.LongWritable row_count
protected transient int totalFiles
protected transient boolean multiFileSpray
protected transient HivePartitioner<HiveKey,Object> prtner
protected final transient HiveKey key
protected transient FileSinkOperator.FSPaths fsp
protected transient boolean bDynParts
protected transient boolean autoDelete
protected transient org.apache.hadoop.mapred.JobConf jc
protected boolean filesCreated
protected org.apache.hadoop.io.Writable recordValue
protected Collection<Future<?>> initializeOp(org.apache.hadoop.conf.Configuration hconf) throws HiveException
Operator
initializeOp
in class Operator<FileSinkDesc>
HiveException
protected void createBucketFiles(FileSinkOperator.FSPaths fsp) throws HiveException
HiveException
protected void createBucketForFileIdx(FileSinkOperator.FSPaths fsp, int filesIdx) throws HiveException
HiveException
protected boolean updateProgress()
public void process(Object row, int tag) throws HiveException
Operator
process
in class Operator<FileSinkDesc>
row
- The object representing the row.
tag
- The tag of the row usually means which parent this row comes from.
Rows with the same tag should have exactly the same rowInspector
all the time.
HiveException
protected boolean areAllTrue(boolean[] statsFromRW)
protected FileSinkOperator.FSPaths lookupListBucketingPaths(String lbDirName) throws HiveException
lbDirName
- list bucketing directory name.
HiveException
protected String generateListBucketingDirName(Object row)
row
- row to process.
protected FileSinkOperator.FSPaths getDynOutPaths(List<String> row, String lbDirName) throws HiveException
HiveException
public void closeOp(boolean abort) throws HiveException
Operator
closeOp
in class Operator<FileSinkDesc>
HiveException
public String getName()
Operator
getName
in interface Node
getName
in class Operator<FileSinkDesc>
public static String getOperatorName()
public void jobCloseOp(org.apache.hadoop.conf.Configuration hconf, boolean success) throws HiveException
jobCloseOp
in class Operator<FileSinkDesc>
HiveException
public OperatorType getType()
Operator
getType
in class Operator<FileSinkDesc>
public void augmentPlan()
Operator
augmentPlan
in class Operator<FileSinkDesc>
public void checkOutputSpecs(org.apache.hadoop.fs.FileSystem ignored, org.apache.hadoop.mapred.JobConf job) throws IOException
IOException
Copyright © 2017 The Apache Software Foundation. All rights reserved.