public abstract class Operator<T extends OperatorDesc> extends Object implements Serializable, Cloneable, Node
Modifier and Type | Class and Description |
---|---|
static interface |
Operator.OperatorFunc
OperatorFunc.
|
static class |
Operator.State
State.
|
Modifier and Type | Field and Description |
---|---|
protected String |
alias |
protected List<Operator<? extends OperatorDesc>> |
childOperators |
protected Operator<? extends OperatorDesc>[] |
childOperatorsArray
Cache childOperators in an array for faster access.
|
protected int[] |
childOperatorsTag |
protected Map<String,ExprNodeDesc> |
colExprMap
A map of output column name to input expression map.
|
protected T |
conf |
static String |
CONTEXT_NAME_KEY |
protected boolean |
done |
protected Object |
groupKeyObject |
static String |
HIVECOUNTERCREATEDFILES |
static String |
HIVECOUNTERFATAL |
protected String |
id |
protected ObjectInspector[] |
inputObjInspectors |
protected boolean |
isLogDebugEnabled |
protected boolean |
isLogInfoEnabled |
protected boolean |
isLogTraceEnabled |
protected org.apache.commons.logging.Log |
LOG |
protected String |
operatorId |
protected org.apache.hadoop.mapred.OutputCollector |
out |
protected ObjectInspector |
outputObjInspector |
protected List<Operator<? extends OperatorDesc>> |
parentOperators |
protected org.apache.commons.logging.Log |
PLOG |
protected org.apache.hadoop.mapred.Reporter |
reporter |
protected Operator.State |
state |
protected Map<String,org.apache.hadoop.io.LongWritable> |
statsMap |
Constructor and Description |
---|
Operator() |
Operator(org.apache.hadoop.mapred.Reporter reporter)
Create an operator with a reporter.
|
Modifier and Type | Method and Description |
---|---|
boolean |
acceptLimitPushdown()
Used by LimitPushdownOptimizer.
If the operators between the limit and the reduce-sink do not remove any input rows
in the range of the limit count, the limit can be pushed down to the reduce-sink operator.
|
protected boolean |
allInitializedParentsAreClosed() |
protected boolean |
areAllParentsInitialized()
checks whether all parent operators are initialized or not.
|
void |
augmentPlan()
Called during semantic analysis as operators are being added
in order to give them a chance to compute any additional plan information
needed.
|
void |
cleanUpInputFileChanged() |
void |
cleanUpInputFileChangedOp() |
Operator<? extends OperatorDesc> |
clone() |
Operator<? extends OperatorDesc> |
cloneOp()
Clones only the operator.
|
Operator<? extends OperatorDesc> |
cloneRecursiveChildren()
Recursively clones all the children of the tree,
Fixes the pointers to children, parents and the pointers to itself coming from the children.
|
void |
close(boolean abort) |
protected void |
closeOp(boolean abort)
Operator specific close routine.
|
boolean |
columnNamesRowResolvedCanBeObtained() |
protected void |
completeInitializationOp(Object[] os)
This method can be used to retrieve the results from async operations
started at init time - before the operator pipeline is started.
|
static Operator |
createDummy() |
protected void |
defaultEndGroup() |
protected void |
defaultStartGroup() |
String |
dump(int level) |
String |
dump(int level,
HashSet<Integer> seenOpts) |
void |
endGroup() |
void |
flush() |
protected void |
forward(Object row,
ObjectInspector rowInspector) |
protected List<String> |
getAdditionalCounters() |
List<Operator<? extends OperatorDesc>> |
getChildOperators() |
ArrayList<Node> |
getChildren()
Implements the getChildren function for the Node Interface.
|
Map<String,ExprNodeDesc> |
getColumnExprMap()
Returns a map of output column name to input expression map. Note that
currently it returns only key columns for ReduceSink and GroupBy operators.
|
T |
getConf() |
org.apache.hadoop.conf.Configuration |
getConfiguration() |
boolean |
getDone() |
ExecMapperContext |
getExecContext() |
Object |
getGroupKeyObject() |
String |
getIdentifier()
This function is not named getId(), to make sure java serialization does
NOT serialize it.
|
ObjectInspector[] |
getInputObjInspectors() |
String |
getName()
Implements the getName function for the Node Interface.
|
protected long |
getNextCntr(long cntr) |
int |
getNumChild() |
int |
getNumParent() |
String |
getOperatorId() |
static String |
getOperatorName() |
OpTraits |
getOpTraits() |
ObjectInspector |
getOutputObjInspector() |
List<Operator<? extends OperatorDesc>> |
getParentOperators() |
RowSchema |
getSchema() |
Statistics |
getStatistics() |
Map<String,Long> |
getStats() |
abstract OperatorType |
getType()
Return the type of the specific operator among the
types in OperatorType.
|
protected static ObjectInspector[] |
initEvaluators(ExprNodeEvaluator<?>[] evals,
int start,
int length,
ObjectInspector rowInspector)
Initialize an array of ExprNodeEvaluator from start, for specified length
and return the result ObjectInspectors.
|
protected static ObjectInspector[] |
initEvaluators(ExprNodeEvaluator<?>[] evals,
ObjectInspector rowInspector)
Initialize an array of ExprNodeEvaluator and return the result
ObjectInspectors.
|
protected static StructObjectInspector |
initEvaluatorsAndReturnStruct(ExprNodeEvaluator<?>[] evals,
List<String> outputColName,
ObjectInspector rowInspector)
Initialize an array of ExprNodeEvaluator and put the return values into a
StructObjectInspector with integer field names.
|
void |
initialize(org.apache.hadoop.conf.Configuration hconf,
ObjectInspector[] inputOIs)
Initializes operators only if all parents have been initialized.
|
protected void |
initialize(org.apache.hadoop.conf.Configuration hconf,
ObjectInspector inputOI,
int parentId)
Collects all the parent's output object inspectors and calls actual
initialization method.
|
protected void |
initializeChildren(org.apache.hadoop.conf.Configuration hconf)
Calls initialize on each of the children with outputObjInspector as the
output row format.
|
void |
initializeLocalWork(org.apache.hadoop.conf.Configuration hconf) |
protected Collection<Future<?>> |
initializeOp(org.apache.hadoop.conf.Configuration hconf)
Operator specific initialization.
|
void |
initOperatorId() |
boolean |
isUseBucketizedHiveInputFormat() |
void |
jobClose(org.apache.hadoop.conf.Configuration conf,
boolean success)
Unlike other operator interfaces which are called from map or reduce task,
jobClose is called from the jobclient side once the job has completed.
|
void |
jobCloseOp(org.apache.hadoop.conf.Configuration conf,
boolean success) |
void |
logStats() |
boolean |
opAllowedAfterMapJoin() |
boolean |
opAllowedBeforeMapJoin() |
boolean |
opAllowedBeforeSortMergeJoin() |
boolean |
opAllowedConvertMapJoin() |
void |
passExecContext(ExecMapperContext execContext)
Pass the execContext reference to every child operator
|
void |
preorderMap(Operator.OperatorFunc opFunc) |
abstract void |
process(Object row,
int tag)
Process the row.
|
void |
processGroup(int tag) |
void |
removeChild(Operator<? extends OperatorDesc> child) |
void |
removeChildAndAdoptItsChildren(Operator<? extends OperatorDesc> child)
Remove a child and add all of the child's children to the location of the child
|
boolean |
removeChildren(int depth) |
void |
removeParent(Operator<? extends OperatorDesc> parent) |
void |
replaceChild(Operator<? extends OperatorDesc> child,
Operator<? extends OperatorDesc> newChild)
Replace one child with another at the same position.
|
void |
replaceParent(Operator<? extends OperatorDesc> parent,
Operator<? extends OperatorDesc> newParent)
Replace one parent with another at the same position.
|
void |
reset() |
static void |
resetId() |
void |
resetStats() |
void |
setAlias(String alias)
Store the alias this operator is working on behalf of.
|
void |
setChildOperators(List<Operator<? extends OperatorDesc>> childOperators) |
void |
setColumnExprMap(Map<String,ExprNodeDesc> colExprMap) |
void |
setConf(T conf) |
protected void |
setDone(boolean done) |
void |
setExecContext(ExecMapperContext execContext) |
void |
setGroupKeyObject(Object keyObject) |
void |
setId(String id) |
void |
setInputContext(String inputPath,
String tableName,
String partitionName) |
void |
setInputObjInspectors(ObjectInspector[] inputObjInspectors) |
void |
setOperatorId(String operatorId) |
void |
setOpTraits(OpTraits metaInfo) |
void |
setOutputCollector(org.apache.hadoop.mapred.OutputCollector out) |
void |
setParentOperators(List<Operator<? extends OperatorDesc>> parentOperators) |
void |
setReporter(org.apache.hadoop.mapred.Reporter rep) |
void |
setSchema(RowSchema rowSchema) |
void |
setStatistics(Statistics stats) |
void |
setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat)
Before setting this to
true make sure it's not reading ACID tables |
void |
startGroup() |
boolean |
supportAutomaticSortMergeJoin()
Whether this operator supports automatic sort merge join.
|
boolean |
supportSkewJoinOptimization() |
boolean |
supportUnionRemoveOptimization() |
String |
toString() |
static String |
toString(Collection<Operator<? extends OperatorDesc>> top) |
public static final String HIVECOUNTERCREATEDFILES
public static final String HIVECOUNTERFATAL
public static final String CONTEXT_NAME_KEY
protected List<Operator<? extends OperatorDesc>> childOperators
protected List<Operator<? extends OperatorDesc>> parentOperators
protected String operatorId
protected transient Operator.State state
protected T extends OperatorDesc conf
protected boolean done
protected transient org.apache.hadoop.mapred.OutputCollector out
protected final transient org.apache.commons.logging.Log LOG
protected final transient org.apache.commons.logging.Log PLOG
protected final transient boolean isLogInfoEnabled
protected final transient boolean isLogDebugEnabled
protected final transient boolean isLogTraceEnabled
protected transient String alias
protected transient org.apache.hadoop.mapred.Reporter reporter
protected transient String id
protected transient ObjectInspector[] inputObjInspectors
protected transient ObjectInspector outputObjInspector
protected Map<String,ExprNodeDesc> colExprMap
protected transient Operator<? extends OperatorDesc>[] childOperatorsArray
protected transient int[] childOperatorsTag
protected transient Object groupKeyObject
public Operator()
public Operator(org.apache.hadoop.mapred.Reporter reporter)
reporter
- Used to report progress of certain operators.
public static void resetId()
public void setChildOperators(List<Operator<? extends OperatorDesc>> childOperators)
public org.apache.hadoop.conf.Configuration getConfiguration()
public List<Operator<? extends OperatorDesc>> getChildOperators()
public int getNumChild()
public ArrayList<Node> getChildren()
getChildren
in interface Node
public void setParentOperators(List<Operator<? extends OperatorDesc>> parentOperators)
public List<Operator<? extends OperatorDesc>> getParentOperators()
public int getNumParent()
public void setConf(T conf)
public T getConf()
public boolean getDone()
protected final void setDone(boolean done)
public void setSchema(RowSchema rowSchema)
public RowSchema getSchema()
public void setId(String id)
public String getIdentifier()
public void setReporter(org.apache.hadoop.mapred.Reporter rep)
public void setOutputCollector(org.apache.hadoop.mapred.OutputCollector out)
public void setAlias(String alias)
protected boolean areAllParentsInitialized()
public final void initialize(org.apache.hadoop.conf.Configuration hconf, ObjectInspector[] inputOIs) throws HiveException
hconf
-
inputOIs
- input object inspector array indexed by tag id. A null value is
ignored.
HiveException
protected void completeInitializationOp(Object[] os) throws HiveException
os
-
HiveException
public void initializeLocalWork(org.apache.hadoop.conf.Configuration hconf) throws HiveException
HiveException
protected Collection<Future<?>> initializeOp(org.apache.hadoop.conf.Configuration hconf) throws HiveException
HiveException
protected void initializeChildren(org.apache.hadoop.conf.Configuration hconf) throws HiveException
HiveException
public void passExecContext(ExecMapperContext execContext)
protected void initialize(org.apache.hadoop.conf.Configuration hconf, ObjectInspector inputOI, int parentId) throws HiveException
hconf
-
inputOI
- OI of the row that this parent will pass to this op
parentId
- parent operator id
HiveException
public ObjectInspector[] getInputObjInspectors()
public void setInputObjInspectors(ObjectInspector[] inputObjInspectors)
public ObjectInspector getOutputObjInspector()
public abstract void process(Object row, int tag) throws HiveException
row
- The object representing the row.
tag
- The tag of the row usually means which parent this row comes from.
Rows with the same tag should have exactly the same rowInspector
all the time.
HiveException
protected final void defaultStartGroup() throws HiveException
HiveException
protected final void defaultEndGroup() throws HiveException
HiveException
public void startGroup() throws HiveException
HiveException
public void endGroup() throws HiveException
HiveException
public void flush() throws HiveException
HiveException
public void processGroup(int tag) throws HiveException
HiveException
protected boolean allInitializedParentsAreClosed()
public void close(boolean abort) throws HiveException
HiveException
protected void closeOp(boolean abort) throws HiveException
HiveException
public void jobCloseOp(org.apache.hadoop.conf.Configuration conf, boolean success) throws HiveException
HiveException
public void jobClose(org.apache.hadoop.conf.Configuration conf, boolean success) throws HiveException
conf
- Configuration with which the job was submitted
success
- whether the job was completed successfully or not
HiveException
public void replaceChild(Operator<? extends OperatorDesc> child, Operator<? extends OperatorDesc> newChild)
child
- the old child
newChild
- the new child
public void removeChild(Operator<? extends OperatorDesc> child)
public void removeChildAndAdoptItsChildren(Operator<? extends OperatorDesc> child) throws SemanticException
child
- If this operator is not the only parent of the child, the result can be unpredictable.
SemanticException
public void removeParent(Operator<? extends OperatorDesc> parent)
public boolean removeChildren(int depth)
public void replaceParent(Operator<? extends OperatorDesc> parent, Operator<? extends OperatorDesc> newParent)
parent
- the old parent
newParent
- the new parent
protected long getNextCntr(long cntr)
protected void forward(Object row, ObjectInspector rowInspector) throws HiveException
HiveException
public void resetStats()
public void reset()
public void preorderMap(Operator.OperatorFunc opFunc)
public void logStats()
public String getName()
public static String getOperatorName()
public Map<String,ExprNodeDesc> getColumnExprMap()
public void setColumnExprMap(Map<String,ExprNodeDesc> colExprMap)
public String dump(int level)
protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator<?>[] evals, ObjectInspector rowInspector) throws HiveException
HiveException
protected static ObjectInspector[] initEvaluators(ExprNodeEvaluator<?>[] evals, int start, int length, ObjectInspector rowInspector) throws HiveException
HiveException
protected static StructObjectInspector initEvaluatorsAndReturnStruct(ExprNodeEvaluator<?>[] evals, List<String> outputColName, ObjectInspector rowInspector) throws HiveException
HiveException
public String getOperatorId()
public void initOperatorId()
public void setOperatorId(String operatorId)
public abstract OperatorType getType()
public void setGroupKeyObject(Object keyObject)
public Object getGroupKeyObject()
public void augmentPlan()
public ExecMapperContext getExecContext()
public void setExecContext(ExecMapperContext execContext)
public void cleanUpInputFileChanged() throws HiveException
HiveException
public void cleanUpInputFileChangedOp() throws HiveException
HiveException
public void setInputContext(String inputPath, String tableName, String partitionName)
public boolean supportSkewJoinOptimization()
public Operator<? extends OperatorDesc> clone() throws CloneNotSupportedException
clone
in class Object
CloneNotSupportedException
public Operator<? extends OperatorDesc> cloneOp() throws CloneNotSupportedException
CloneNotSupportedException
public Operator<? extends OperatorDesc> cloneRecursiveChildren() throws CloneNotSupportedException
CloneNotSupportedException
public boolean columnNamesRowResolvedCanBeObtained()
public boolean isUseBucketizedHiveInputFormat()
public void setUseBucketizedHiveInputFormat(boolean useBucketizedHiveInputFormat)
Before setting this to true, make sure it's not reading ACID tables.
useBucketizedHiveInputFormat
-
public boolean supportAutomaticSortMergeJoin()
public boolean supportUnionRemoveOptimization()
public boolean opAllowedBeforeMapJoin()
public boolean opAllowedAfterMapJoin()
public boolean opAllowedConvertMapJoin()
public boolean opAllowedBeforeSortMergeJoin()
public boolean acceptLimitPushdown()
public static String toString(Collection<Operator<? extends OperatorDesc>> top)
public Statistics getStatistics()
public OpTraits getOpTraits()
public void setOpTraits(OpTraits metaInfo)
public void setStatistics(Statistics stats)
public static Operator createDummy()
Copyright © 2017 The Apache Software Foundation. All rights reserved.