public abstract class CommonJoinOperator<T extends JoinDesc> extends Operator<T> implements Serializable
Operator.OperatorFunc, Operator.State
Modifier and Type | Field and Description |
---|---|
protected Byte |
alias |
protected short[] |
aliasFilterTags
On filterTags
ANDed value of all filter tags in the current join group.
If any of the values passes on an outer join alias (which makes the tag bit for that alias zero),
it means there exists a pair for it, and it can safely be regarded as an inner join.
for example, with table a, b something like,
a = 100, 10 | 100, 20 | 100, 30
b = 100, 10 | 100, 20 | 100, 30
the query "a FO b ON a.k=b.k AND a.v>10 AND b.v>30" makes filter map
0(a) = [1(b),1] : a.v>10
1(b) = [0(a),1] : b.v>30
for filtered rows in a (100,10) create a-NULL
for filtered rows in b (100,10) (100,20) (100,30) create NULL-b
with 0(a) = [1(b),1] : a.v>10
100, 10 = 00000010 (filtered)
100, 20 = 00000000 (valid)
100, 30 = 00000000 (valid)
-------------------------
sum = 00000000 : for valid rows in b, there is at least one pair in a
with 1(b) = [0(a),1] : b.v>30
100, 10 = 00000001 (filtered)
100, 20 = 00000001 (filtered)
100, 30 = 00000001 (filtered)
-------------------------
sum = 00000001 : for valid rows in a (100,20) (100,30), there is no pair in b
result :
100, 10 : N, N
N, N : 100, 10
N, N : 100, 20
N, N : 100, 30
100, 20 : N, N
100, 30 : N, N
|
protected JoinCondDesc[] |
condn |
protected int |
countAfterReport |
protected ArrayList<Object>[] |
dummyObj |
protected RowContainer<List<Object>>[] |
dummyObjVectors |
protected int[][] |
filterMaps |
protected short[] |
filterTags |
protected Object[] |
forwardCache |
protected org.apache.hadoop.conf.Configuration |
hconf |
protected int |
heartbeatInterval |
protected List[] |
intermediate |
protected List<ObjectInspector>[] |
joinFilterObjectInspectors
The ObjectInspectors for join filters.
|
protected List<ExprNodeEvaluator>[] |
joinFilters
The filters for join
|
protected List<ExprNodeEvaluator>[] |
joinValues
The expressions for join inputs.
|
protected List<ObjectInspector>[] |
joinValuesObjectInspectors
The ObjectInspectors for the join inputs.
|
protected List<ObjectInspector>[] |
joinValuesStandardObjectInspectors
The standard ObjectInspectors for the join inputs.
|
protected static org.slf4j.Logger |
LOG |
protected boolean |
needsPostEvaluation
Will be true depending on content of residualJoinFilters.
|
boolean |
noOuterJoin |
protected static int |
NOTSKIPBIGTABLE |
protected boolean[] |
nullsafes |
protected int |
numAliases |
protected int[] |
offsets |
protected Byte[] |
order |
protected List<ExprNodeEvaluator> |
residualJoinFilters
List of evaluators for conditions which appear in the ON clause and need to be
evaluated before emitting rows.
|
protected List<ObjectInspector> |
residualJoinFiltersOIs
OIs corresponding to residualJoinFilters.
|
protected Map<Integer,Object[]> |
rowContainerPostFilteredOuterJoin
This data structure is used to keep track of rows on which residualFilters
evaluated to false.
|
protected List<ObjectInspector>[] |
rowContainerStandardObjectInspectors
The standard ObjectInspectors for the row container.
|
protected boolean[][] |
skipVectors |
protected TableDesc[] |
spillTableDesc |
protected int |
totalSz |
abortOp, asyncInitOperations, cContext, childOperators, childOperatorsArray, childOperatorsTag, colExprMap, conf, CONTEXT_NAME_KEY, done, groupKeyObject, HIVECOUNTERCREATEDFILES, HIVECOUNTERFATAL, id, indexForTezUnion, inputObjInspectors, isLogDebugEnabled, isLogInfoEnabled, isLogTraceEnabled, operatorId, out, outputObjInspector, parentOperators, PLOG, reporter, runTimeNumRows, state, statsMap
Modifier | Constructor and Description |
---|---|
protected |
CommonJoinOperator()
Kryo ctor.
|
|
CommonJoinOperator(CommonJoinOperator<T> clone) |
|
CommonJoinOperator(CompilationOpContext ctx) |
Modifier and Type | Method and Description |
---|---|
protected void |
checkAndGenObject() |
void |
closeOp(boolean abort)
All done.
|
void |
endGroup()
Forward a record of join results.
|
protected List<Object> |
getFilteredValue(byte alias,
Object row) |
protected short |
getFilterTag(List<Object> row) |
String |
getName()
Gets the name of the node.
|
protected long |
getNextSize(long sz) |
static String |
getOperatorName() |
Map<Integer,Set<String>> |
getPosToAliasMap() |
protected List<ObjectInspector> |
getValueObjectInspectors(byte alias,
List<ObjectInspector>[] aliasToObjectInspectors) |
protected boolean |
hasFilter(int alias) |
protected void |
initializeOp(org.apache.hadoop.conf.Configuration hconf)
Operator specific initialization.
|
protected void |
internalForward(Object row,
ObjectInspector outputOI) |
boolean |
opAllowedAfterMapJoin() |
boolean |
opAllowedBeforeMapJoin() |
protected void |
reportProgress() |
void |
setPosToAliasMap(Map<Integer,Set<String>> posToAliasMap) |
void |
startGroup() |
abort, acceptLimitPushdown, allInitializedParentsAreClosed, areAllParentsInitialized, augmentPlan, cleanUpInputFileChanged, cleanUpInputFileChangedOp, clone, cloneOp, cloneRecursiveChildren, close, columnNamesRowResolvedCanBeObtained, completeInitializationOp, createDummy, defaultEndGroup, defaultStartGroup, dump, dump, flush, forward, getAdditionalCounters, getChildOperators, getChildren, getColumnExprMap, getCompilationOpContext, getConf, getConfiguration, getDone, getExecContext, getGroupKeyObject, getIdentifier, getIndexForTezUnion, getInputObjInspectors, getIsReduceSink, getNextCntr, getNumChild, getNumParent, getOperatorId, getOpTraits, getOutputObjInspector, getParentOperators, getReduceOutputName, getSchema, getStatistics, getStats, getType, initEvaluators, initEvaluators, initEvaluatorsAndReturnStruct, initialize, initialize, initializeChildren, initializeLocalWork, initOperatorId, isUseBucketizedHiveInputFormat, jobClose, jobCloseOp, logStats, opAllowedBeforeSortMergeJoin, opAllowedConvertMapJoin, passExecContext, preorderMap, process, processGroup, removeChild, removeChildAndAdoptItsChildren, removeParent, removeParents, replaceChild, replaceParent, reset, resetStats, setAlias, setChildOperators, setColumnExprMap, setCompilationOpContext, setConf, setDone, setExecContext, setGroupKeyObject, setId, setIndexForTezUnion, setInputContext, setInputObjInspectors, setOperatorId, setOpTraits, setOutputCollector, setParentOperators, setReporter, setSchema, setStatistics, setUseBucketizedHiveInputFormat, supportAutomaticSortMergeJoin, supportSkewJoinOptimization, supportUnionRemoveOptimization, toString, toString
protected static final org.slf4j.Logger LOG
protected transient int numAliases
protected transient List<ExprNodeEvaluator>[] joinValues
protected transient List<ExprNodeEvaluator>[] joinFilters
protected transient List<ExprNodeEvaluator> residualJoinFilters
protected transient int[][] filterMaps
protected transient List<ObjectInspector>[] joinValuesObjectInspectors
protected transient List<ObjectInspector>[] joinFilterObjectInspectors
protected transient List<ObjectInspector> residualJoinFiltersOIs
protected transient boolean needsPostEvaluation
protected transient Map<Integer,Object[]> rowContainerPostFilteredOuterJoin
protected transient List<ObjectInspector>[] joinValuesStandardObjectInspectors
protected transient List<ObjectInspector>[] rowContainerStandardObjectInspectors
protected transient Byte[] order
protected transient JoinCondDesc[] condn
protected transient boolean[] nullsafes
public transient boolean noOuterJoin
protected transient RowContainer<List<Object>>[] dummyObjVectors
protected transient int totalSz
protected transient TableDesc[] spillTableDesc
protected transient int countAfterReport
protected transient int heartbeatInterval
protected static final int NOTSKIPBIGTABLE
protected org.apache.hadoop.conf.Configuration hconf
protected transient Byte alias
protected transient Object[] forwardCache
protected transient int[] offsets
protected transient boolean[][] skipVectors
protected transient List[] intermediate
protected transient short[] filterTags
protected transient short[] aliasFilterTags
protected CommonJoinOperator()
public CommonJoinOperator(CompilationOpContext ctx)
public CommonJoinOperator(CommonJoinOperator<T> clone)
protected List<ObjectInspector> getValueObjectInspectors(byte alias, List<ObjectInspector>[] aliasToObjectInspectors)
protected void initializeOp(org.apache.hadoop.conf.Configuration hconf) throws HiveException
Operator
initializeOp
in class Operator<T extends JoinDesc>
HiveException
public void startGroup() throws HiveException
startGroup
in class Operator<T extends JoinDesc>
HiveException
protected long getNextSize(long sz)
protected List<Object> getFilteredValue(byte alias, Object row) throws HiveException
HiveException
protected final boolean hasFilter(int alias)
public void endGroup() throws HiveException
endGroup
in class Operator<T extends JoinDesc>
HiveException
protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException
HiveException
protected void checkAndGenObject() throws HiveException
HiveException
protected void reportProgress()
public void closeOp(boolean abort) throws HiveException
closeOp
in class Operator<T extends JoinDesc>
HiveException
public String getName()
Node
public static String getOperatorName()
public void setPosToAliasMap(Map<Integer,Set<String>> posToAliasMap)
posToAliasMap
- the posToAliasMap to set
public boolean opAllowedBeforeMapJoin()
opAllowedBeforeMapJoin
in class Operator<T extends JoinDesc>
public boolean opAllowedAfterMapJoin()
opAllowedAfterMapJoin
in class Operator<T extends JoinDesc>
Copyright © 2021 The Apache Software Foundation. All rights reserved.