PREHOOK: query: create table smallTbl1(key string, value string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table smallTbl1(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@smallTbl1 PREHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@smalltbl1 POSTHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@smalltbl1 POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: create table smallTbl2(key string, value string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table smallTbl2(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@smallTbl2 POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: insert overwrite table smallTbl2 select * from src where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@smalltbl2 POSTHOOK: query: insert overwrite table smallTbl2 select * from src where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@smalltbl2 POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: create table smallTbl3(key string, value string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table smallTbl3(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@smallTbl3 POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@smalltbl3 POSTHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@smalltbl3 POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: create table smallTbl4(key string, value string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table smallTbl4(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@smallTbl4 POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: insert overwrite table smallTbl4 select * from src where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@smalltbl4 POSTHOOK: query: insert overwrite table smallTbl4 select * from src where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@smalltbl4 POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: create table bigTbl(key string, value string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table bigTbl(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bigTbl POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: insert overwrite table bigTbl select * from ( select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src ) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@bigtbl POSTHOOK: query: insert overwrite table bigTbl select * from ( select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src ) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bigtbl POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: Stage-7 is a root stage Stage-2 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: firstjoin:smalltbl1 Fetch Operator limit: -1 smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: firstjoin:smalltbl1 TableScan alias: smalltbl1 HashTable Sink Operator condition expressions: 0 {value} 1 handleSkewJoin: false keys: 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 smalltbl2 TableScan alias: smalltbl2 HashTable Sink Operator condition expressions: 0 1 handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[value]] Position of Big Table: 0 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: firstjoin:bigtbl TableScan alias: bigtbl Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 handleSkewJoin: false keys: 0 [Column[key]] 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 Select Operator expressions: expr: _col1 type: string outputColumnNames: _col1 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[value]] Position of Big Table: 0 Select Operator Group By Operator aggregations: expr: count() bucketGroup: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false mode: mergepartial outputColumnNames: _col0 Select Operator expressions: expr: _col0 type: bigint outputColumnNames: _col0 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 #### A masked pattern was here #### POSTHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 #### A masked pattern was here #### POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 580 PREHOOK: query: -- Now run a query with two-way join, which should be converted into a -- map-join followed by groupby - two MR jobs overall explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) PREHOOK: type: QUERY POSTHOOK: query: -- Now run a query with two-way join, which should be converted into a -- map-join followed by groupby - two MR jobs overall explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))))) STAGE DEPENDENCIES: Stage-7 is a root stage Stage-2 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: firstjoin:smalltbl1 Fetch Operator limit: -1 smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: firstjoin:smalltbl1 TableScan alias: smalltbl1 HashTable Sink Operator condition expressions: 0 {value} 1 handleSkewJoin: false keys: 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 smalltbl2 TableScan alias: smalltbl2 HashTable Sink Operator condition expressions: 0 1 handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[value]] Position of Big Table: 0 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: firstjoin:bigtbl TableScan alias: bigtbl Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 handleSkewJoin: false keys: 0 [Column[key]] 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 Select Operator expressions: expr: _col1 type: string outputColumnNames: _col1 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[value]] Position of Big Table: 0 Select Operator Group By Operator aggregations: expr: count() bucketGroup: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false mode: mergepartial outputColumnNames: _col0 Select Operator expressions: expr: _col0 type: bigint outputColumnNames: _col0 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 #### A masked pattern was here #### POSTHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 #### A masked pattern was here #### POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 580 PREHOOK: query: -- Now run a query with two-way join, which should first be converted into a -- map-join followed by groupby and then finally into a single MR job. explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key PREHOOK: type: QUERY POSTHOOK: query: -- Now run a query with two-way join, which should first be converted into a -- map-join followed by groupby and then finally into a single MR job. explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key POSTHOOK: type: QUERY POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) firstjoin) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL firstjoin) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL smallTbl2) key)))) STAGE DEPENDENCIES: Stage-7 is a root stage Stage-2 depends on stages: Stage-7 Stage-0 is a root stage STAGE PLANS: Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: firstjoin:smalltbl1 Fetch Operator limit: -1 smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: firstjoin:smalltbl1 TableScan alias: smalltbl1 HashTable Sink Operator condition expressions: 0 {value} 1 handleSkewJoin: false keys: 0 [Column[key]] 1 [Column[key]] Position of Big Table: 0 smalltbl2 TableScan alias: smalltbl2 HashTable Sink Operator condition expressions: 0 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[value]] Position of Big Table: 0 Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: firstjoin:bigtbl TableScan alias: bigtbl Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {value} 1 handleSkewJoin: false keys: 0 [Column[key]] 1 [Column[key]] outputColumnNames: _col1 Position of Big Table: 0 Select Operator expressions: expr: _col1 type: string outputColumnNames: _col1 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[value]] outputColumnNames: _col3 Position of Big Table: 0 Select Operator expressions: expr: _col3 type: string outputColumnNames: _col3 Group By Operator aggregations: expr: count() bucketGroup: false keys: expr: _col3 type: string mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: -1 value expressions: expr: _col1 type: bigint Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: string mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col1 type: bigint outputColumnNames: _col0 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 #### A masked pattern was here #### POSTHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 #### A masked pattern was here #### POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 270 10 10 270 10 10 PREHOOK: query: drop table bigTbl PREHOOK: type: DROPTABLE PREHOOK: Input: default@bigtbl PREHOOK: Output: default@bigtbl POSTHOOK: query: drop table bigTbl POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@bigtbl POSTHOOK: Output: default@bigtbl POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: create table bigTbl(key1 string, key2 string, value string) PREHOOK: type: CREATETABLE POSTHOOK: query: create table bigTbl(key1 string, key2 string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@bigTbl POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: insert overwrite table bigTbl select * from ( select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src ) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@bigtbl POSTHOOK: query: insert overwrite table bigTbl select * from ( select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src ) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bigtbl POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- First disable noconditionaltask EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY POSTHOOK: query: -- First disable noconditionaltask EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2))))))) STAGE DEPENDENCIES: Stage-20 is a root stage , consists of Stage-27, Stage-28, Stage-5 Stage-27 has a backup stage: Stage-5 Stage-18 depends on stages: Stage-27 Stage-17 depends on stages: Stage-5, Stage-18, Stage-19 , consists of Stage-25, Stage-26, Stage-1 Stage-25 has a backup stage: Stage-1 Stage-15 depends on stages: Stage-25 Stage-14 depends on stages: Stage-1, Stage-15, Stage-16 , consists of Stage-23, Stage-24, Stage-2 Stage-23 has a backup stage: Stage-2 Stage-12 depends on stages: Stage-23 Stage-11 depends on stages: Stage-2, Stage-12, Stage-13 , consists of Stage-21, Stage-22, Stage-3 Stage-21 has a backup stage: Stage-3 Stage-9 depends on stages: Stage-21 Stage-4 depends on stages: Stage-3, Stage-9, Stage-10 Stage-22 has a backup stage: Stage-3 Stage-10 depends on stages: Stage-22 Stage-3 Stage-24 has a backup stage: Stage-2 Stage-13 depends on stages: Stage-24 Stage-2 Stage-26 has a backup stage: Stage-1 Stage-16 depends on stages: Stage-26 Stage-1 Stage-28 has a backup stage: Stage-5 Stage-19 depends on stages: Stage-28 Stage-5 Stage-0 is a root stage STAGE PLANS: Stage: Stage-20 Conditional Operator Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 HashTable Sink Operator condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] Position of Big Table: 0 Stage: Stage-18 Map Reduce Alias -> Map Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col5 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col5 type: string expr: _col2 type: string expr: _col2 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-17 Conditional Operator Stage: Stage-25 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:smalltbl2 TableScan alias: smalltbl2 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] Position of Big Table: 0 Stage: Stage-15 Map Reduce Alias -> Map Operator Tree: $INTNAME Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col5 type: string expr: _col3 type: string expr: _col4 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-14 Conditional Operator Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: join3:smalltbl3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:smalltbl3 TableScan alias: smalltbl3 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] Position of Big Table: 0 Stage: Stage-12 Map Reduce Alias -> Map Operator Tree: $INTNAME Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col6 type: string expr: _col4 type: string expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-11 Conditional Operator Stage: Stage-21 Map Reduce Local Work Alias -> Map Local Tables: smalltbl4 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: smalltbl4 TableScan alias: smalltbl4 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] Position of Big Table: 0 Stage: Stage-9 Map Reduce Alias -> Map Operator Tree: $INTNAME Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col7 type: string expr: _col5 type: string expr: _col6 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: expr: sum(hash(_col0)) expr: sum(hash(_col1)) expr: sum(hash(_col2)) expr: sum(hash(_col3)) expr: sum(hash(_col4)) expr: sum(hash(_col7)) expr: sum(hash(_col5)) expr: sum(hash(_col6)) bucketGroup: false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint expr: _col1 type: bigint expr: _col2 type: bigint expr: _col3 type: bigint expr: _col4 type: bigint expr: _col5 type: bigint expr: _col6 type: bigint expr: _col7 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: sum(VALUE._col0) expr: sum(VALUE._col1) expr: sum(VALUE._col2) expr: sum(VALUE._col3) expr: sum(VALUE._col4) expr: sum(VALUE._col5) expr: sum(VALUE._col6) expr: sum(VALUE._col7) bucketGroup: false mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: expr: _col0 type: bigint expr: _col1 type: bigint expr: _col2 type: bigint expr: _col3 type: bigint expr: _col4 type: bigint expr: _col5 type: bigint expr: _col6 type: bigint expr: _col7 type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-22 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $INTNAME HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] Position of Big Table: 1 Stage: Stage-10 Map Reduce Alias -> Map Operator Tree: smalltbl4 TableScan alias: smalltbl4 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Position of Big Table: 1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col7 type: string expr: _col5 type: string expr: _col6 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: expr: sum(hash(_col0)) expr: sum(hash(_col1)) expr: sum(hash(_col2)) expr: sum(hash(_col3)) expr: sum(hash(_col4)) expr: sum(hash(_col7)) expr: sum(hash(_col5)) expr: sum(hash(_col6)) bucketGroup: false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col2 type: string sort order: + Map-reduce partition columns: expr: _col2 type: string tag: 0 value expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col5 type: string expr: _col6 type: string smalltbl4 TableScan alias: smalltbl4 Reduce Output Operator key expressions: expr: key type: string sort order: + Map-reduce partition columns: expr: key type: string tag: 1 value expressions: expr: key type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} 1 {VALUE._col0} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col7 type: string expr: _col5 type: string expr: _col6 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: expr: sum(hash(_col0)) expr: sum(hash(_col1)) expr: sum(hash(_col2)) expr: sum(hash(_col3)) expr: sum(hash(_col4)) expr: sum(hash(_col7)) expr: sum(hash(_col5)) expr: sum(hash(_col6)) bucketGroup: false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-24 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $INTNAME HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] Position of Big Table: 1 Stage: Stage-13 Map Reduce Alias -> Map Operator Tree: join3:smalltbl3 TableScan alias: smalltbl3 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Position of Big Table: 1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col6 type: string expr: _col4 type: string expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col1 type: string sort order: + Map-reduce partition columns: expr: _col1 type: string tag: 0 value expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col5 type: string join3:smalltbl3 TableScan alias: smalltbl3 Reduce Output Operator key expressions: expr: key type: string sort order: + Map-reduce partition columns: expr: key type: string tag: 1 value expressions: expr: key type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col6 type: string expr: _col4 type: string expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-26 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $INTNAME HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] Position of Big Table: 1 Stage: Stage-16 Map Reduce Alias -> Map Operator Tree: join3:join2:smalltbl2 TableScan alias: smalltbl2 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col5 type: string expr: _col3 type: string expr: _col4 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col3 type: string sort order: + Map-reduce partition columns: expr: _col3 type: string tag: 0 value expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string join3:join2:smalltbl2 TableScan alias: smalltbl2 Reduce Output Operator key expressions: expr: value type: string sort order: + Map-reduce partition columns: expr: value type: string tag: 1 value expressions: expr: key type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} 1 {VALUE._col0} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col5 type: string expr: _col3 type: string expr: _col4 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-28 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:bigtbl Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl HashTable Sink Operator condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] Position of Big Table: 1 Stage: Stage-19 Map Reduce Alias -> Map Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col5 Position of Big Table: 1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col5 type: string expr: _col2 type: string expr: _col2 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl Reduce Output Operator key expressions: expr: key1 type: string sort order: + Map-reduce partition columns: expr: key1 type: string tag: 0 value expressions: expr: key1 type: string expr: key2 type: string expr: value type: string join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 Reduce Output Operator key expressions: expr: key type: string sort order: + Map-reduce partition columns: expr: key type: string tag: 1 value expressions: expr: key type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col5 type: string expr: _col2 type: string expr: _col2 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 PREHOOK: Input: default@smalltbl3 PREHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 247580 247580 247580 247580 247580 247580 548662743780 548662743780 PREHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 10000, which is large enough to fit all four small tables (smallTbl1 to smallTbl4). -- We will use a single MR job to evaluate this query. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY POSTHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 10000, which is large enough to fit all four small tables (smallTbl1 to smallTbl4). -- We will use a single MR job to evaluate this query. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2))))))) STAGE DEPENDENCIES: Stage-13 is a root stage Stage-4 depends on stages: Stage-13 Stage-0 is a root stage STAGE PLANS: Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 Fetch Operator limit: -1 join3:join2:smalltbl2 Fetch Operator limit: -1 join3:smalltbl3 Fetch Operator limit: -1 smalltbl4 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 HashTable Sink Operator condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] Position of Big Table: 0 join3:join2:smalltbl2 TableScan alias: smalltbl2 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] Position of Big Table: 0 join3:smalltbl3 TableScan alias: smalltbl3 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] Position of Big Table: 0 smalltbl4 TableScan alias: smalltbl4 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] Position of Big Table: 0 Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col5 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col5 type: string expr: _col2 type: string expr: _col2 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col5 type: string expr: _col3 type: string expr: _col4 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col6 type: string expr: _col4 type: string expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col7 type: string expr: _col5 type: string expr: _col6 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: expr: sum(hash(_col0)) expr: sum(hash(_col1)) expr: sum(hash(_col2)) expr: sum(hash(_col3)) expr: sum(hash(_col4)) expr: sum(hash(_col7)) expr: sum(hash(_col5)) expr: sum(hash(_col6)) bucketGroup: false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint expr: _col1 type: bigint expr: _col2 type: bigint expr: _col3 type: bigint expr: _col4 type: bigint expr: _col5 type: bigint expr: _col6 type: bigint expr: _col7 type: bigint Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: expr: sum(VALUE._col0) expr: sum(VALUE._col1) expr: sum(VALUE._col2) expr: sum(VALUE._col3) expr: sum(VALUE._col4) expr: sum(VALUE._col5) expr: sum(VALUE._col6) expr: sum(VALUE._col7) bucketGroup: false mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: expr: _col0 type: bigint expr: _col1 type: bigint expr: _col2 type: bigint expr: _col3 type: bigint expr: _col4 type: bigint expr: _col5 type: bigint expr: _col6 type: bigint expr: _col7 type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 PREHOOK: Input: default@smalltbl3 PREHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 247580 247580 247580 247580 247580 247580 548662743780 548662743780 PREHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 200, which is large enough to fit two small tables. We will have two jobs to evaluate this -- query. The first job is a Map-only job to evaluate join1 and join2. -- The second job will evaluate the rest of this query. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY POSTHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 200, which is large enough to fit two small tables. We will have two jobs to evaluate this -- query. The first job is a Map-only job to evaluate join1 and join2. -- The second job will evaluate the rest of this query. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2))))))) STAGE DEPENDENCIES: Stage-14 is a root stage Stage-11 depends on stages: Stage-14 Stage-13 depends on stages: Stage-11 Stage-4 depends on stages: Stage-13 Stage-0 is a root stage STAGE PLANS: Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 Fetch Operator limit: -1 join3:join2:smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 HashTable Sink Operator condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] Position of Big Table: 0 join3:join2:smalltbl2 TableScan alias: smalltbl2 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] Position of Big Table: 0 Stage: Stage-11 Map Reduce Alias -> Map Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col5 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col5 type: string expr: _col2 type: string expr: _col2 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col5 type: string expr: _col3 type: string expr: _col4 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: join3:smalltbl3 Fetch Operator limit: -1 smalltbl4 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:smalltbl3 TableScan alias: smalltbl3 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] Position of Big Table: 0 smalltbl4 TableScan alias: smalltbl4 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] Position of Big Table: 0 Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: $INTNAME Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col6 type: string expr: _col4 type: string expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col7 type: string expr: _col5 type: string expr: _col6 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: expr: sum(hash(_col0)) expr: sum(hash(_col1)) expr: sum(hash(_col2)) expr: sum(hash(_col3)) expr: sum(hash(_col4)) expr: sum(hash(_col7)) expr: sum(hash(_col5)) expr: sum(hash(_col6)) bucketGroup: false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint expr: _col1 type: bigint expr: _col2 type: bigint expr: _col3 type: bigint expr: _col4 type: bigint expr: _col5 type: bigint expr: _col6 type: bigint expr: _col7 type: bigint Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: expr: sum(VALUE._col0) expr: sum(VALUE._col1) expr: sum(VALUE._col2) expr: sum(VALUE._col3) expr: sum(VALUE._col4) expr: sum(VALUE._col5) expr: sum(VALUE._col6) expr: sum(VALUE._col7) bucketGroup: false mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: expr: _col0 type: bigint expr: _col1 type: bigint expr: _col2 type: bigint expr: _col3 type: bigint expr: _col4 type: bigint expr: _col5 type: bigint expr: _col6 type: bigint expr: _col7 type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 PREHOOK: Input: default@smalltbl3 PREHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 247580 247580 247580 247580 247580 247580 548662743780 548662743780 PREHOOK: query: -- Enable noconditionaltask and but set the size of hive.auto.convert.join.noconditionaltask.size -- to 0. The plan will be the same as the one with a disabled nonconditionaltask. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY POSTHOOK: query: -- Enable noconditionaltask and but set the size of hive.auto.convert.join.noconditionaltask.size -- to 0. The plan will be the same as the one with a disabled nonconditionaltask. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME bigTbl)) (TOK_TABREF (TOK_TABNAME smallTbl1)) (= (. (TOK_TABLE_OR_COL bigTbl) key1) (. (TOK_TABLE_OR_COL smallTbl1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl1) key) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL bigTbl) value) value2)))) join1) (TOK_TABREF (TOK_TABNAME smallTbl2)) (= (. (TOK_TABLE_OR_COL join1) value1) (. (TOK_TABLE_OR_COL smallTbl2) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl2) key) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join1) value2) value2)))) join2) (TOK_TABREF (TOK_TABNAME smallTbl3)) (= (. (TOK_TABLE_OR_COL join2) key2) (. (TOK_TABLE_OR_COL smallTbl3) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key2) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key3) key3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) key4) key4) (TOK_SELEXPR (. (TOK_TABLE_OR_COL smallTbl3) key) key5) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value1) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL join2) value2) value2)))) join3) (TOK_TABREF (TOK_TABNAME smallTbl4)) (= (. (TOK_TABLE_OR_COL join3) key3) (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key2)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key3)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key4)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) key5)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL smallTbl4) key)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value1)))) (TOK_SELEXPR (TOK_FUNCTION SUM (TOK_FUNCTION HASH (. (TOK_TABLE_OR_COL join3) value2))))))) STAGE DEPENDENCIES: Stage-20 is a root stage , consists of Stage-27, Stage-28, Stage-5 Stage-27 has a backup stage: Stage-5 Stage-18 depends on stages: Stage-27 Stage-17 depends on stages: Stage-5, Stage-18, Stage-19 , consists of Stage-25, Stage-26, Stage-1 Stage-25 has a backup stage: Stage-1 Stage-15 depends on stages: Stage-25 Stage-14 depends on stages: Stage-1, Stage-15, Stage-16 , consists of Stage-23, Stage-24, Stage-2 Stage-23 has a backup stage: Stage-2 Stage-12 depends on stages: Stage-23 Stage-11 depends on stages: Stage-2, Stage-12, Stage-13 , consists of Stage-21, Stage-22, Stage-3 Stage-21 has a backup stage: Stage-3 Stage-9 depends on stages: Stage-21 Stage-4 depends on stages: Stage-3, Stage-9, Stage-10 Stage-22 has a backup stage: Stage-3 Stage-10 depends on stages: Stage-22 Stage-3 Stage-24 has a backup stage: Stage-2 Stage-13 depends on stages: Stage-24 Stage-2 Stage-26 has a backup stage: Stage-1 Stage-16 depends on stages: Stage-26 Stage-1 Stage-28 has a backup stage: Stage-5 Stage-19 depends on stages: Stage-28 Stage-5 Stage-0 is a root stage STAGE PLANS: Stage: Stage-20 Conditional Operator Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 HashTable Sink Operator condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] Position of Big Table: 0 Stage: Stage-18 Map Reduce Alias -> Map Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col5 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col5 type: string expr: _col2 type: string expr: _col2 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-17 Conditional Operator Stage: Stage-25 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:smalltbl2 TableScan alias: smalltbl2 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] Position of Big Table: 0 Stage: Stage-15 Map Reduce Alias -> Map Operator Tree: $INTNAME Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col5 type: string expr: _col3 type: string expr: _col4 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-14 Conditional Operator Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: join3:smalltbl3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:smalltbl3 TableScan alias: smalltbl3 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] Position of Big Table: 0 Stage: Stage-12 Map Reduce Alias -> Map Operator Tree: $INTNAME Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col6 type: string expr: _col4 type: string expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-11 Conditional Operator Stage: Stage-21 Map Reduce Local Work Alias -> Map Local Tables: smalltbl4 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: smalltbl4 TableScan alias: smalltbl4 HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] Position of Big Table: 0 Stage: Stage-9 Map Reduce Alias -> Map Operator Tree: $INTNAME Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Position of Big Table: 0 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col7 type: string expr: _col5 type: string expr: _col6 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: expr: sum(hash(_col0)) expr: sum(hash(_col1)) expr: sum(hash(_col2)) expr: sum(hash(_col3)) expr: sum(hash(_col4)) expr: sum(hash(_col7)) expr: sum(hash(_col5)) expr: sum(hash(_col6)) bucketGroup: false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint expr: _col1 type: bigint expr: _col2 type: bigint expr: _col3 type: bigint expr: _col4 type: bigint expr: _col5 type: bigint expr: _col6 type: bigint expr: _col7 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: sum(VALUE._col0) expr: sum(VALUE._col1) expr: sum(VALUE._col2) expr: sum(VALUE._col3) expr: sum(VALUE._col4) expr: sum(VALUE._col5) expr: sum(VALUE._col6) expr: sum(VALUE._col7) bucketGroup: false mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: expr: _col0 type: bigint expr: _col1 type: bigint expr: _col2 type: bigint expr: _col3 type: bigint expr: _col4 type: bigint expr: _col5 type: bigint expr: _col6 type: bigint expr: _col7 type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-22 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $INTNAME HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] Position of Big Table: 1 Stage: Stage-10 Map Reduce Alias -> Map Operator Tree: smalltbl4 TableScan alias: smalltbl4 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} {_col6} 1 {key} handleSkewJoin: false keys: 0 [Column[_col2]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Position of Big Table: 1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col7 type: string expr: _col5 type: string expr: _col6 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: expr: sum(hash(_col0)) expr: sum(hash(_col1)) expr: sum(hash(_col2)) expr: sum(hash(_col3)) expr: sum(hash(_col4)) expr: sum(hash(_col7)) expr: sum(hash(_col5)) expr: sum(hash(_col6)) bucketGroup: false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col2 type: string sort order: + Map-reduce partition columns: expr: _col2 type: string tag: 0 value expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col5 type: string expr: _col6 type: string smalltbl4 TableScan alias: smalltbl4 Reduce Output Operator key expressions: expr: key type: string sort order: + Map-reduce partition columns: expr: key type: string tag: 1 value expressions: expr: key type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} {VALUE._col6} 1 {VALUE._col0} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col7 type: string expr: _col5 type: string expr: _col6 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: expr: sum(hash(_col0)) expr: sum(hash(_col1)) expr: sum(hash(_col2)) expr: sum(hash(_col3)) expr: sum(hash(_col4)) expr: sum(hash(_col7)) expr: sum(hash(_col5)) expr: sum(hash(_col6)) bucketGroup: false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-24 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $INTNAME HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] Position of Big Table: 1 Stage: Stage-13 Map Reduce Alias -> Map Operator Tree: join3:smalltbl3 TableScan alias: smalltbl3 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} {_col5} 1 {key} handleSkewJoin: false keys: 0 [Column[_col1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Position of Big Table: 1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col6 type: string expr: _col4 type: string expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col1 type: string sort order: + Map-reduce partition columns: expr: _col1 type: string tag: 0 value expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string expr: _col5 type: string join3:smalltbl3 TableScan alias: smalltbl3 Reduce Output Operator key expressions: expr: key type: string sort order: + Map-reduce partition columns: expr: key type: string tag: 1 value expressions: expr: key type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} {VALUE._col5} 1 {VALUE._col0} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col6 type: string expr: _col4 type: string expr: _col5 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-26 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $INTNAME HashTable Sink Operator condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] Position of Big Table: 1 Stage: Stage-16 Map Reduce Alias -> Map Operator Tree: join3:join2:smalltbl2 TableScan alias: smalltbl2 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} {_col1} {_col2} {_col3} {_col4} 1 {key} handleSkewJoin: false keys: 0 [Column[_col3]] 1 [Column[value]] outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col5 type: string expr: _col3 type: string expr: _col4 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col3 type: string sort order: + Map-reduce partition columns: expr: _col3 type: string tag: 0 value expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string expr: _col4 type: string join3:join2:smalltbl2 TableScan alias: smalltbl2 Reduce Output Operator key expressions: expr: value type: string sort order: + Map-reduce partition columns: expr: value type: string tag: 1 value expressions: expr: key type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} {VALUE._col4} 1 {VALUE._col0} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col5 type: string expr: _col3 type: string expr: _col4 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-28 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:bigtbl Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl HashTable Sink Operator condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] Position of Big Table: 1 Stage: Stage-19 Map Reduce Alias -> Map Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key1} {key2} {value} 1 {key} handleSkewJoin: false keys: 0 [Column[key1]] 1 [Column[key]] outputColumnNames: _col0, _col1, _col2, _col5 Position of Big Table: 1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col5 type: string expr: _col2 type: string expr: _col2 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl Reduce Output Operator key expressions: expr: key1 type: string sort order: + Map-reduce partition columns: expr: key1 type: string tag: 0 value expressions: expr: key1 type: string expr: key2 type: string expr: value type: string join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 Reduce Output Operator key expressions: expr: key type: string sort order: + Map-reduce partition columns: expr: key type: string tag: 1 value expressions: expr: key type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col5 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col5 type: string expr: _col2 type: string expr: _col2 type: string outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 PREHOOK: Input: default@smalltbl3 PREHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 247580 247580 247580 247580 247580 247580 548662743780 548662743780