PREHOOK: query: create table smallTbl1(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@smallTbl1 POSTHOOK: query: create table smallTbl1(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smallTbl1 RUN: Stage-0:DDL PREHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@smalltbl1 POSTHOOK: query: insert overwrite table smallTbl1 select * from src where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@smalltbl1 POSTHOOK: Lineage: smalltbl1.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-1:MAPRED RUN: Stage-7:CONDITIONAL RUN: Stage-4:MOVE RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: create table smallTbl2(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@smallTbl2 POSTHOOK: query: create table smallTbl2(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smallTbl2 RUN: Stage-0:DDL PREHOOK: query: insert overwrite table smallTbl2 select * from src where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@smalltbl2 POSTHOOK: query: insert overwrite table smallTbl2 select * from src where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@smalltbl2 POSTHOOK: Lineage: smalltbl2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-1:MAPRED RUN: Stage-7:CONDITIONAL RUN: Stage-4:MOVE RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: create table smallTbl3(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@smallTbl3 POSTHOOK: query: create table smallTbl3(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smallTbl3 RUN: Stage-0:DDL PREHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@smalltbl3 POSTHOOK: query: insert overwrite table smallTbl3 select * from src where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@smalltbl3 POSTHOOK: Lineage: smalltbl3.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-1:MAPRED RUN: Stage-7:CONDITIONAL RUN: Stage-4:MOVE RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: create table smallTbl4(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@smallTbl4 POSTHOOK: query: create table smallTbl4(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@smallTbl4 RUN: Stage-0:DDL PREHOOK: query: insert overwrite table smallTbl4 select * from src where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@smalltbl4 POSTHOOK: query: insert overwrite table smallTbl4 select * from src where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@smalltbl4 POSTHOOK: Lineage: smalltbl4.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: smalltbl4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-1:MAPRED RUN: Stage-7:CONDITIONAL RUN: Stage-4:MOVE RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: create table bigTbl(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bigTbl POSTHOOK: query: create table bigTbl(key string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bigTbl RUN: Stage-0:DDL PREHOOK: query: insert overwrite table bigTbl select * from ( select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src ) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@bigtbl POSTHOOK: query: insert overwrite table bigTbl select * from ( select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src union all select * from src ) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bigtbl POSTHOOK: Lineage: bigtbl.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-1:MAPRED RUN: Stage-7:CONDITIONAL RUN: Stage-4:MOVE RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) PREHOOK: type: QUERY POSTHOOK: query: explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: firstjoin:smalltbl1 Fetch Operator limit: -1 smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: firstjoin:smalltbl1 TableScan alias: smalltbl1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) 1 key (type: string) smalltbl2 TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col1 (type: string) 1 value (type: string) Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: bigtbl Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 value (type: string) Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 #### A masked pattern was here #### POSTHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 #### A masked pattern was here #### RUN: Stage-8:MAPREDLOCAL RUN: Stage-3:MAPRED 580 PREHOOK: query: -- Now run a query with two-way join, which should be converted into a -- map-join followed by groupby - two MR jobs overall explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) PREHOOK: type: QUERY POSTHOOK: query: -- Now run a query with two-way join, which should be converted into a -- map-join followed by groupby - two MR jobs overall explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: firstjoin:smalltbl1 Fetch Operator limit: -1 smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: firstjoin:smalltbl1 TableScan alias: smalltbl1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) 1 key (type: string) smalltbl2 TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col1 (type: string) 1 value (type: string) Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: bigtbl Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 value (type: string) Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 #### A masked pattern was here #### POSTHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 #### A masked pattern was here #### RUN: Stage-8:MAPREDLOCAL RUN: Stage-3:MAPRED 580 PREHOOK: query: -- Now run a query with two-way join, which should first be converted into a -- map-join followed by groupby and then finally into a single MR job. explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key PREHOOK: type: QUERY POSTHOOK: query: -- Now run a query with two-way join, which should first be converted into a -- map-join followed by groupby and then finally into a single MR job. explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-8 is a root stage Stage-3 depends on stages: Stage-8 Stage-0 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: firstjoin:smalltbl1 Fetch Operator limit: -1 smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: firstjoin:smalltbl1 TableScan alias: smalltbl1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) 1 key (type: string) smalltbl2 TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col1 (type: string) 1 value (type: string) Stage: Stage-3 Map Reduce Map Operator Tree: TableScan alias: bigtbl Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 1250 Data size: 13280 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: string) 1 key (type: string) outputColumnNames: _col1 Statistics: Num rows: 1375 Data size: 14608 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 value (type: string) outputColumnNames: _col3 Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() keys: _col3 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1512 Data size: 16068 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 756 Data size: 8034 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 756 Data size: 8034 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 756 Data size: 8034 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 #### A masked pattern was here #### POSTHOOK: query: select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 on (bigTbl.key = smallTbl1.key) ) firstjoin JOIN smallTbl2 on (firstjoin.value1 = smallTbl2.value) group by smallTbl2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 #### A masked pattern was here #### RUN: Stage-8:MAPREDLOCAL RUN: Stage-3:MAPRED 270 10 10 270 10 10 PREHOOK: query: drop table bigTbl PREHOOK: type: DROPTABLE PREHOOK: Input: default@bigtbl PREHOOK: Output: default@bigtbl POSTHOOK: query: drop table bigTbl POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@bigtbl POSTHOOK: Output: default@bigtbl RUN: Stage-0:DDL PREHOOK: query: create table bigTbl(key1 string, key2 string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bigTbl POSTHOOK: query: create table bigTbl(key1 string, key2 string, value string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bigTbl RUN: Stage-0:DDL PREHOOK: query: insert overwrite table bigTbl select * from ( select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src ) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@bigtbl POSTHOOK: query: insert overwrite table bigTbl select * from ( select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src union all select key as key1, key as key2, value from src ) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@bigtbl POSTHOOK: Lineage: bigtbl.key1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: bigtbl.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-1:MAPRED RUN: Stage-7:CONDITIONAL RUN: Stage-4:MOVE RUN: Stage-0:MOVE RUN: Stage-2:STATS PREHOOK: query: -- First disable noconditionaltask EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY POSTHOOK: query: -- First disable noconditionaltask EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-21 is a root stage , consists of Stage-28, Stage-29, Stage-1 Stage-28 has a backup stage: Stage-1 Stage-19 depends on stages: Stage-28 Stage-18 depends on stages: Stage-1, Stage-19, Stage-20 , consists of Stage-26, Stage-27, Stage-2 Stage-26 has a backup stage: Stage-2 Stage-16 depends on stages: Stage-26 Stage-15 depends on stages: Stage-2, Stage-16, Stage-17 , consists of Stage-24, Stage-25, Stage-3 Stage-24 has a backup stage: Stage-3 Stage-13 depends on stages: Stage-24 Stage-12 depends on stages: Stage-3, Stage-13, Stage-14 , consists of Stage-22, Stage-23, Stage-4 Stage-22 has a backup stage: Stage-4 Stage-10 depends on stages: Stage-22 Stage-5 depends on stages: Stage-4, Stage-10, Stage-11 Stage-23 has a backup stage: Stage-4 Stage-11 depends on stages: Stage-23 Stage-4 Stage-25 has a backup stage: Stage-3 Stage-14 depends on stages: Stage-25 Stage-3 Stage-27 has a backup stage: Stage-2 Stage-17 depends on stages: Stage-27 Stage-2 Stage-29 has a backup stage: Stage-1 Stage-20 depends on stages: Stage-29 Stage-1 Stage-0 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-21 Conditional Operator Stage: Stage-28 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 Filter Operator predicate: key is not null (type: boolean) HashTable Sink Operator keys: 0 key1 (type: string) 1 key (type: string) Stage: Stage-19 Map Reduce Map Operator Tree: TableScan alias: bigtbl Filter Operator predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col6 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-18 Conditional Operator Stage: Stage-26 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:smalltbl2 TableScan alias: smalltbl2 Filter Operator predicate: value is not null (type: boolean) HashTable Sink Operator keys: 0 _col3 (type: string) 1 value (type: string) Stage: Stage-16 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: string) 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-15 Conditional Operator Stage: Stage-24 Map Reduce Local Work Alias -> Map Local Tables: join3:smalltbl3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:smalltbl3 TableScan alias: smalltbl3 Filter Operator predicate: key is not null (type: boolean) HashTable Sink Operator keys: 0 _col1 (type: string) 1 key (type: string) Stage: Stage-13 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-12 Conditional Operator Stage: Stage-22 Map Reduce Local Work Alias -> Map Local Tables: smalltbl4 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: smalltbl4 TableScan alias: smalltbl4 Filter Operator predicate: key is not null (type: boolean) HashTable Sink Operator keys: 0 _col2 (type: string) 1 key (type: string) Stage: Stage-10 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $INTNAME TableScan HashTable Sink Operator keys: 0 _col2 (type: string) 1 key (type: string) Stage: Stage-11 Map Reduce Map Operator Tree: TableScan alias: smalltbl4 Filter Operator predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: smalltbl4 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 913 Data size: 13207 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Statistics: Num rows: 913 Data size: 13207 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-25 Map Reduce Local Work Alias -> Map Local Tables: join3:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:$INTNAME TableScan HashTable Sink Operator keys: 0 _col1 (type: string) 1 key (type: string) Stage: Stage-14 Map Reduce Map Operator Tree: TableScan alias: smalltbl3 Filter Operator predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: smalltbl3 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:$INTNAME TableScan HashTable Sink Operator keys: 0 _col3 (type: string) 1 value (type: string) Stage: Stage-17 Map Reduce Map Operator Tree: TableScan alias: smalltbl2 Filter Operator predicate: value is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: string) 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: string) 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-29 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:bigtbl Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl Filter Operator predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) HashTable Sink Operator keys: 0 key1 (type: string) 1 key (type: string) Stage: Stage-20 Map Reduce Map Operator Tree: TableScan alias: smalltbl1 Filter Operator predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col6 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: bigtbl Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) Statistics: Num rows: 625 Data size: 9022 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key1 (type: string) sort order: + Map-reduce partition columns: key1 (type: string) Statistics: Num rows: 625 Data size: 9022 Basic stats: COMPLETE Column stats: NONE value expressions: key2 (type: string), value (type: string) TableScan alias: smalltbl1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 key1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col6 Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 PREHOOK: Input: default@smalltbl3 PREHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### RUN: Stage-21:CONDITIONAL RUN: Stage-28:MAPREDLOCAL RUN: Stage-19:MAPRED RUN: Stage-18:CONDITIONAL RUN: Stage-26:MAPREDLOCAL RUN: Stage-16:MAPRED RUN: Stage-15:CONDITIONAL RUN: Stage-24:MAPREDLOCAL RUN: Stage-13:MAPRED RUN: Stage-12:CONDITIONAL RUN: Stage-22:MAPREDLOCAL RUN: Stage-10:MAPRED RUN: Stage-5:MAPRED 247580 247580 247580 247580 247580 247580 548662743780 548662743780 PREHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 10000, which is large enough to fit all four small tables (smallTbl1 to smallTbl4). -- We will use a single MR job to evaluate this query. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY POSTHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 10000, which is large enough to fit all four small tables (smallTbl1 to smallTbl4). -- We will use a single MR job to evaluate this query. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-14 is a root stage Stage-5 depends on stages: Stage-14 Stage-0 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 Fetch Operator limit: -1 join3:join2:smalltbl2 Fetch Operator limit: -1 join3:smalltbl3 Fetch Operator limit: -1 smalltbl4 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key1 (type: string) 1 key (type: string) join3:join2:smalltbl2 TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col3 (type: string) 1 value (type: string) join3:smalltbl3 TableScan alias: smalltbl3 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col1 (type: string) 1 key (type: string) smalltbl4 TableScan alias: smalltbl4 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col2 (type: string) 1 key (type: string) Stage: Stage-5 Map Reduce Map Operator Tree: TableScan alias: bigtbl Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) Statistics: Num rows: 625 Data size: 9022 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col6 Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: string) 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 913 Data size: 13207 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Statistics: Num rows: 913 Data size: 13207 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 PREHOOK: Input: default@smalltbl3 PREHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### RUN: Stage-14:MAPREDLOCAL RUN: Stage-5:MAPRED 247580 247580 247580 247580 247580 247580 548662743780 548662743780 PREHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 200, which is large enough to fit two small tables. We will have two jobs to evaluate this -- query. The first job is a Map-only job to evaluate join1 and join2. -- The second job will evaluate the rest of this query. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY POSTHOOK: query: -- Enable noconditionaltask and set the size of hive.auto.convert.join.noconditionaltask.size -- to 200, which is large enough to fit two small tables. We will have two jobs to evaluate this -- query. The first job is a Map-only job to evaluate join1 and join2. -- The second job will evaluate the rest of this query. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-15 is a root stage Stage-12 depends on stages: Stage-15 Stage-14 depends on stages: Stage-12 Stage-5 depends on stages: Stage-14 Stage-0 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 Fetch Operator limit: -1 join3:join2:smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key1 (type: string) 1 key (type: string) join3:join2:smalltbl2 TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col3 (type: string) 1 value (type: string) Stage: Stage-12 Map Reduce Map Operator Tree: TableScan alias: bigtbl Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) Statistics: Num rows: 625 Data size: 9022 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col6 Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: string) 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: join3:smalltbl3 Fetch Operator limit: -1 smalltbl4 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:smalltbl3 TableScan alias: smalltbl3 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col1 (type: string) 1 key (type: string) smalltbl4 TableScan alias: smalltbl4 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 _col2 (type: string) 1 key (type: string) Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 913 Data size: 13207 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Statistics: Num rows: 913 Data size: 13207 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 PREHOOK: Input: default@smalltbl3 PREHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### RUN: Stage-15:MAPREDLOCAL RUN: Stage-12:MAPRED RUN: Stage-14:MAPREDLOCAL RUN: Stage-5:MAPRED 247580 247580 247580 247580 247580 247580 548662743780 548662743780 PREHOOK: query: -- Enable noconditionaltask and but set the size of hive.auto.convert.join.noconditionaltask.size -- to 0. The plan will be the same as the one with a disabled nonconditionaltask. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY POSTHOOK: query: -- Enable noconditionaltask and but set the size of hive.auto.convert.join.noconditionaltask.size -- to 0. The plan will be the same as the one with a disabled nonconditionaltask. EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-21 is a root stage , consists of Stage-28, Stage-29, Stage-1 Stage-28 has a backup stage: Stage-1 Stage-19 depends on stages: Stage-28 Stage-18 depends on stages: Stage-1, Stage-19, Stage-20 , consists of Stage-26, Stage-27, Stage-2 Stage-26 has a backup stage: Stage-2 Stage-16 depends on stages: Stage-26 Stage-15 depends on stages: Stage-2, Stage-16, Stage-17 , consists of Stage-24, Stage-25, Stage-3 Stage-24 has a backup stage: Stage-3 Stage-13 depends on stages: Stage-24 Stage-12 depends on stages: Stage-3, Stage-13, Stage-14 , consists of Stage-22, Stage-23, Stage-4 Stage-22 has a backup stage: Stage-4 Stage-10 depends on stages: Stage-22 Stage-5 depends on stages: Stage-4, Stage-10, Stage-11 Stage-23 has a backup stage: Stage-4 Stage-11 depends on stages: Stage-23 Stage-4 Stage-25 has a backup stage: Stage-3 Stage-14 depends on stages: Stage-25 Stage-3 Stage-27 has a backup stage: Stage-2 Stage-17 depends on stages: Stage-27 Stage-2 Stage-29 has a backup stage: Stage-1 Stage-20 depends on stages: Stage-29 Stage-1 Stage-0 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-21 Conditional Operator Stage: Stage-28 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:smalltbl1 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:smalltbl1 TableScan alias: smalltbl1 Filter Operator predicate: key is not null (type: boolean) HashTable Sink Operator keys: 0 key1 (type: string) 1 key (type: string) Stage: Stage-19 Map Reduce Map Operator Tree: TableScan alias: bigtbl Filter Operator predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col6 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-18 Conditional Operator Stage: Stage-26 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:smalltbl2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:smalltbl2 TableScan alias: smalltbl2 Filter Operator predicate: value is not null (type: boolean) HashTable Sink Operator keys: 0 _col3 (type: string) 1 value (type: string) Stage: Stage-16 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: string) 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-15 Conditional Operator Stage: Stage-24 Map Reduce Local Work Alias -> Map Local Tables: join3:smalltbl3 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:smalltbl3 TableScan alias: smalltbl3 Filter Operator predicate: key is not null (type: boolean) HashTable Sink Operator keys: 0 _col1 (type: string) 1 key (type: string) Stage: Stage-13 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-12 Conditional Operator Stage: Stage-22 Map Reduce Local Work Alias -> Map Local Tables: smalltbl4 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: smalltbl4 TableScan alias: smalltbl4 Filter Operator predicate: key is not null (type: boolean) HashTable Sink Operator keys: 0 _col2 (type: string) 1 key (type: string) Stage: Stage-10 Map Reduce Map Operator Tree: TableScan Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-23 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: $INTNAME TableScan HashTable Sink Operator keys: 0 _col2 (type: string) 1 key (type: string) Stage: Stage-11 Map Reduce Map Operator Tree: TableScan alias: smalltbl4 Filter Operator predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) TableScan alias: smalltbl4 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 913 Data size: 13207 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col7 (type: string), _col5 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col5, _col6 Statistics: Num rows: 913 Data size: 13207 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(hash(_col0)), sum(hash(_col1)), sum(hash(_col2)), sum(hash(_col3)), sum(hash(_col4)), sum(hash(_col7)), sum(hash(_col5)), sum(hash(_col6)) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-25 Map Reduce Local Work Alias -> Map Local Tables: join3:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:$INTNAME TableScan HashTable Sink Operator keys: 0 _col1 (type: string) 1 key (type: string) Stage: Stage-14 Map Reduce Map Operator Tree: TableScan alias: smalltbl3 Filter Operator predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) TableScan alias: smalltbl3 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 830 Data size: 12007 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-27 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:$INTNAME Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:$INTNAME TableScan HashTable Sink Operator keys: 0 _col3 (type: string) 1 value (type: string) Stage: Stage-17 Map Reduce Map Operator Tree: TableScan alias: smalltbl2 Filter Operator predicate: value is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: string) 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-2 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col4 (type: string) TableScan alias: smalltbl2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 _col3 (type: string) 1 value (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 755 Data size: 10916 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-29 Map Reduce Local Work Alias -> Map Local Tables: join3:join2:join1:bigtbl Fetch Operator limit: -1 Alias -> Map Local Operator Tree: join3:join2:join1:bigtbl TableScan alias: bigtbl Filter Operator predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) HashTable Sink Operator keys: 0 key1 (type: string) 1 key (type: string) Stage: Stage-20 Map Reduce Map Operator Tree: TableScan alias: smalltbl1 Filter Operator predicate: key is not null (type: boolean) Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col6 Select Operator expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: bigtbl Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((key1 is not null and value is not null) and key2 is not null) (type: boolean) Statistics: Num rows: 625 Data size: 9022 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key1 (type: string) sort order: + Map-reduce partition columns: key1 (type: string) Statistics: Num rows: 625 Data size: 9022 Basic stats: COMPLETE Column stats: NONE value expressions: key2 (type: string), value (type: string) TableScan alias: smalltbl1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: 0 key1 (type: string) 1 key (type: string) outputColumnNames: _col0, _col1, _col2, _col6 Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col6 (type: string), _col2 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 687 Data size: 9924 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) PREHOOK: type: QUERY PREHOOK: Input: default@bigtbl PREHOOK: Input: default@smalltbl1 PREHOOK: Input: default@smalltbl2 PREHOOK: Input: default@smalltbl3 PREHOOK: Input: default@smalltbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), SUM(HASH(join3.key3)), SUM(HASH(join3.key4)), SUM(HASH(join3.key5)), SUM(HASH(smallTbl4.key)), SUM(HASH(join3.value1)), SUM(HASH(join3.value2)) FROM (SELECT join2.key1 as key1, join2.key2 as key2, join2.key3 as key3, join2.key4 as key4, smallTbl3.key as key5, join2.value1 as value1, join2.value2 as value2 FROM (SELECT join1.key1 as key1, join1.key2 as key2, join1.key3 as key3, smallTbl2.key as key4, join1.value1 as value1, join1.value2 as value2 FROM (SELECT bigTbl.key1 as key1, bigTbl.key2 as key2, smallTbl1.key as key3, bigTbl.value as value1, bigTbl.value as value2 FROM bigTbl JOIN smallTbl1 ON (bigTbl.key1 = smallTbl1.key)) join1 JOIN smallTbl2 ON (join1.value1 = smallTbl2.value)) join2 JOIN smallTbl3 ON (join2.key2 = smallTbl3.key)) join3 JOIN smallTbl4 ON (join3.key3 = smallTbl4.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@bigtbl POSTHOOK: Input: default@smalltbl1 POSTHOOK: Input: default@smalltbl2 POSTHOOK: Input: default@smalltbl3 POSTHOOK: Input: default@smalltbl4 #### A masked pattern was here #### RUN: Stage-21:CONDITIONAL RUN: Stage-28:MAPREDLOCAL RUN: Stage-19:MAPRED RUN: Stage-18:CONDITIONAL RUN: Stage-26:MAPREDLOCAL RUN: Stage-16:MAPRED RUN: Stage-15:CONDITIONAL RUN: Stage-24:MAPREDLOCAL RUN: Stage-13:MAPRED RUN: Stage-12:CONDITIONAL RUN: Stage-22:MAPREDLOCAL RUN: Stage-10:MAPRED RUN: Stage-5:MAPRED 247580 247580 247580 247580 247580 247580 548662743780 548662743780