PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@srcbucket_mapjoin POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_mapjoin PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tab_part POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tab_part PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_mapjoin_part PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part PREHOOK: type: QUERY PREHOOK: Input: default@srcbucket_mapjoin_part PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: Output: default@tab_part@ds=2008-04-08 POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_part POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: Output: default@tab_part@ds=2008-04-08 POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tab POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tab PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') select key,value from srcbucket_mapjoin PREHOOK: type: QUERY PREHOOK: Input: default@srcbucket_mapjoin PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: Output: default@tab@ds=2008-04-08 POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') select key,value from srcbucket_mapjoin POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: Output: default@tab@ds=2008-04-08 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key is not null and value is not null) (type: boolean) Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 key (type: int) 1 key (type: int) Local Work: Map Reduce Local Work Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: value is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col1 (type: string) 1 value (type: string) Local Work: Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 value (type: string) outputColumnNames: _col0, _col12 input vertices: 1 Map 3 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col12 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@tab1 POSTHOOK: query: CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tab1 PREHOOK: query: insert overwrite table tab1 select key,value from srcbucket_mapjoin PREHOOK: type: QUERY PREHOOK: Input: default@srcbucket_mapjoin PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: Output: default@tab1 POSTHOOK: query: insert overwrite table tab1 select key,value from srcbucket_mapjoin POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: Output: default@tab1 POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: explain select a.key, a.value, b.value from tab1 a join src b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, a.value, b.value from tab1 a join src b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 UDFToDouble(key) (type: double) 1 UDFToDouble(key) (type: double) Local Work: Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: Map 2 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 UDFToDouble(key) (type: double) 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1, _col6 input vertices: 0 Map 1 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: Map 2 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 2) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: 1 Map 2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: Map 2 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 2) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: 1 Map 2 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: Map 2 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 2) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: 0 Map 1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) 1 key (type: int) Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 input vertices: 0 Reducer 2 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value PREHOOK: type: QUERY POSTHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(value) is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reducer 2 Local Work: Map Reduce Local Work Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 60 Data size: 636 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(key) (type: double) Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: UDFToDouble(key) is not null (type: boolean) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col2 input vertices: 0 Reducer 2 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Local Work: Map Reduce Local Work Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink