PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_mapjoin PREHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tab_part PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_mapjoin_part PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part PREHOOK: type: QUERY PREHOOK: Input: default@srcbucket_mapjoin_part PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: Output: default@tab_part@ds=2008-04-08 POSTHOOK: query: insert overwrite table tab_part partition (ds='2008-04-08') select key,value from srcbucket_mapjoin_part POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin_part POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: Output: default@tab_part@ds=2008-04-08 POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tab POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: insert overwrite table tab partition (ds='2008-04-08') select key,value from srcbucket_mapjoin PREHOOK: type: QUERY PREHOOK: Input: default@srcbucket_mapjoin PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: Output: default@tab@ds=2008-04-08 POSTHOOK: query: insert overwrite table tab partition (ds='2008-04-08') select key,value from srcbucket_mapjoin POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: Output: default@tab@ds=2008-04-08 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from tab_part a join tab_part c on a.key = c.key join tab_part b on a.value = b.value POSTHOOK: type: QUERY POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Tez Edges: Map 3 <- Map 2 (CUSTOM_EDGE), Map 1 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: value (type: string) sort order: + Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int) Map 2 Map Operator Tree: TableScan alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map 3 Map Operator Tree: TableScan alias: a Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 keys: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} 1 {key} keys: 0 _col1 (type: string) 1 value (type: string) outputColumnNames: _col5, _col10 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col5 (type: int), _col10 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE tab1(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@tab1 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: insert overwrite table tab1 select key,value from srcbucket_mapjoin PREHOOK: type: QUERY PREHOOK: Input: default@srcbucket_mapjoin PREHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 PREHOOK: Output: default@tab1 POSTHOOK: query: insert overwrite table tab1 select key,value from srcbucket_mapjoin POSTHOOK: type: QUERY POSTHOOK: Input: default@srcbucket_mapjoin POSTHOOK: Input: default@srcbucket_mapjoin@ds=2008-04-08 POSTHOOK: Output: default@tab1 POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: explain select a.key, a.value, b.value from tab1 a join src b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, a.value, b.value from tab1 a join src b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Tez Edges: Map 1 <- Map 2 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {key} {value} 1 {value} keys: 0 UDFToDouble(key) (type: double) 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col1, _col5 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 2 Map Operator Tree: TableScan alias: a Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: UDFToDouble(key) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(key) (type: double) Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: int), value (type: string) Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Tez Edges: Map 1 <- Map 2 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} 1 {_col0} keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 2 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 2) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a left outer join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Tez Edges: Map 1 <- Map 2 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Outer Join0 to 1 condition expressions: 0 {_col0} 1 {_col0} keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 2 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 2) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select key from tab_part where key > 1) a right outer join (select key from tab_part where key > 2) b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Tez Edges: Map 2 <- Map 1 (CUSTOM_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 1) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Map 2 Map Operator Tree: TableScan alias: tab_part Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key > 2) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 condition expressions: 0 {_col0} 1 {_col0} keys: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 182 Data size: 1939 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key PREHOOK: type: QUERY POSTHOOK: query: explain select a.key, b.key from (select distinct key from tab) a join tab b on b.key = a.key POSTHOOK: type: QUERY POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Tez Edges: Map 1 <- Reducer 3 (CUSTOM_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} 1 {key} keys: 0 _col0 (type: int) 1 key (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 2 Map Operator Tree: TableScan alias: tab Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: key Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int) Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value PREHOOK: type: QUERY POSTHOOK: query: explain select a.value, b.value from (select distinct value from tab) a join tab b on b.key = a.value POSTHOOK: type: QUERY POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab1.key SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab1.value SIMPLE [(srcbucket_mapjoin)srcbucket_mapjoin.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).key SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: tab_part PARTITION(ds=2008-04-08).value SIMPLE [(srcbucket_mapjoin_part)srcbucket_mapjoin_part.FieldSchema(name:value, type:string, comment:null), ] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Tez Edges: Map 1 <- Reducer 3 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {_col0} 1 {value} keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(key) (type: double) outputColumnNames: _col0, _col2 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Map 2 Map Operator Tree: TableScan alias: tab Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: value (type: string) outputColumnNames: value Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: value (type: string) mode: hash outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: UDFToDouble(_col0) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 121 Data size: 1283 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator limit: -1