PREHOOK: query: CREATE TABLE T1(key STRING, val STRING)
SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@T1
POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING)
SKEWED BY (key) ON ((2)) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@T1
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@t1
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@t1
PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@T2
POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@T2
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@t2
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T2.txt' INTO TABLE T2
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@t2
PREHOOK: query: -- copy from skewjoinopt9
-- test compile time skew join and auto map join
-- no skew join compile time optimization would be performed if one of the
-- join sources is a sub-query consisting of a union all
-- adding a order by at the end to make the results deterministic
EXPLAIN
select * from
(
select key, val from T1
  union all 
select key, val from T1
) subq1
join T2 b on subq1.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: -- copy from skewjoinopt9
-- test compile time skew join and auto map join
-- no skew join compile time optimization would be performed if one of the
-- join sources is a sub-query consisting of a union all
-- adding a order by at the end to make the results deterministic
EXPLAIN
select * from
(
select key, val from T1
  union all 
select key, val from T1
) subq1
join T2 b on subq1.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-5 is a root stage
  Stage-4 depends on stages: Stage-5
  Stage-0 depends on stages: Stage-4

STAGE PLANS:
  Stage: Stage-5
    Map Reduce Local Work
      Alias -> Map Local Tables:
        b 
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
        b 
          TableScan
            alias: b
            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              HashTable Sink Operator
                keys:
                  0 _col0 (type: string)
                  1 key (type: string)

  Stage: Stage-4
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Select Operator
                expressions: key (type: string), val (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Union
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  Map Join Operator
                    condition map:
                         Inner Join 0 to 1
                    keys:
                      0 _col0 (type: string)
                      1 key (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3
                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          TableScan
            alias: t1
            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Select Operator
                expressions: key (type: string), val (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Union
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  Map Join Operator
                    condition map:
                         Inner Join 0 to 1
                    keys:
                      0 _col0 (type: string)
                      1 key (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3
                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                    Select Operator
                      expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3
                      Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                        table:
                            input format: org.apache.hadoop.mapred.TextInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Local Work:
        Map Reduce Local Work

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: select * from
(
select key, val from T1
  union all 
select key, val from T1
) subq1
join T2 b on subq1.key = b.key
ORDER BY subq1.key, b.key, subq1.val, b.val
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: select * from
(
select key, val from T1
  union all 
select key, val from T1
) subq1
join T2 b on subq1.key = b.key
ORDER BY subq1.key, b.key, subq1.val, b.val
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
2	12	2	22
2	12	2	22
3	13	3	13
3	13	3	13
8	18	8	18
8	18	8	18
8	18	8	18
8	18	8	18
8	28	8	18
8	28	8	18
8	28	8	18
8	28	8	18
PREHOOK: query: -- no skew join compile time optimization would be performed if one of the
-- join sources is a sub-query consisting of a group by
EXPLAIN
select * from
(
select key, count(1) as cnt from T1 group by key
) subq1
join T2 b on subq1.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: -- no skew join compile time optimization would be performed if one of the
-- join sources is a sub-query consisting of a group by
EXPLAIN
select * from
(
select key, count(1) as cnt from T1 group by key
) subq1
join T2 b on subq1.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-5 depends on stages: Stage-1
  Stage-4 depends on stages: Stage-5
  Stage-0 depends on stages: Stage-4

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Group By Operator
                aggregations: count(1)
                keys: key (type: string)
                mode: hash
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  value expressions: _col1 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-5
    Map Reduce Local Work
      Alias -> Map Local Tables:
        b 
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
        b 
          TableScan
            alias: b
            Statistics: Num rows: 0 Data size: 30 Basic stats: PARTIAL Column stats: NONE
            Filter Operator
              predicate: key is not null (type: boolean)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              HashTable Sink Operator
                keys:
                  0 _col0 (type: string)
                  1 key (type: string)

  Stage: Stage-4
    Map Reduce
      Map Operator Tree:
          TableScan
            Map Join Operator
              condition map:
                   Inner Join 0 to 1
              keys:
                0 _col0 (type: string)
                1 key (type: string)
              outputColumnNames: _col0, _col1, _col2, _col3
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              Select Operator
                expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string)
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                File Output Operator
                  compressed: false
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
      Local Work:
        Map Reduce Local Work

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: select * from
(
select key, count(1) as cnt from T1 group by key
) subq1
join T2 b on subq1.key = b.key
ORDER BY subq1.key, b.key, subq1.cnt, b.val
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@t2
#### A masked pattern was here ####
POSTHOOK: query: select * from
(
select key, count(1) as cnt from T1 group by key
) subq1
join T2 b on subq1.key = b.key
ORDER BY subq1.key, b.key, subq1.cnt, b.val
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@t2
#### A masked pattern was here ####
2	1	2	22
3	1	3	13
8	2	8	18
8	2	8	18