PREHOOK: query: -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b,
-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, 
-- this tests that the aggregate function stores the partial aggregate state correctly even if an 
-- additional MR job is created for processing the grouping sets.
CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@T1
POSTHOOK: query: -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b,
-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, 
-- this tests that the aggregate function stores the partial aggregate state correctly even if an 
-- additional MR job is created for processing the grouping sets.
CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@T1
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@t1
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets1.txt' INTO TABLE T1
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@t1
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@t1
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/grouping_sets2.txt' INTO TABLE T1
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@t1
PREHOOK: query: -- The query below will execute in a single MR job, since 4 rows are generated per input row
-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and 
-- hive.new.job.grouping.set.cardinality is more than 4.
EXPLAIN
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
PREHOOK: type: QUERY
POSTHOOK: query: -- The query below will execute in a single MR job, since 4 rows are generated per input row
-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and 
-- hive.new.job.grouping.set.cardinality is more than 4.
EXPLAIN
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: a (type: string), b (type: string), c (type: string)
              outputColumnNames: _col0, _col1, _col2
              Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: avg(_col2), count()
                keys: _col0 (type: string), _col1 (type: string), '0' (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  value expressions: _col3 (type: struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: avg(VALUE._col0), count(VALUE._col1)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col3, _col4
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          pruneGroupingSetId: true
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
            outputColumnNames: _col0, _col1, _col2, _col3
            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
            File Output Operator
              compressed: false
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
NULL	NULL	3.8333333333333335	12
NULL	1	2.0	5
NULL	2	5.2	5
NULL	3	5.0	2
1	NULL	2.6666666666666665	3
1	1	3.0	2
1	2	2.0	1
2	NULL	5.2	5
2	2	5.333333333333333	3
2	3	5.0	2
3	NULL	8.0	1
3	2	8.0	1
5	NULL	2.0	1
5	1	2.0	1
8	NULL	1.0	2
8	1	1.0	2
PREHOOK: query: -- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2.
-- The partial aggregation state should be maintained correctly across MR jobs.
EXPLAIN
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
PREHOOK: type: QUERY
POSTHOOK: query: -- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2.
-- The partial aggregation state should be maintained correctly across MR jobs.
EXPLAIN
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
            Select Operator
              expressions: a (type: string), b (type: string), c (type: string)
              outputColumnNames: _col0, _col1, _col2
              Statistics: Num rows: 0 Data size: 72 Basic stats: PARTIAL Column stats: NONE
              Group By Operator
                aggregations: avg(_col2), count()
                keys: _col0 (type: string), _col1 (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
                  value expressions: _col2 (type: struct<count:bigint,sum:double,input:string>), _col3 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: avg(VALUE._col0), count(VALUE._col1)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), '0' (type: string)
          mode: partials
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          File Output Operator
            compressed: false
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            Reduce Output Operator
              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
              sort order: +++
              Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              value expressions: _col3 (type: struct<count:bigint,sum:double,input:string>), _col4 (type: bigint)
      Reduce Operator Tree:
        Group By Operator
          aggregations: avg(VALUE._col0), count(VALUE._col1)
          keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
          mode: final
          outputColumnNames: _col0, _col1, _col3, _col4
          Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
          pruneGroupingSetId: true
          Select Operator
            expressions: _col0 (type: string), _col1 (type: string), _col3 (type: double), _col4 (type: bigint)
            outputColumnNames: _col0, _col1, _col2, _col3
            Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
            File Output Operator
              compressed: false
              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
NULL	NULL	3.8333333333333335	12
NULL	1	2.0	5
NULL	2	5.2	5
NULL	3	5.0	2
1	NULL	2.6666666666666665	3
1	1	3.0	2
1	2	2.0	1
2	NULL	5.2	5
2	2	5.333333333333333	3
2	3	5.0	2
3	NULL	8.0	1
3	2	8.0	1
5	NULL	2.0	1
5	1	2.0	1
8	NULL	1.0	2
8	1	1.0	2