PREHOOK: query: -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b,
-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, 
-- this tests that the aggregate function stores the partial aggregate state correctly even if an 
-- additional MR job is created for processing the grouping sets.
CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
POSTHOOK: query: -- In this test, 2 files are loaded into table T1. The data contains rows with the same value of a and b,
-- with different number of rows for a and b in each file. Since bucketizedHiveInputFormat is used, 
-- this tests that the aggregate function stores the partial aggregate state correctly even if an 
-- additional MR job is created for processing the grouping sets.
CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@T1
PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets1.txt' INTO TABLE T1
PREHOOK: type: LOAD
PREHOOK: Output: default@t1
POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets1.txt' INTO TABLE T1
POSTHOOK: type: LOAD
POSTHOOK: Output: default@t1
PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets2.txt' INTO TABLE T1
PREHOOK: type: LOAD
PREHOOK: Output: default@t1
POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/grouping_sets2.txt' INTO TABLE T1
POSTHOOK: type: LOAD
POSTHOOK: Output: default@t1
PREHOOK: query: -- The query below will execute in a single MR job, since 4 rows are generated per input row
-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and 
-- hive.new.job.grouping.set.cardinality is more than 4.
EXPLAIN
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
PREHOOK: type: QUERY
POSTHOOK: query: -- The query below will execute in a single MR job, since 4 rows are generated per input row
-- (cube of a,b will lead to (a,b), (a, null), (null, b) and (null, null) and 
-- hive.new.job.grouping.set.cardinality is more than 4.
EXPLAIN
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        t1 
          TableScan
            alias: t1
            Select Operator
              expressions:
                    expr: a
                    type: string
                    expr: b
                    type: string
                    expr: c
                    type: string
              outputColumnNames: a, b, c
              Group By Operator
                aggregations:
                      expr: avg(c)
                      expr: count()
                bucketGroup: false
                keys:
                      expr: a
                      type: string
                      expr: b
                      type: string
                      expr: '0'
                      type: string
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
                Reduce Output Operator
                  key expressions:
                        expr: _col0
                        type: string
                        expr: _col1
                        type: string
                        expr: _col2
                        type: string
                  sort order: +++
                  Map-reduce partition columns:
                        expr: _col0
                        type: string
                        expr: _col1
                        type: string
                        expr: _col2
                        type: string
                  tag: -1
                  value expressions:
                        expr: _col3
                        type: struct<count:bigint,sum:double>
                        expr: _col4
                        type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: avg(VALUE._col0)
                expr: count(VALUE._col1)
          bucketGroup: false
          keys:
                expr: KEY._col0
                type: string
                expr: KEY._col1
                type: string
                expr: KEY._col2
                type: string
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Select Operator
            expressions:
                  expr: _col0
                  type: string
                  expr: _col1
                  type: string
                  expr: _col3
                  type: double
                  expr: _col4
                  type: bigint
            outputColumnNames: _col0, _col1, _col2, _col3
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1


PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
NULL	NULL	3.8333333333333335	12
NULL	1	2.0	5
NULL	2	5.2	5
NULL	3	5.0	2
1	NULL	2.6666666666666665	3
1	1	3.0	2
1	2	2.0	1
2	NULL	5.2	5
2	2	5.333333333333333	3
2	3	5.0	2
3	NULL	8.0	1
3	2	8.0	1
5	NULL	2.0	1
5	1	2.0	1
8	NULL	1.0	2
8	1	1.0	2
PREHOOK: query: -- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2.
-- The partial aggregation state should be maintained correctly across MR jobs.
EXPLAIN
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
PREHOOK: type: QUERY
POSTHOOK: query: -- The query below will execute in 2 MR jobs, since hive.new.job.grouping.set.cardinality is set to 2.
-- The partial aggregation state should be maintained correctly across MR jobs.
EXPLAIN
SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL a)) (TOK_SELEXPR (TOK_TABLE_OR_COL b)) (TOK_SELEXPR (TOK_FUNCTION avg (TOK_TABLE_OR_COL c))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_CUBE_GROUPBY (TOK_TABLE_OR_COL a) (TOK_TABLE_OR_COL b))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        t1 
          TableScan
            alias: t1
            Select Operator
              expressions:
                    expr: a
                    type: string
                    expr: b
                    type: string
                    expr: c
                    type: string
              outputColumnNames: a, b, c
              Group By Operator
                aggregations:
                      expr: avg(c)
                      expr: count()
                bucketGroup: false
                keys:
                      expr: a
                      type: string
                      expr: b
                      type: string
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3
                Reduce Output Operator
                  key expressions:
                        expr: _col0
                        type: string
                        expr: _col1
                        type: string
                  sort order: ++
                  Map-reduce partition columns:
                        expr: _col0
                        type: string
                        expr: _col1
                        type: string
                  tag: -1
                  value expressions:
                        expr: _col2
                        type: struct<count:bigint,sum:double>
                        expr: _col3
                        type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: avg(VALUE._col0)
                expr: count(VALUE._col1)
          bucketGroup: false
          keys:
                expr: KEY._col0
                type: string
                expr: KEY._col1
                type: string
                expr: '0'
                type: string
          mode: partials
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          File Output Operator
            compressed: false
            GlobalTableId: 0
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
#### A masked pattern was here ####
            Reduce Output Operator
              key expressions:
                    expr: _col0
                    type: string
                    expr: _col1
                    type: string
                    expr: _col2
                    type: string
              sort order: +++
              Map-reduce partition columns:
                    expr: _col0
                    type: string
                    expr: _col1
                    type: string
                    expr: _col2
                    type: string
              tag: -1
              value expressions:
                    expr: _col3
                    type: struct<count:bigint,sum:double>
                    expr: _col4
                    type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: avg(VALUE._col0)
                expr: count(VALUE._col1)
          bucketGroup: false
          keys:
                expr: KEY._col0
                type: string
                expr: KEY._col1
                type: string
                expr: KEY._col2
                type: string
          mode: final
          outputColumnNames: _col0, _col1, _col2, _col3, _col4
          Select Operator
            expressions:
                  expr: _col0
                  type: string
                  expr: _col1
                  type: string
                  expr: _col3
                  type: double
                  expr: _col4
                  type: bigint
            outputColumnNames: _col0, _col1, _col2, _col3
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1


PREHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: SELECT a, b, avg(c), count(*) from T1 group by a, b with cube
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
NULL	NULL	3.8333333333333335	12
NULL	1	2.0	5
NULL	2	5.2	5
NULL	3	5.0	2
1	NULL	2.6666666666666665	3
1	1	3.0	2
1	2	2.0	1
2	NULL	5.2	5
2	2	5.333333333333333	3
2	3	5.0	2
3	NULL	8.0	1
3	2	8.0	1
5	NULL	2.0	1
5	1	2.0	1
8	NULL	1.0	2
8	1	1.0	2