Saving all output to "!!{outputDirectory}!!/groupby_sort_1.q.raw". Enter "record" with no arguments to stop it. >>> !run !!{qFileDirectory}!!/groupby_sort_1.q >>> set hive.enforce.bucketing = true; No rows affected >>> set hive.enforce.sorting = true; No rows affected >>> set hive.exec.reducers.max = 10; No rows affected >>> set hive.map.groupby.sorted=true; No rows affected >>> >>> CREATE TABLE T1(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; No rows affected >>> >>> LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1; No rows affected >>> >>> -- perform an insert to make sure there are 2 files >>> INSERT OVERWRITE TABLE T1 select key, val from T1; 'key','val' No rows selected >>> >>> CREATE TABLE outputTbl1(key int, cnt int); No rows affected >>> >>> -- The plan should be converted to a map-side group by if the group by key >>> -- matches the sorted key >>> -- adding an order by at the end to make the test results deterministic >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' 
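-- The plan above ends in a single map-side Group By Operator ("mode: final") with
-- no Reduce Output Operator: the aggregation finishes entirely in the map phase.
-- That rewrite fires only because T1 is bucketed and sorted on the group-by key
-- and hive.map.groupby.sorted is enabled. A minimal sketch of the preconditions,
-- using a hypothetical table name (t_sorted) rather than this test's T1:
set hive.enforce.bucketing = true;
set hive.enforce.sorting = true;
set hive.map.groupby.sorted = true;
CREATE TABLE t_sorted(key STRING, val STRING)
CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
-- grouping on the sort key should then plan as a map-only job:
EXPLAIN SELECT key, count(1) FROM t_sorted GROUP BY key;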
'' '' 154 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key; '_col0','_col1' No rows selected >>> >>> SELECT * FROM outputTbl1 ORDER BY key; 'key','cnt' '1','1' '2','1' '3','1' '7','1' '8','2' 5 rows selected >>> >>> CREATE TABLE outputTbl2(key1 int, key2 string, cnt int); No rows affected >>> >>> -- no map-side group by even if the group by key is a superset of the sorted key >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: val' ' type: string' ' outputColumnNames: key, val' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' expr: val' ' type: string' ' mode: hash' ' outputColumnNames: _col0, _col1, _col2' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' sort order: ++' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' tag: -1' ' value expressions:' ' expr: _col2' ' type: bigint' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' expr: KEY._col1' ' type: string' ' mode: mergepartial' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' expr: _col2' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: _col1' ' type: string' ' expr: UDFToInteger(_col2)' ' type: int' ' outputColumnNames: _col0, _col1, _col2' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,cnt' ' columns.types int:string:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl2' ' name groupby_sort_1.outputtbl2' ' serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl2' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,cnt' ' columns.types int:string:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl2' ' name groupby_sort_1.outputtbl2' ' serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl2' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' '' '' 190 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val; '_col0','_col1','_col2' No rows selected >>> >>> SELECT * FROM outputTbl2 ORDER BY key1, key2; 'key1','key2','cnt' '1','11','1' '2','12','1' '3','13','1' '7','17','1' '8','18','1' '8','28','1' 6 rows selected >>> >>> -- It should work for sub-queries >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' subq1:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: _col0' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' outputColumnNames: _col0' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: _col0' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 15' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 20' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [subq1:t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 15' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 20' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' 
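-- The rewrite survives the pass-through sub-query above because subq1 projects
-- the sorted key unchanged, so the map-side Group By Operator still runs in
-- "mode: final" with no reduce stage. By contrast (a hypothetical query, not run
-- in this test), rewriting the key inside the sub-query should disqualify the
-- optimization, much like the key + 1 case later in this file:
EXPLAIN SELECT k, count(1) FROM (SELECT concat(key, '') AS k FROM T1) subq1 GROUP BY k;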
'' '' 169 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key; '_col0','_col1' No rows selected >>> >>> SELECT * FROM outputTbl1 ORDER BY key; 'key','cnt' '1','1' '2','1' '3','1' '7','1' '8','2' 5 rows selected >>> >>> -- It should work for sub-queries with column aliases >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL val) v)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL k))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' subq1:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: _col0' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' outputColumnNames: _col0' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: _col0' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 15' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 20' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [subq1:t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 15' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 20' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' 
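-- Column aliases alone (key AS k) do not change the underlying column, so the
-- optimizer still matches k to T1's sort key and keeps the aggregation map-only.
-- Stripped to plain EXPLAIN for brevity, the aliased and un-aliased forms should
-- produce the same plan shape:
EXPLAIN SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key;
EXPLAIN SELECT k, count(1) FROM (SELECT key AS k, val AS v FROM T1) subq1 GROUP BY k;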
'' '' 169 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k; '_col0','_col1' No rows selected >>> >>> SELECT * FROM outputTbl1 ORDER BY key; 'key','cnt' '1','1' '2','1' '3','1' '7','1' '8','2' 5 rows selected >>> >>> CREATE TABLE outputTbl3(key1 int, key2 int, cnt int); No rows affected >>> >>> -- The plan should be converted to a map-side group by if the group by key contains a constant followed >>> -- by a match to the sorted key >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl3))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY 1 (TOK_TABLE_OR_COL key))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: 1' ' type: int' ' expr: key' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: _col0' ' type: int' ' expr: _col1' ' type: string' ' expr: _col2' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: _col0' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: UDFToInteger(_col2)' ' type: int' ' outputColumnNames: _col0, _col1, _col2' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,cnt' ' columns.types int:int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl3' ' name groupby_sort_1.outputtbl3' ' serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl3' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,cnt' ' columns.types int:int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl3' ' name groupby_sort_1.outputtbl3' ' serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl3' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' 
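-- The constant 1 in GROUP BY 1, key adds no grouping power: every row carries
-- the same constant, so the groups are exactly those of GROUP BY key and the
-- plan above keeps the map-only "mode: final" shape. An illustrative check
-- that the per-key counts agree between the two forms:
SELECT key, count(1) FROM T1 GROUP BY key;
SELECT 1, key, count(1) FROM T1 GROUP BY 1, key;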
'' '' 160 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key; '_col0','_col1','_col2' No rows selected >>> >>> SELECT * FROM outputTbl3 ORDER BY key1, key2; 'key1','key2','cnt' '1','1','1' '1','2','1' '1','3','1' '1','7','1' '1','8','2' 5 rows selected >>> >>> CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int); No rows affected >>> >>> -- no map-side group by if the group by key contains a constant followed by another column >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: val' ' type: string' ' outputColumnNames: key, val' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' expr: 1' ' type: int' ' expr: val' ' type: string' ' mode: hash' ' outputColumnNames: _col0, _col1, _col2, _col3' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' sort order: +++' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' tag: -1' ' value expressions:' ' expr: _col3' ' type: bigint' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' expr: KEY._col1' ' type: int' ' expr: KEY._col2' ' type: string' ' mode: mergepartial' ' outputColumnNames: _col0, _col1, _col2, _col3' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' expr: _col3' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,key3,cnt' ' columns.types int:int:string:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl4' ' name groupby_sort_1.outputtbl4' ' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl4' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,key3,cnt' ' columns.types int:int:string:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl4' ' name groupby_sort_1.outputtbl4' ' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl4' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' '' '' 202 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val; '_col0','_col1','_col2','_col3' No rows selected >>> >>> SELECT * FROM outputTbl4 ORDER BY key1, key2, key3; 'key1','key2','key3','cnt' '1','1','11','1' '2','1','12','1' '3','1','13','1' '7','1','17','1' '8','1','18','1' '8','1','28','1' 6 rows selected >>> >>> -- no map-side group by if the group by key contains a function >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) 1)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (+ (TOK_TABLE_OR_COL key) 1))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' expr: (key + 1)' ' type: double' ' mode: hash' ' outputColumnNames: _col0, _col1, _col2' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: double' ' sort order: ++' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: double' ' tag: -1' ' value expressions:' ' expr: _col2' ' type: bigint' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl 
struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' expr: KEY._col1' ' type: double' ' mode: mergepartial' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: double' ' expr: _col2' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: UDFToInteger(_col2)' ' type: int' ' outputColumnNames: _col0, _col1, _col2' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,cnt' ' columns.types int:int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl3' ' name groupby_sort_1.outputtbl3' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 25' ' serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl3' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,cnt' ' columns.types int:int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl3' ' name groupby_sort_1.outputtbl3' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 25' ' serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl3' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' '' '' 198 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1; '_col0','_col1','_col2' No rows selected >>> >>> SELECT * FROM outputTbl3 ORDER BY key1, key2; 'key1','key2','cnt' '1','2','1' '2','3','1' '3','4','1' '7','8','1' '8','9','2' 5 rows selected >>> >>> -- it should not matter what follows the group by >>> -- test various cases >>> >>> -- group by followed by another group by >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL cnt)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' subq1:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Group By Operator' ' aggregations:' ' expr: sum(_col1)' ' bucketGroup: false' ' keys:' ' expr: (_col0 + _col0)' ' type: double' ' mode: hash' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: double' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: double' ' tag: -1' ' value expressions:' ' expr: _col1' ' type: bigint' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [subq1:t1]' ' Path -> Partition:' ' 
!!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: sum(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: double' ' mode: mergepartial' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: double' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 15' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 20' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 15' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 20' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' '' '' 209 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key; '_col0','_col1' No rows selected >>> >>> SELECT * FROM outputTbl1 ORDER BY key; 'key','cnt' '2','1' '4','1' '6','1' '14','1' '16','2' 5 rows selected >>> >>> -- group by followed by a union >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' null-subquery1:subq1-subquery1:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Union' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 17' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 22' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' null-subquery2:subq1-subquery2:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Union' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 17' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 22' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [null-subquery1:subq1-subquery1:t1, null-subquery2:subq1-subquery2:t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 17' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 22' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' 
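-- Both branches of the union above group on the sorted key, so each branch
-- collapses to a map-only group by and the UNION ALL fits in the single Stage-1
-- that writes outputTbl1 directly. Contrast with the next query in this file,
-- where one branch groups on (key + key): that branch cannot use the rewrite
-- and is planned as its own map-reduce stage (Stage-4) feeding the union. The
-- two branch shapes, reduced to plain SELECTs:
-- map-only branch (matches the sort key):
SELECT key, count(1) FROM T1 GROUP BY key;
-- branch that needs a reducer (function of the key):
SELECT key + key AS key, count(1) FROM T1 GROUP BY key + key;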
'' '' 243 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1; '_col0','_col1' No rows selected >>> >>> SELECT * FROM outputTbl1 ORDER BY key; 'key','cnt' '1','1' '1','1' '2','1' '2','1' '3','1' '3','1' '7','1' '7','1' '8','2' '8','2' 10 rows selected >>> >>> -- group by followed by a union where one of the sub-queries is map-side group by >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key + key as key, count(1) FROM T1 GROUP BY key + key ) subq1; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)) key) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))' '' 'STAGE DEPENDENCIES:' ' Stage-4 is a root stage' ' Stage-2 depends on stages: Stage-4' ' Stage-0 depends on stages: Stage-2' ' Stage-3 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-4' ' Map Reduce' ' Alias -> Map Operator Tree:' ' null-subquery2:subq1-subquery2:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: (key + key)' ' type: double' ' mode: hash' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: double' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: double' ' tag: -1' ' value expressions:' ' expr: _col1' ' type: bigint' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [null-subquery2:subq1-subquery2:t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: double' ' mode: mergepartial' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: double' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' directory: file:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' properties:' ' columns _col0,_col1' ' columns.types double,bigint' ' escape.delim \' ' TotalFiles: 1' ' GatherStats: false' ' MultiFileSpray: false' '' ' Stage: Stage-2' ' Map Reduce' ' Alias -> Map Operator Tree:' ' file:!!{hive.exec.scratchdir}!! ' ' TableScan' ' GatherStats: false' ' Union' ' Select Operator' ' expressions:' ' expr: _col0' ' type: double' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 10' ' rawDataSize 30' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 40' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' null-subquery1:subq1-subquery1:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Union' ' Select Operator' ' expressions:' ' expr: _col0' ' type: double' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 10' ' rawDataSize 30' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 40' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' Needs Tagging: false' ' Path -> Alias:' ' file:!!{hive.exec.scratchdir}!! [file:!!{hive.exec.scratchdir}!!]' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [null-subquery1:subq1-subquery1:t1]' ' Path -> Partition:' ' file:!!{hive.exec.scratchdir}!! 
' ' Partition' ' base file name: -mr-10002' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' properties:' ' columns _col0,_col1' ' columns.types double,bigint' ' escape.delim \' ' ' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' properties:' ' columns _col0,_col1' ' columns.types double,bigint' ' escape.delim \' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 10' ' rawDataSize 30' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 40' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-3' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' 
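
In the plan above, only the first branch keeps the map-side rewrite: GROUP BY key matches T1's sort column, but GROUP BY key + key groups on an expression over the sorted column, so its groups are no longer guaranteed to arrive contiguously in a mapper. That branch therefore runs as a conventional two-phase aggregation in Stage-4 (mode: hash in the map, mergepartial in the reduce), lands in a scratch SequenceFile, and Stage-2 unions it with the map-side branch before the final file sink. The contrast in miniature, against the session's T1 (a sketch, not recorded output):

  -- rewritten to a single map-only aggregation (mode: final, no shuffle)
  EXPLAIN SELECT key, count(1) FROM T1 GROUP BY key;

  -- expression over the sort column: falls back to map-side hash
  -- aggregation plus a reduce-side mergepartial
  EXPLAIN SELECT key + key, count(1) FROM T1 GROUP BY key + key;

Note also that key + key over a STRING column is evaluated as DOUBLE, which is why the second branch's keys show type: double in the plan and pass through UDFToInteger before landing in outputTbl1's int key column.
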
'' '' 359 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1; '_col0','_col1' No rows selected >>> >>> SELECT * FROM outputTbl1 ORDER BY key; 'key','cnt' '','1' '','1' '','1' '','1' '','2' '1','1' '2','1' '3','1' '7','1' '8','2' 10 rows selected >>> >>> -- group by followed by a join >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL subq1) cnt) (. (TOK_TABLE_OR_COL subq2) cnt))))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' subq1:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: 0' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' subq2:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: 1' ' value expressions:' ' expr: _col1' ' type: bigint' ' Needs Tagging: true' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [subq1:t1, subq2:t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' 
bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' ' Reduce Operator Tree:' ' Join Operator' ' condition map:' ' Inner Join 0 to 1' ' condition expressions:' ' 0 {VALUE._col0} {VALUE._col1}' ' 1 {VALUE._col1}' ' handleSkewJoin: false' ' outputColumnNames: _col0, _col1, _col3' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: (_col1 + _col3)' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 2' ' numPartitions 0' ' numRows 10' ' rawDataSize 35' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 45' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 2' ' numPartitions 0' ' numRows 10' ' rawDataSize 35' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 45' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' '' '' 231 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key; '_col0','_col1' No rows selected >>> >>> SELECT * FROM outputTbl1 ORDER BY key; 'key','cnt' '1','2' '2','2' '3','2' '7','2' '8','4' 5 rows selected >>> >>> -- group by followed by a join where one of the sub-queries can be performed in the mapper >>> EXPLAIN EXTENDED SELECT * FROM (SELECT key, count(1) FROM T1 GROUP BY key) subq1 JOIN (SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 ON subq1.key = subq2.key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. 
(TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF))))' '' 'STAGE DEPENDENCIES:' ' Stage-2 is a root stage' ' Stage-1 depends on stages: Stage-2' ' Stage-0 is a root stage' '' 'STAGE PLANS:' ' Stage: Stage-2' ' Map Reduce' ' Alias -> Map Operator Tree:' ' subq2:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: val' ' type: string' ' outputColumnNames: key, val' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' expr: val' ' type: string' ' mode: hash' ' outputColumnNames: _col0, _col1, _col2' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' sort order: ++' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' tag: -1' ' value expressions:' ' expr: _col2' ' type: bigint' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [subq2:t1]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' expr: KEY._col1' ' type: string' ' mode: mergepartial' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' expr: _col2' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' directory: file:!!{hive.exec.scratchdir}!!' 
' NumFilesPerFileSink: 1' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' properties:' ' columns _col0,_col1,_col2' ' columns.types string,string,bigint' ' escape.delim \' ' TotalFiles: 1' ' GatherStats: false' ' MultiFileSpray: false' '' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' $INTNAME ' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: 1' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' expr: _col2' ' type: bigint' ' subq1:t1 ' ' TableScan' ' alias: t1' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: 0' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' Needs Tagging: true' ' Path -> Alias:' ' file:!!{hive.exec.scratchdir}!! [$INTNAME]' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 [subq1:t1]' ' Path -> Partition:' ' file:!!{hive.exec.scratchdir}!! ' ' Partition' ' base file name: -mr-10002' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' properties:' ' columns _col0,_col1,_col2' ' columns.types string,string,bigint' ' escape.delim \' ' ' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' properties:' ' columns _col0,_col1,_col2' ' columns.types string,string,bigint' ' escape.delim \' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1 ' ' Partition' ' base file name: t1' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t1' ' name groupby_sort_1.t1' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t1 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t1' ' name: groupby_sort_1.t1' ' Reduce Operator Tree:' ' Join Operator' ' condition map:' ' Inner Join 0 to 1' ' condition expressions:' ' 0 {VALUE._col0} {VALUE._col1}' ' 1 {VALUE._col0} {VALUE._col1} {VALUE._col2}' ' handleSkewJoin: false' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' expr: _col2' ' type: string' ' expr: _col3' ' type: string' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' directory: file:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: file:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' columns _col0,_col1,_col2,_col3,_col4' ' columns.types string:bigint:string:string:bigint' ' escape.delim \' ' serialization.format 1' ' TotalFiles: 1' ' GatherStats: false' ' MultiFileSpray: false' '' ' Stage: Stage-0' ' Fetch Operator' ' limit: -1' '' '' 316 rows selected >>> >>> CREATE TABLE T2(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE; No rows affected >>> >>> -- perform an insert to make sure there are 2 files >>> INSERT OVERWRITE TABLE T2 select key, val from T1; 'key','val' No rows selected >>> >>> -- no mapside sort group by if the group by is a prefix of the sorted key >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t2 ' ' TableScan' ' alias: t2' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' mode: hash' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: -1' ' 
value expressions:' ' expr: _col1' ' type: bigint' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 [t2]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 ' ' Partition' ' base file name: t2' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2' ' name groupby_sort_1.t2' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t2 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2' ' name groupby_sort_1.t2' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t2 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t2' ' name: groupby_sort_1.t2' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' mode: mergepartial' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: UDFToInteger(_col1)' ' type: int' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 15' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 20' ' transient_lastDdlTime !!UNIXTIME!!' 
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key,cnt' ' columns.types int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl1' ' name groupby_sort_1.outputtbl1' ' numFiles 1' ' numPartitions 0' ' numRows 5' ' rawDataSize 15' ' serialization.ddl struct outputtbl1 { i32 key, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 20' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl1' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' '' '' 186 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key; '_col0','_col1' No rows selected >>> >>> SELECT * FROM outputTbl1 ORDER BY key; 'key','cnt' '1','1' '2','1' '3','1' '7','1' '8','2' 5 rows selected >>> >>> -- The plan should be converted to a map-side group by if the group by key contains a constant in between the >>> -- skewed keys >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t2 ' ' TableScan' ' alias: t2' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: val' ' type: string' ' outputColumnNames: key, val' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' expr: 1' ' type: int' ' expr: val' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1, _col2, _col3' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' expr: _col3' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,key3,cnt' ' columns.types int:int:string:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl4' ' name groupby_sort_1.outputtbl4' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 48' ' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 54' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl4' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 [t2]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 ' ' Partition' ' base file name: t2' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2' ' name groupby_sort_1.t2' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t2 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2' ' name groupby_sort_1.t2' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t2 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t2' ' name: groupby_sort_1.t2' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,key3,cnt' ' columns.types int:int:string:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl4' ' name groupby_sort_1.outputtbl4' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 48' ' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 54' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl4' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' '' '' 178 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val; '_col0','_col1','_col2','_col3' No rows selected >>> >>> SELECT * FROM outputTbl4 ORDER BY key1, key2, key3; 'key1','key2','key3','cnt' '1','1','11','1' '2','1','12','1' '3','1','13','1' '7','1','17','1' '8','1','18','1' '8','1','28','1' 6 rows selected >>> >>> CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int); No rows affected >>> >>> -- The plan should be converted to a map-side group by if the group by key contains a constant in between the >>> -- skewed keys followed by anything >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl5))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR 2) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val) 2)))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t2 ' ' TableScan' ' alias: t2' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: val' ' type: string' ' outputColumnNames: key, val' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: key' ' type: string' ' expr: 1' ' type: int' ' expr: val' ' type: string' ' expr: 2' ' type: int' ' mode: final' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' expr: _col3' ' type: int' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' expr: _col3' ' type: int' ' expr: UDFToInteger(_col4)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,key3,key4,cnt' ' columns.types int:int:string:int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl5' ' name groupby_sort_1.outputtbl5' ' serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl5' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 [t2]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 ' ' Partition' ' base file name: t2' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2' ' name groupby_sort_1.t2' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t2 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2' ' name groupby_sort_1.t2' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t2 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t2' ' name: groupby_sort_1.t2' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,key3,key4,cnt' ' columns.types int:int:string:int:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl5' ' name groupby_sort_1.outputtbl5' ' serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl5' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' '' '' 174 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2; '_col0','_col1','_col2','_col3','_col4' No rows selected >>> >>> SELECT * FROM outputTbl5 ORDER BY key1, key2, key3, key4; 'key1','key2','key3','key4','cnt' '1','1','11','2','1' '2','1','12','2','1' '3','1','13','2','1' '7','1','17','2','1' '8','1','18','2','1' '8','1','28','2','1' 6 rows selected >>> >>> -- constants from sub-queries should work fine >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 constant) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL constant)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL constant) (TOK_TABLE_OR_COL val))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' subq:t2 ' ' TableScan' ' alias: t2' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: 1' ' type: int' ' expr: val' ' type: string' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' outputColumnNames: _col0, _col1, _col2' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' mode: final' ' outputColumnNames: _col0, _col1, _col2, _col3' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' expr: _col3' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: _col1' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3' ' File Output 
Operator' ' compressed: false' ' GlobalTableId: 1' ' directory: pfile:!!{hive.exec.scratchdir}!!' ' NumFilesPerFileSink: 1' ' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,key3,cnt' ' columns.types int:int:string:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl4' ' name groupby_sort_1.outputtbl4' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 48' ' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 54' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl4' ' TotalFiles: 1' ' GatherStats: true' ' MultiFileSpray: false' ' Needs Tagging: false' ' Path -> Alias:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 [subq:t2]' ' Path -> Partition:' ' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 ' ' Partition' ' base file name: t2' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2' ' name groupby_sort_1.t2' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t2 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' ' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' SORTBUCKETCOLSPREFIX TRUE' ' bucket_count 2' ' bucket_field_name key' ' columns key,val' ' columns.types string:string' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2' ' name groupby_sort_1.t2' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 24' ' serialization.ddl struct t2 { string key, string val}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 30' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.t2' ' name: groupby_sort_1.t2' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' source: pfile:!!{hive.exec.scratchdir}!!' 
' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' properties:' ' bucket_count -1' ' columns key1,key2,key3,cnt' ' columns.types int:int:string:int' ' file.inputformat org.apache.hadoop.mapred.TextInputFormat' ' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl4' ' name groupby_sort_1.outputtbl4' ' numFiles 1' ' numPartitions 0' ' numRows 6' ' rawDataSize 48' ' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}' ' serialization.format 1' ' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' totalSize 54' ' transient_lastDdlTime !!UNIXTIME!!' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_sort_1.outputtbl4' ' tmp directory: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-2' ' Stats-Aggr Operator' ' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!' '' '' 189 rows selected >>> >>> INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val; '_col0','_col1','_col2','_col3' No rows selected >>> >>> SELECT * FROM outputTbl4 ORDER BY key1, key2, key3; 'key1','key2','key3','cnt' '1','1','11','1' '2','1','12','1' '3','1','13','1' '7','1','17','1' '8','1','18','1' '8','1','28','1' 6 rows selected >>> >>> -- multiple levels of constants from sub-queries should work fine >>> EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, constant3, val; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 constant) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL constant) constant2) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR 2 constant3)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL constant3)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL constant3) (TOK_TABLE_OR_COL val))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' subq2:subq:t2 ' ' TableScan' ' alias: t2' ' GatherStats: false' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: val' ' type: string' ' outputColumnNames: _col0, _col2' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col2' ' type: string' ' expr: 2' ' type: int' ' outputColumnNames: _col0, _col2, _col3' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col3' ' type: int' ' expr: _col2' ' type: string' ' outputColumnNames: _col0, _col3, _col2' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: _col0' ' type: string' ' expr: _col3' '
' type: int'
' expr: _col2'
' type: string'
' mode: final'
' outputColumnNames: _col0, _col1, _col2, _col3'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: int'
' expr: _col2'
' type: string'
' expr: _col3'
' type: bigint'
' outputColumnNames: _col0, _col1, _col2, _col3'
' Select Operator'
' expressions:'
' expr: UDFToInteger(_col0)'
' type: int'
' expr: _col1'
' type: int'
' expr: _col2'
' type: string'
' expr: UDFToInteger(_col3)'
' type: int'
' outputColumnNames: _col0, _col1, _col2, _col3'
' File Output Operator'
' compressed: false'
' GlobalTableId: 1'
' directory: pfile:!!{hive.exec.scratchdir}!!'
' NumFilesPerFileSink: 1'
' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns key1,key2,key3,cnt'
' columns.types int:int:string:int'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl4'
' name groupby_sort_1.outputtbl4'
' numFiles 1'
' numPartitions 0'
' numRows 6'
' rawDataSize 48'
' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 54'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.outputtbl4'
' TotalFiles: 1'
' GatherStats: true'
' MultiFileSpray: false'
' Needs Tagging: false'
' Path -> Alias:'
' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 [subq2:subq:t2]'
' Path -> Partition:'
' !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2 '
' Partition'
' base file name: t2'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' SORTBUCKETCOLSPREFIX TRUE'
' bucket_count 2'
' bucket_field_name key'
' columns key,val'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2'
' name groupby_sort_1.t2'
' numFiles 1'
' numPartitions 0'
' numRows 6'
' rawDataSize 24'
' serialization.ddl struct t2 { string key, string val}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 30'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' '
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' SORTBUCKETCOLSPREFIX TRUE'
' bucket_count 2'
' bucket_field_name key'
' columns key,val'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/t2'
' name groupby_sort_1.t2'
' numFiles 1'
' numPartitions 0'
' numRows 6'
' rawDataSize 24'
' serialization.ddl struct t2 { string key, string val}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 30'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.t2'
' name: groupby_sort_1.t2'
''
' Stage: Stage-0'
' Move Operator'
' tables:'
' replace: true'
' source: pfile:!!{hive.exec.scratchdir}!!'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns key1,key2,key3,cnt'
' columns.types int:int:string:int'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_sort_1.db/outputtbl4'
' name groupby_sort_1.outputtbl4'
' numFiles 1'
' numPartitions 0'
' numRows 6'
' rawDataSize 48'
' serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 54'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.outputtbl4'
' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
''
' Stage: Stage-2'
' Stats-Aggr Operator'
' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
''
''
196 rows selected
>>> 
>>> INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, constant3, val;
'_col0','_col1','_col2','_col3'
No rows selected
>>> 
>>> SELECT * FROM outputTbl4 ORDER BY key1, key2, key3;
'key1','key2','key3','cnt'
'1','2','11','1'
'2','2','12','1'
'3','2','13','1'
'7','2','17','1'
'8','2','18','1'
'8','2','28','1'
6 rows selected
>>> 
>>> set hive.map.aggr=true;
No rows affected
>>> set hive.multigroupby.singlereducer=false;
No rows affected
>>> set mapred.reduce.tasks=31;
No rows affected
>>> 
>>> CREATE TABLE DEST1(key INT, cnt INT);
No rows affected
>>> CREATE TABLE DEST2(key INT, val STRING, cnt INT);
No rows affected
>>> 
>>> SET hive.exec.compress.intermediate=true;
No rows affected
>>> SET hive.exec.compress.output=true;
No rows affected
>>> 
>>> EXPLAIN FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))'
''
'STAGE DEPENDENCIES:'
' Stage-2 is a root stage'
' Stage-0 depends on stages: Stage-2'
' Stage-3 depends on stages: Stage-0'
' Stage-1 depends on stages: Stage-2'
' Stage-4 depends on stages: Stage-1'
''
'STAGE PLANS:'
' Stage: Stage-2'
' Map Reduce'
' Alias -> Map Operator Tree:'
' t2 '
' TableScan'
' alias: t2'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: key'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' expr: val'
' type: string'
' outputColumnNames: key, val'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: key'
' type: string'
' expr: val'
' type: string'
' mode: final'
' outputColumnNames: _col0, _col1, _col2'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' expr: _col2'
' type: bigint'
' outputColumnNames: _col0, _col1, _col2'
' Select Operator'
' expressions:'
' expr: UDFToInteger(_col0)'
' type: int'
' expr: _col1'
' type: string'
' expr: UDFToInteger(_col2)'
' type: int'
' outputColumnNames: _col0, _col1, _col2'
' File Output Operator'
' compressed: true'
' GlobalTableId: 2'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.dest2'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: UDFToInteger(_col0)'
' type: int'
' expr: UDFToInteger(_col1)'
' type: int'
' outputColumnNames: _col0, _col1'
' File Output Operator'
' compressed: true'
' GlobalTableId: 1'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.dest1'
''
' Stage: Stage-0'
' Move Operator'
' tables:'
' replace: true'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.dest1'
''
' Stage: Stage-3'
' Stats-Aggr Operator'
''
' Stage: Stage-1'
' Move Operator'
' tables:'
' replace: true'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.dest2'
''
' Stage: Stage-4'
' Stats-Aggr Operator'
''
''
147 rows selected
>>> 
>>> FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val;
'_col0','_col1','_col2'
No rows selected
>>> 
>>> select * from DEST1 ORDER BY key, cnt;
'key','cnt'
'1','1'
'2','1'
'3','1'
'7','1'
'8','2'
5 rows selected
>>> select * from DEST2 ORDER BY key, val, cnt;
'key','val','cnt'
'1','11','1'
'2','12','1'
'3','13','1'
'7','17','1'
'8','18','1'
'8','28','1'
6 rows selected
>>> 
>>> -- multi-table insert with a sub-query
>>> EXPLAIN FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 8)))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val))))'
''
'STAGE DEPENDENCIES:'
' Stage-2 is a root stage'
' Stage-0 depends on stages: Stage-2'
' Stage-3 depends on stages: Stage-0'
' Stage-1 depends on stages: Stage-2'
' Stage-4 depends on stages: Stage-1'
''
'STAGE PLANS:'
' Stage: Stage-2'
' Map Reduce'
' Alias -> Map Operator Tree:'
' x:t2 '
' TableScan'
' alias: t2'
' Filter Operator'
' predicate:'
' expr: (key = 8.0)'
' type: boolean'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' expr: val'
' type: string'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' outputColumnNames: _col0'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: _col0'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' outputColumnNames: _col0, _col1'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' mode: final'
' outputColumnNames: _col0, _col1, _col2'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' expr: _col2'
' type: bigint'
' outputColumnNames: _col0, _col1, _col2'
' Select Operator'
' expressions:'
' expr: UDFToInteger(_col0)'
' type: int'
' expr: _col1'
' type: string'
' expr: UDFToInteger(_col2)'
' type: int'
' outputColumnNames: _col0, _col1, _col2'
' File Output Operator'
' compressed: true'
' GlobalTableId: 2'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.dest2'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: UDFToInteger(_col0)'
' type: int'
' expr: UDFToInteger(_col1)'
' type: int'
' outputColumnNames: _col0, _col1'
' File Output Operator'
' compressed: true'
' GlobalTableId: 1'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.dest1'
''
' Stage: Stage-0'
' Move Operator'
' tables:'
' replace: true'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.dest1'
''
' Stage: Stage-3'
' Stats-Aggr Operator'
''
' Stage: Stage-1'
' Move Operator'
' tables:'
' replace: true'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_sort_1.dest2'
''
' Stage: Stage-4'
' Stats-Aggr Operator'
''
''
158 rows selected
>>> 
>>> FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val;
'_col0','_col1','_col2'
No rows selected
>>> 
>>> select * from DEST1 ORDER BY key, cnt;
'key','cnt'
'8','2'
1 row selected
>>> select * from DEST2 ORDER BY key, val, cnt;
'key','val','cnt'
'8','18','1'
'8','28','1'
2 rows selected
>>> !record
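
Editor's note: the HiveQL below is an illustrative sketch, not part of the recorded run above. It restates the two patterns this section exercises so the EXPLAIN output is easier to verify; all table names (T2, outputTbl4, DEST1, DEST2) and settings are taken from the transcript itself. When reading the plans, a branch whose Group By Operator runs with "mode: final" and has no Reduce Output Operator was aggregated entirely on the map side.

-- Constants introduced by nested sub-queries are simply projected alongside the
-- group keys (note the "expr: 2" in the map-side Select Operator above) and do
-- not block the map-side group-by conversion:
SELECT key, constant3, val, count(1)
FROM (
  SELECT key, constant AS constant2, val, 2 AS constant3
  FROM (SELECT key, 1 AS constant, val FROM T2) subq
) subq2
GROUP BY key, constant3, val;

-- A multi-table insert shares a single scan of T2 across both aggregations; with
-- hive.multigroupby.singlereducer=false each GROUP BY gets its own plan branch.
-- Here the (key, val) aggregation stays on the map side, while the key-only
-- aggregation is finished in a reducer (mode: hash, then mode: mergepartial):
FROM T2
INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val;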