Saving all output to "!!{outputDirectory}!!/groupby_map_ppr_multi_distinct.q.raw". Enter "record" with no arguments to stop it.
>>>  !run !!{qFileDirectory}!!/groupby_map_ppr_multi_distinct.q
>>>  set hive.map.aggr=true;
No rows affected 
>>>  set hive.groupby.skewindata=false;
No rows affected 
>>>  set mapred.reduce.tasks=31;
No rows affected 
>>>  
>>>  CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, C3 INT, c4 INT) STORED AS TEXTFILE;
No rows affected 
>>>  
>>>  EXPLAIN EXTENDED 
FROM srcpart src 
INSERT OVERWRITE TABLE dest1 
SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) 
WHERE src.ds = '2008-04-08' 
GROUP BY substr(src.key,1,1);
'Explain'
'ABSTRACT SYNTAX TREE:'
'  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))))'
''
'STAGE DEPENDENCIES:'
'  Stage-1 is a root stage'
'  Stage-0 depends on stages: Stage-1'
'  Stage-2 depends on stages: Stage-0'
''
'STAGE PLANS:'
'  Stage: Stage-1'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        src '
'          TableScan'
'            alias: src'
'            GatherStats: false'
'            Select Operator'
'              expressions:'
'                    expr: key'
'                    type: string'
'                    expr: value'
'                    type: string'
'              outputColumnNames: key, value'
'              Group By Operator'
'                aggregations:'
'                      expr: count(DISTINCT substr(value, 5))'
'                      expr: sum(substr(value, 5))'
'                      expr: sum(DISTINCT substr(value, 5))'
'                      expr: count(DISTINCT value)'
'                bucketGroup: false'
'                keys:'
'                      expr: substr(key, 1, 1)'
'                      type: string'
'                      expr: substr(value, 5)'
'                      type: string'
'                      expr: value'
'                      type: string'
'                mode: hash'
'                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6'
'                Reduce Output Operator'
'                  key expressions:'
'                        expr: _col0'
'                        type: string'
'                        expr: _col1'
'                        type: string'
'                        expr: _col2'
'                        type: string'
'                  sort order: +++'
'                  Map-reduce partition columns:'
'                        expr: _col0'
'                        type: string'
'                  tag: -1'
'                  value expressions:'
'                        expr: _col3'
'                        type: bigint'
'                        expr: _col4'
'                        type: double'
'                        expr: _col5'
'                        type: double'
'                        expr: _col6'
'                        type: bigint'
'      Needs Tagging: false'
'      Path -> Alias:'
'        !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=11 [src]'
'        !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=12 [src]'
'      Path -> Partition:'
'        !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=11 '
'          Partition'
'            base file name: hr=11'
'            input format: org.apache.hadoop.mapred.TextInputFormat'
'            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'            partition values:'
'              ds 2008-04-08'
'              hr 11'
'            properties:'
'              bucket_count -1'
'              columns key,value'
'              columns.types string:string'
'              file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=11'
'              name groupby_map_ppr_multi_distinct.srcpart'
'              numFiles 1'
'              numPartitions 4'
'              numRows 0'
'              partition_columns ds/hr'
'              rawDataSize 0'
'              serialization.ddl struct srcpart { string key, string value}'
'              serialization.format 1'
'              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              totalSize 5812'
'              transient_lastDdlTime !!UNIXTIME!!'
'            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'          '
'              input format: org.apache.hadoop.mapred.TextInputFormat'
'              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              properties:'
'                bucket_count -1'
'                columns key,value'
'                columns.types string:string'
'                file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart'
'                name groupby_map_ppr_multi_distinct.srcpart'
'                numFiles 4'
'                numPartitions 4'
'                numRows 0'
'                partition_columns ds/hr'
'                rawDataSize 0'
'                serialization.ddl struct srcpart { string key, string value}'
'                serialization.format 1'
'                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                totalSize 23248'
'                transient_lastDdlTime !!UNIXTIME!!'
'              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              name: groupby_map_ppr_multi_distinct.srcpart'
'            name: groupby_map_ppr_multi_distinct.srcpart'
'        !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=12 '
'          Partition'
'            base file name: hr=12'
'            input format: org.apache.hadoop.mapred.TextInputFormat'
'            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'            partition values:'
'              ds 2008-04-08'
'              hr 12'
'            properties:'
'              bucket_count -1'
'              columns key,value'
'              columns.types string:string'
'              file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=12'
'              name groupby_map_ppr_multi_distinct.srcpart'
'              numFiles 1'
'              numPartitions 4'
'              numRows 0'
'              partition_columns ds/hr'
'              rawDataSize 0'
'              serialization.ddl struct srcpart { string key, string value}'
'              serialization.format 1'
'              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              totalSize 5812'
'              transient_lastDdlTime !!UNIXTIME!!'
'            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'          '
'              input format: org.apache.hadoop.mapred.TextInputFormat'
'              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              properties:'
'                bucket_count -1'
'                columns key,value'
'                columns.types string:string'
'                file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart'
'                name groupby_map_ppr_multi_distinct.srcpart'
'                numFiles 4'
'                numPartitions 4'
'                numRows 0'
'                partition_columns ds/hr'
'                rawDataSize 0'
'                serialization.ddl struct srcpart { string key, string value}'
'                serialization.format 1'
'                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                totalSize 23248'
'                transient_lastDdlTime !!UNIXTIME!!'
'              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              name: groupby_map_ppr_multi_distinct.srcpart'
'            name: groupby_map_ppr_multi_distinct.srcpart'
'      Reduce Operator Tree:'
'        Group By Operator'
'          aggregations:'
'                expr: count(DISTINCT KEY._col1:0._col0)'
'                expr: sum(VALUE._col1)'
'                expr: sum(DISTINCT KEY._col1:1._col0)'
'                expr: count(DISTINCT KEY._col1:2._col0)'
'          bucketGroup: false'
'          keys:'
'                expr: KEY._col0'
'                type: string'
'          mode: mergepartial'
'          outputColumnNames: _col0, _col1, _col2, _col3, _col4'
'          Select Operator'
'            expressions:'
'                  expr: _col0'
'                  type: string'
'                  expr: _col1'
'                  type: bigint'
'                  expr: concat(_col0, _col2)'
'                  type: string'
'                  expr: _col3'
'                  type: double'
'                  expr: _col4'
'                  type: bigint'
'            outputColumnNames: _col0, _col1, _col2, _col3, _col4'
'            Select Operator'
'              expressions:'
'                    expr: _col0'
'                    type: string'
'                    expr: UDFToInteger(_col1)'
'                    type: int'
'                    expr: _col2'
'                    type: string'
'                    expr: UDFToInteger(_col3)'
'                    type: int'
'                    expr: UDFToInteger(_col4)'
'                    type: int'
'              outputColumnNames: _col0, _col1, _col2, _col3, _col4'
'              File Output Operator'
'                compressed: false'
'                GlobalTableId: 1'
'                directory: pfile:!!{hive.exec.scratchdir}!!'
'                NumFilesPerFileSink: 1'
'                Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
'                table:'
'                    input format: org.apache.hadoop.mapred.TextInputFormat'
'                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                    properties:'
'                      bucket_count -1'
'                      columns key,c1,c2,c3,c4'
'                      columns.types string:int:string:int:int'
'                      file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'                      file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                      location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/dest1'
'                      name groupby_map_ppr_multi_distinct.dest1'
'                      serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4}'
'                      serialization.format 1'
'                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                      transient_lastDdlTime !!UNIXTIME!!'
'                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                    name: groupby_map_ppr_multi_distinct.dest1'
'                TotalFiles: 1'
'                GatherStats: true'
'                MultiFileSpray: false'
''
'  Stage: Stage-0'
'    Move Operator'
'      tables:'
'          replace: true'
'          source: pfile:!!{hive.exec.scratchdir}!!'
'          table:'
'              input format: org.apache.hadoop.mapred.TextInputFormat'
'              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              properties:'
'                bucket_count -1'
'                columns key,c1,c2,c3,c4'
'                columns.types string:int:string:int:int'
'                file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/dest1'
'                name groupby_map_ppr_multi_distinct.dest1'
'                serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4}'
'                serialization.format 1'
'                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                transient_lastDdlTime !!UNIXTIME!!'
'              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              name: groupby_map_ppr_multi_distinct.dest1'
'          tmp directory: pfile:!!{hive.exec.scratchdir}!!'
''
'  Stage: Stage-2'
'    Stats-Aggr Operator'
'      Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
''
''
263 rows selected 
>>>  
>>>  FROM srcpart src 
INSERT OVERWRITE TABLE dest1 
SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) 
WHERE src.ds = '2008-04-08' 
GROUP BY substr(src.key,1,1);
'_col0','_col1','_col2','_col3','_col4'
No rows selected 
>>>  
>>>  SELECT dest1.* FROM dest1;
'key','c1','c2','c3','c4'
'0','1','00.0','0','1'
'1','71','132828.0','10044','71'
'2','69','251142.0','15780','69'
'3','62','364008.0','20119','62'
'4','74','4105526.0','30965','74'
'5','6','5794.0','278','6'
'6','5','6796.0','331','5'
'7','6','71470.0','447','6'
'8','8','81524.0','595','8'
'9','7','92094.0','577','7'
10 rows selected 
>>>  !record