Saving all output to "!!{outputDirectory}!!/groupby_map_ppr_multi_distinct.q.raw". Enter "record" with no arguments to stop it.
>>> !run !!{qFileDirectory}!!/groupby_map_ppr_multi_distinct.q
>>> set hive.map.aggr=true;
No rows affected
>>> set hive.groupby.skewindata=false;
No rows affected
>>> set mapred.reduce.tasks=31;
No rows affected
>>> 
>>> CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, C3 INT, c4 INT) STORED AS TEXTFILE;
No rows affected
>>> 
>>> EXPLAIN EXTENDED FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) WHERE src.ds = '2008-04-08' GROUP BY substr(src.key,1,1);
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest1))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTIONDI count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL src) ds) '2008-04-08')) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 depends on stages: Stage-1'
' Stage-2 depends on stages: Stage-0'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' src '
' TableScan'
' alias: src'
' GatherStats: false'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' expr: value'
' type: string'
' outputColumnNames: key, value'
' Group By Operator'
' aggregations:'
' expr: count(DISTINCT substr(value, 5))'
' expr: sum(substr(value, 5))'
' expr: sum(DISTINCT substr(value, 5))'
' expr: count(DISTINCT value)'
' bucketGroup: false'
' keys:'
' expr: substr(key, 1, 1)'
' type: string'
' expr: substr(value, 5)'
' type: string'
' expr: value'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' expr: _col2'
' type: string'
' sort order: +++'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col3'
' type: bigint'
' expr: _col4'
' type: double'
' expr: _col5'
' type: double'
' expr: _col6'
' type: bigint'
' Needs Tagging: false'
' Path -> Alias:'
' !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=11 [src]'
' !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=12 [src]'
' Path -> Partition:'
' !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=11 '
' Partition'
' base file name: hr=11'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' partition values:'
' ds 2008-04-08'
' hr 11'
' properties:'
' bucket_count -1'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=11'
' name groupby_map_ppr_multi_distinct.srcpart'
' numFiles 1'
' numPartitions 4'
' numRows 0'
' partition_columns ds/hr'
' rawDataSize 0'
' serialization.ddl struct srcpart { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 5812'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' '
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart'
' name groupby_map_ppr_multi_distinct.srcpart'
' numFiles 4'
' numPartitions 4'
' numRows 0'
' partition_columns ds/hr'
' rawDataSize 0'
' serialization.ddl struct srcpart { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 23248'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_map_ppr_multi_distinct.srcpart'
' name: groupby_map_ppr_multi_distinct.srcpart'
' !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=12 '
' Partition'
' base file name: hr=12'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' partition values:'
' ds 2008-04-08'
' hr 12'
' properties:'
' bucket_count -1'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart/ds=2008-04-08/hr=12'
' name groupby_map_ppr_multi_distinct.srcpart'
' numFiles 1'
' numPartitions 4'
' numRows 0'
' partition_columns ds/hr'
' rawDataSize 0'
' serialization.ddl struct srcpart { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 5812'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' '
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/srcpart'
' name groupby_map_ppr_multi_distinct.srcpart'
' numFiles 4'
' numPartitions 4'
' numRows 0'
' partition_columns ds/hr'
' rawDataSize 0'
' serialization.ddl struct srcpart { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 23248'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_map_ppr_multi_distinct.srcpart'
' name: groupby_map_ppr_multi_distinct.srcpart'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(DISTINCT KEY._col1:0._col0)'
' expr: sum(VALUE._col1)'
' expr: sum(DISTINCT KEY._col1:1._col0)'
' expr: count(DISTINCT KEY._col1:2._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1, _col2, _col3, _col4'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' expr: concat(_col0, _col2)'
' type: string'
' expr: _col3'
' type: double'
' expr: _col4'
' type: bigint'
' outputColumnNames: _col0, _col1, _col2, _col3, _col4'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: UDFToInteger(_col1)'
' type: int'
' expr: _col2'
' type: string'
' expr: UDFToInteger(_col3)'
' type: int'
' expr: UDFToInteger(_col4)'
' type: int'
' outputColumnNames: _col0, _col1, _col2, _col3, _col4'
' File Output Operator'
' compressed: false'
' GlobalTableId: 1'
' directory: pfile:!!{hive.exec.scratchdir}!!'
' NumFilesPerFileSink: 1'
' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns key,c1,c2,c3,c4'
' columns.types string:int:string:int:int'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/dest1'
' name groupby_map_ppr_multi_distinct.dest1'
' serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_map_ppr_multi_distinct.dest1'
' TotalFiles: 1'
' GatherStats: true'
' MultiFileSpray: false'
''
' Stage: Stage-0'
' Move Operator'
' tables:'
' replace: true'
' source: pfile:!!{hive.exec.scratchdir}!!'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns key,c1,c2,c3,c4'
' columns.types string:int:string:int:int'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/groupby_map_ppr_multi_distinct.db/dest1'
' name groupby_map_ppr_multi_distinct.dest1'
' serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: groupby_map_ppr_multi_distinct.dest1'
' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
''
' Stage: Stage-2'
' Stats-Aggr Operator'
' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
''
''
263 rows selected
>>> 
>>> FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) WHERE src.ds = '2008-04-08' GROUP BY substr(src.key,1,1);
'_col0','_col1','_col2','_col3','_col4'
No rows selected
>>> 
>>> SELECT dest1.* FROM dest1;
'key','c1','c2','c3','c4'
'0','1','00.0','0','1'
'1','71','132828.0','10044','71'
'2','69','251142.0','15780','69'
'3','62','364008.0','20119','62'
'4','74','4105526.0','30965','74'
'5','6','5794.0','278','6'
'6','5','6796.0','331','5'
'7','6','71470.0','447','6'
'8','8','81524.0','595','8'
'9','7','92094.0','577','7'
10 rows selected
>>> !record
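For readability only (not part of the recorded output), the statement exercised above can be restated clause by clause; the comments point at the corresponding pieces of the EXPLAIN EXTENDED plan already shown, and no new tables, settings, or results are introduced:

-- hive.map.aggr=true yields the map-side hash aggregation (Group By Operator, mode: hash) in Stage-1;
-- the ds = '2008-04-08' predicate prunes the scan to the ds=2008-04-08/hr=11 and hr=12 partitions listed under Path -> Alias.
FROM srcpart src
INSERT OVERWRITE TABLE dest1
SELECT substr(src.key,1,1),
       count(DISTINCT substr(src.value,5)),
       concat(substr(src.key,1,1), sum(substr(src.value,5))),
       sum(DISTINCT substr(src.value, 5)),
       count(DISTINCT src.value)
WHERE src.ds = '2008-04-08'
GROUP BY substr(src.key,1,1);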