Saving all output to "!!{outputDirectory}!!/groupby_multi_single_reducer.q.raw". Enter "record" with no arguments to stop it. >>> !run !!{qFileDirectory}!!/groupby_multi_single_reducer.q >>> set hive.multigroupby.singlereducer=true; No rows affected >>> >>> CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE; No rows affected >>> CREATE TABLE dest_g3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE; No rows affected >>> CREATE TABLE dest_g4(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE; No rows affected >>> CREATE TABLE dest_h2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE; No rows affected >>> CREATE TABLE dest_h3(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE; No rows affected >>> >>> EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1); 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (< (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g4))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))))' '' 'STAGE DEPENDENCIES:' ' Stage-3 is a root stage' ' Stage-0 depends on stages: Stage-3' ' Stage-4 depends on stages: Stage-0' ' Stage-1 depends on stages: Stage-3' ' Stage-5 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-3' ' Stage-6 depends on stages: Stage-2' '' 'STAGE PLANS:' ' Stage: Stage-3' ' Map Reduce' ' Alias -> Map Operator Tree:' ' src ' ' TableScan' ' alias: src' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: value' ' type: string' ' outputColumnNames: key, value' ' Reduce Output Operator' ' key expressions:' ' expr: substr(key, 1, 1)' ' type: string' ' expr: substr(value, 5)' ' type: string' ' sort order: ++' ' Map-reduce partition columns:' ' expr: substr(key, 1, 1)' ' type: string' ' tag: -1' ' value expressions:' ' expr: value' ' type: string' ' Reduce Operator Tree:' ' Forward' ' Filter Operator' ' predicate:' ' expr: (KEY._col0 >= 5.0)' ' type: boolean' ' Group By Operator' ' aggregations:' ' expr: count(DISTINCT KEY._col1:1._col0)' ' expr: sum(KEY._col1:1._col0)' ' expr: sum(DISTINCT KEY._col1:1._col0)' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' mode: complete' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' expr: concat(_col0, _col2)' ' type: string' ' expr: _col3' ' type: double' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' expr: UDFToInteger(_col4)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g2' ' Filter Operator' ' predicate:' ' expr: (KEY._col0 < 5.0)' ' type: boolean' ' Group By Operator' ' aggregations:' ' expr: count(DISTINCT KEY._col1:1._col0)' ' expr: sum(KEY._col1:1._col0)' ' expr: sum(DISTINCT KEY._col1:1._col0)' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' mode: complete' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' expr: concat(_col0, _col2)' ' type: string' ' expr: _col3' ' type: double' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' expr: UDFToInteger(_col4)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 2' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g3' ' Group By Operator' ' aggregations:' ' expr: count(DISTINCT KEY._col1:1._col0)' ' expr: sum(KEY._col1:1._col0)' ' expr: sum(DISTINCT KEY._col1:1._col0)' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' mode: complete' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' expr: concat(_col0, _col2)' ' type: string' ' expr: _col3' ' type: double' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' expr: UDFToInteger(_col4)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 3' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g4' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g2' '' ' Stage: Stage-4' ' Stats-Aggr Operator' '' ' Stage: Stage-1' ' Move Operator' ' tables:' ' replace: true' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g3' '' ' Stage: Stage-5' ' Stats-Aggr Operator' '' ' Stage: Stage-2' ' Move Operator' ' tables:' ' replace: true' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g4' '' ' Stage: Stage-6' ' Stats-Aggr Operator' '' '' 229 rows selected >>> >>> FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1); '_col0','_col1','_col2','_col3','_col4' No rows selected >>> >>> SELECT * FROM dest_g2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; 'key','c1','c2','c3','c4' '5','6','5397.0','278','10' '6','5','6398.0','331','6' '7','6','7735.0','447','10' '8','8','8762.0','595','10' '9','7','91047.0','577','12' 5 rows selected >>> SELECT * FROM dest_g3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; 'key','c1','c2','c3','c4' '0','1','00.0','0','3' '1','71','116414.0','10044','115' '2','69','225571.0','15780','111' '3','62','332004.0','20119','99' '4','74','452763.0','30965','124' 5 rows selected >>> SELECT * FROM dest_g4 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; 'key','c1','c2','c3','c4' '0','1','00.0','0','3' '1','71','116414.0','10044','115' '2','69','225571.0','15780','111' '3','62','332004.0','20119','99' '4','74','452763.0','30965','124' '5','6','5397.0','278','10' '6','5','6398.0','331','6' '7','6','7735.0','447','10' '8','8','8762.0','595','10' '9','7','91047.0','577','12' 10 rows selected >>> >>> EXPLAIN FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1); 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (< (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_g4))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTIONDI sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_h2))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 2 1)) (TOK_LIMIT 10)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME dest_h3))) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1)) (TOK_SELEXPR (TOK_FUNCTIONDI count (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION concat (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) value) 5))) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) value)))) (TOK_WHERE (>= (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) 5)) (TOK_GROUPBY (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 1 1) (TOK_FUNCTION substr (. (TOK_TABLE_OR_COL src) key) 2 1))))' '' 'STAGE DEPENDENCIES:' ' Stage-5 is a root stage' ' Stage-0 depends on stages: Stage-5' ' Stage-6 depends on stages: Stage-0' ' Stage-1 depends on stages: Stage-5' ' Stage-7 depends on stages: Stage-1' ' Stage-2 depends on stages: Stage-5' ' Stage-8 depends on stages: Stage-2' ' Stage-9 depends on stages: Stage-5' ' Stage-10 depends on stages: Stage-9' ' Stage-3 depends on stages: Stage-10' ' Stage-11 depends on stages: Stage-3' ' Stage-4 depends on stages: Stage-10' ' Stage-12 depends on stages: Stage-4' '' 'STAGE PLANS:' ' Stage: Stage-5' ' Map Reduce' ' Alias -> Map Operator Tree:' ' src ' ' TableScan' ' alias: src' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: value' ' type: string' ' outputColumnNames: key, value' ' Reduce Output Operator' ' key expressions:' ' expr: substr(key, 1, 1)' ' type: string' ' expr: substr(value, 5)' ' type: string' ' sort order: ++' ' Map-reduce partition columns:' ' expr: substr(key, 1, 1)' ' type: string' ' tag: -1' ' value expressions:' ' expr: value' ' type: string' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: value' ' type: string' ' outputColumnNames: key, value' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' Reduce Operator Tree:' ' Forward' ' Filter Operator' ' predicate:' ' expr: (KEY._col0 >= 5.0)' ' type: boolean' ' Group By Operator' ' aggregations:' ' expr: count(DISTINCT KEY._col1:1._col0)' ' expr: sum(KEY._col1:1._col0)' ' expr: sum(DISTINCT KEY._col1:1._col0)' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' mode: complete' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' expr: concat(_col0, _col2)' ' type: string' ' expr: _col3' ' type: double' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' expr: UDFToInteger(_col4)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g2' ' Filter Operator' ' predicate:' ' expr: (KEY._col0 < 5.0)' ' type: boolean' ' Group By Operator' ' aggregations:' ' expr: count(DISTINCT KEY._col1:1._col0)' ' expr: sum(KEY._col1:1._col0)' ' expr: sum(DISTINCT KEY._col1:1._col0)' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' mode: complete' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' expr: concat(_col0, _col2)' ' type: string' ' expr: _col3' ' type: double' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' expr: UDFToInteger(_col4)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 2' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g3' ' Group By Operator' ' aggregations:' ' expr: count(DISTINCT KEY._col1:1._col0)' ' expr: sum(KEY._col1:1._col0)' ' expr: sum(DISTINCT KEY._col1:1._col0)' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' mode: complete' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' expr: concat(_col0, _col2)' ' type: string' ' expr: _col3' ' type: double' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' expr: UDFToInteger(_col4)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 3' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g4' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g2' '' ' Stage: Stage-6' ' Stats-Aggr Operator' '' ' Stage: Stage-1' ' Move Operator' ' tables:' ' replace: true' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g3' '' ' Stage: Stage-7' ' Stats-Aggr Operator' '' ' Stage: Stage-2' ' Move Operator' ' tables:' ' replace: true' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_g4' '' ' Stage: Stage-8' ' Stats-Aggr Operator' '' ' Stage: Stage-9' ' Map Reduce' ' Alias -> Map Operator Tree:' ' file:!!{hive.exec.scratchdir}!! ' ' Reduce Output Operator' ' key expressions:' ' expr: substr(key, 1, 1)' ' type: string' ' expr: substr(key, 2, 1)' ' type: string' ' expr: substr(value, 5)' ' type: string' ' sort order: +++' ' Map-reduce partition columns:' ' expr: substr(key, 1, 1)' ' type: string' ' expr: substr(key, 2, 1)' ' type: string' ' tag: -1' ' value expressions:' ' expr: value' ' type: string' ' Reduce Operator Tree:' ' Forward' ' Group By Operator' ' aggregations:' ' expr: count(DISTINCT KEY._col2:0._col0)' ' expr: sum(KEY._col2:0._col0)' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' expr: KEY._col1' ' type: string' ' mode: complete' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col2' ' type: bigint' ' expr: concat(_col0, _col3)' ' type: string' ' expr: _col3' ' type: double' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Limit' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' Filter Operator' ' predicate:' ' expr: (KEY._col0 >= 5.0)' ' type: boolean' ' Group By Operator' ' aggregations:' ' expr: count(DISTINCT KEY._col2:0._col0)' ' expr: sum(KEY._col2:0._col0)' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' expr: KEY._col1' ' type: string' ' mode: complete' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col2' ' type: bigint' ' expr: concat(_col0, _col3)' ' type: string' ' expr: _col3' ' type: double' ' expr: _col4' ' type: bigint' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' expr: UDFToInteger(_col4)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 5' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_h3' '' ' Stage: Stage-10' ' Map Reduce' ' Alias -> Map Operator Tree:' ' file:!!{hive.exec.scratchdir}!! ' ' Reduce Output Operator' ' sort order: ' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' expr: _col2' ' type: string' ' expr: _col3' ' type: double' ' expr: _col4' ' type: bigint' ' Reduce Operator Tree:' ' Extract' ' Limit' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: UDFToInteger(_col1)' ' type: int' ' expr: _col2' ' type: string' ' expr: UDFToInteger(_col3)' ' type: int' ' expr: UDFToInteger(_col4)' ' type: int' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4' ' File Output Operator' ' compressed: false' ' GlobalTableId: 4' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_h2' '' ' Stage: Stage-3' ' Move Operator' ' tables:' ' replace: true' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_h2' '' ' Stage: Stage-11' ' Stats-Aggr Operator' '' ' Stage: Stage-4' ' Move Operator' ' tables:' ' replace: true' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: groupby_multi_single_reducer.dest_h3' '' ' Stage: Stage-12' ' Stats-Aggr Operator' '' '' 426 rows selected >>> >>> FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10 INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1); '_col0','_col1','_col2','_col3','_col4' No rows selected >>> >>> SELECT * FROM dest_g2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; 'key','c1','c2','c3','c4' '5','6','5397.0','278','10' '6','5','6398.0','331','6' '7','6','7735.0','447','10' '8','8','8762.0','595','10' '9','7','91047.0','577','12' 5 rows selected >>> SELECT * FROM dest_g3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; 'key','c1','c2','c3','c4' '0','1','00.0','0','3' '1','71','116414.0','10044','115' '2','69','225571.0','15780','111' '3','62','332004.0','20119','99' '4','74','452763.0','30965','124' 5 rows selected >>> SELECT * FROM dest_g4 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; 'key','c1','c2','c3','c4' '0','1','00.0','0','3' '1','71','116414.0','10044','115' '2','69','225571.0','15780','111' '3','62','332004.0','20119','99' '4','74','452763.0','30965','124' '5','6','5397.0','278','10' '6','5','6398.0','331','6' '7','6','7735.0','447','10' '8','8','8762.0','595','10' '9','7','91047.0','577','12' 10 rows selected >>> SELECT * FROM dest_h2 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; 'key','c1','c2','c3','c4' '0','1','00.0','0','3' '1','4','1878.0','878','6' '1','5','1729.0','729','8' '1','6','11282.0','1282','12' '1','6','11494.0','1494','11' '1','7','11171.0','1171','11' '1','7','11516.0','1516','10' '1','8','11263.0','1263','10' '1','9','12294.0','2294','14' '1','9','12654.0','2654','16' 10 rows selected >>> SELECT * FROM dest_h3 ORDER BY key ASC, c1 ASC, c2 ASC, c3 ASC, c4 ASC; 'key','c1','c2','c3','c4' '5','1','5102.0','102','2' '5','1','5116.0','116','2' '5','1','515.0','15','3' '5','1','553.0','53','1' '5','1','554.0','54','1' '5','1','557.0','57','1' '6','1','6134.0','134','2' '6','1','664.0','64','1' '6','1','665.0','65','1' '6','1','666.0','66','1' '6','1','669.0','69','1' '7','1','7144.0','144','2' '7','1','7152.0','152','2' '7','1','7210.0','210','3' '7','1','774.0','74','1' '7','1','777.0','77','1' '7','1','778.0','78','1' '8','1','8166.0','166','2' '8','1','8168.0','168','2' '8','1','88.0','8','1' '8','1','880.0','80','1' '8','1','882.0','82','1' '8','1','885.0','85','1' '8','1','886.0','86','1' '8','1','887.0','87','1' '9','1','9190.0','190','2' '9','1','9194.0','194','2' '9','1','9196.0','196','2' '9','1','9270.0','270','3' '9','1','99.0','9','1' '9','1','992.0','92','1' '9','1','996.0','96','1' 32 rows selected >>> >>> DROP TABLE dest_g2; No rows affected >>> DROP TABLE dest_g3; No rows affected >>> DROP TABLE dest_g4; No rows affected >>> DROP TABLE dest_h2; No rows affected >>> DROP TABLE dest_h3; No rows affected >>> !record