Saving all output to "!!{outputDirectory}!!/union11.q.raw". Enter "record" with no arguments to stop it. >>> !run !!{qFileDirectory}!!/union11.q >>> set hive.map.aggr = true; No rows affected >>> >>> -- union case: all subqueries are a map-reduce jobs, 3 way union, same input for all sub-queries, followed by reducesink >>> >>> explain select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc group by unionsrc.key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst2' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst3' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL unionsrc) key))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-2 depends on stages: Stage-1, Stage-3, Stage-4' ' Stage-3 is a root stage' ' Stage-4 is a root stage' ' Stage-0 is a root stage' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' null-subquery1-subquery1:unionsrc-subquery1-subquery1:s1 ' ' TableScan' ' alias: s1' ' Select Operator' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' mode: hash' ' outputColumnNames: _col0' ' Reduce Output Operator' ' sort order: ' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: bigint' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' mode: mergepartial' ' outputColumnNames: _col0' ' Select Operator' ' expressions:' ' expr: 'tst1'' ' type: string' ' expr: _col0' ' type: bigint' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' '' ' Stage: Stage-2' ' Map Reduce' ' Alias -> Map Operator Tree:' ' file:!!{hive.exec.scratchdir}!! ' ' TableScan' ' Union' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' outputColumnNames: _col0' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: _col0' ' type: string' ' mode: hash' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: -1' ' value expressions:' ' expr: _col1' ' type: bigint' ' file:!!{hive.exec.scratchdir}!! ' ' TableScan' ' Union' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' outputColumnNames: _col0' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: _col0' ' type: string' ' mode: hash' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: -1' ' value expressions:' ' expr: _col1' ' type: bigint' ' file:!!{hive.exec.scratchdir}!! ' ' TableScan' ' Union' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' outputColumnNames: _col0' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' keys:' ' expr: _col0' ' type: string' ' mode: hash' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: -1' ' value expressions:' ' expr: _col1' ' type: bigint' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' mode: mergepartial' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' '' ' Stage: Stage-3' ' Map Reduce' ' Alias -> Map Operator Tree:' ' null-subquery1-subquery2:unionsrc-subquery1-subquery2:s2 ' ' TableScan' ' alias: s2' ' Select Operator' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' mode: hash' ' outputColumnNames: _col0' ' Reduce Output Operator' ' sort order: ' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: bigint' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' mode: mergepartial' ' outputColumnNames: _col0' ' Select Operator' ' expressions:' ' expr: 'tst2'' ' type: string' ' expr: _col0' ' type: bigint' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' '' ' Stage: Stage-4' ' Map Reduce' ' Alias -> Map Operator Tree:' ' null-subquery2:unionsrc-subquery2:s3 ' ' TableScan' ' alias: s3' ' Select Operator' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' mode: hash' ' outputColumnNames: _col0' ' Reduce Output Operator' ' sort order: ' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: bigint' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' mode: mergepartial' ' outputColumnNames: _col0' ' Select Operator' ' expressions:' ' expr: 'tst3'' ' type: string' ' expr: _col0' ' type: bigint' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' '' ' Stage: Stage-0' ' Fetch Operator' ' limit: -1' '' '' 250 rows selected >>> >>> >>> select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc group by unionsrc.key; 'key','_c1' 'tst1','1' 'tst2','1' 'tst3','1' 3 rows selected >>> >>> >>> >>> !record