Saving all output to "!!{outputDirectory}!!/union10.q.raw". Enter "record" with no arguments to stop it.
>>> !run !!{qFileDirectory}!!/union10.q
>>> set hive.map.aggr = true;
No rows affected
>>> 
>>> -- union case: all subqueries are map-reduce jobs, 3-way union, same input for all sub-queries, followed by filesink
>>> 
>>> create table tmptable(key string, value int);
No rows affected
>>> 
>>> explain insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc;
'Explain'
'ABSTRACT SYNTAX TREE:'
'  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst2' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst3' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value)))))'
''
'STAGE DEPENDENCIES:'
'  Stage-1 is a root stage'
'  Stage-2 depends on stages: Stage-1, Stage-9, Stage-10'
'  Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6'
'  Stage-5'
'  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7'
'  Stage-3 depends on stages: Stage-0'
'  Stage-4'
'  Stage-6'
'  Stage-7 depends on stages: Stage-6'
'  Stage-9 is a root stage'
'  Stage-10 is a root stage'
''
'STAGE PLANS:'
'  Stage: Stage-1'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        null-subquery1-subquery1:unionsrc-subquery1-subquery1:s1 '
'          TableScan'
'            alias: s1'
'            Select Operator'
'              Group By Operator'
'                aggregations:'
'                      expr: count(1)'
'                bucketGroup: false'
'                mode: hash'
'                outputColumnNames: _col0'
'                Reduce Output Operator'
'                  sort order: '
'                  tag: -1'
'                  value expressions:'
'                        expr: _col0'
'                        type: bigint'
'      Reduce Operator Tree:'
'        Group By Operator'
'          aggregations:'
'                expr: count(VALUE._col0)'
'          bucketGroup: false'
'          mode: mergepartial'
'          outputColumnNames: _col0'
'          Select Operator'
'            expressions:'
'                  expr: 'tst1''
'                  type: string'
'                  expr: _col0'
'                  type: bigint'
'            outputColumnNames: _col0, _col1'
'            File Output Operator'
'              compressed: false'
'              GlobalTableId: 0'
'              table:'
'                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
'                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
''
'  Stage: Stage-2'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        file:!!{hive.exec.scratchdir}!! '
'          TableScan'
'            Union'
'              Select Operator'
'                expressions:'
'                      expr: _col0'
'                      type: string'
'                      expr: _col1'
'                      type: bigint'
'                outputColumnNames: _col0, _col1'
'                Select Operator'
'                  expressions:'
'                        expr: _col0'
'                        type: string'
'                        expr: UDFToInteger(_col1)'
'                        type: int'
'                  outputColumnNames: _col0, _col1'
'                  File Output Operator'
'                    compressed: false'
'                    GlobalTableId: 1'
'                    table:'
'                        input format: org.apache.hadoop.mapred.TextInputFormat'
'                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                        name: union10.tmptable'
'        file:!!{hive.exec.scratchdir}!! '
'          TableScan'
'            Union'
'              Select Operator'
'                expressions:'
'                      expr: _col0'
'                      type: string'
'                      expr: _col1'
'                      type: bigint'
'                outputColumnNames: _col0, _col1'
'                Select Operator'
'                  expressions:'
'                        expr: _col0'
'                        type: string'
'                        expr: UDFToInteger(_col1)'
'                        type: int'
'                  outputColumnNames: _col0, _col1'
'                  File Output Operator'
'                    compressed: false'
'                    GlobalTableId: 1'
'                    table:'
'                        input format: org.apache.hadoop.mapred.TextInputFormat'
'                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                        name: union10.tmptable'
'        file:!!{hive.exec.scratchdir}!! '
'          TableScan'
'            Union'
'              Select Operator'
'                expressions:'
'                      expr: _col0'
'                      type: string'
'                      expr: _col1'
'                      type: bigint'
'                outputColumnNames: _col0, _col1'
'                Select Operator'
'                  expressions:'
'                        expr: _col0'
'                        type: string'
'                        expr: UDFToInteger(_col1)'
'                        type: int'
'                  outputColumnNames: _col0, _col1'
'                  File Output Operator'
'                    compressed: false'
'                    GlobalTableId: 1'
'                    table:'
'                        input format: org.apache.hadoop.mapred.TextInputFormat'
'                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                        name: union10.tmptable'
''
'  Stage: Stage-8'
'    Conditional Operator'
''
'  Stage: Stage-5'
'    Move Operator'
'      files:'
'          hdfs directory: true'
'          destination: pfile:!!{hive.exec.scratchdir}!!'
''
'  Stage: Stage-0'
'    Move Operator'
'      tables:'
'          replace: true'
'          table:'
'              input format: org.apache.hadoop.mapred.TextInputFormat'
'              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              name: union10.tmptable'
''
'  Stage: Stage-3'
'    Stats-Aggr Operator'
''
'  Stage: Stage-4'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        pfile:!!{hive.exec.scratchdir}!! '
'            File Output Operator'
'              compressed: false'
'              GlobalTableId: 0'
'              table:'
'                  input format: org.apache.hadoop.mapred.TextInputFormat'
'                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                  name: union10.tmptable'
''
'  Stage: Stage-6'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        pfile:!!{hive.exec.scratchdir}!! '
'            File Output Operator'
'              compressed: false'
'              GlobalTableId: 0'
'              table:'
'                  input format: org.apache.hadoop.mapred.TextInputFormat'
'                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                  name: union10.tmptable'
''
'  Stage: Stage-7'
'    Move Operator'
'      files:'
'          hdfs directory: true'
'          destination: pfile:!!{hive.exec.scratchdir}!!'
''
'  Stage: Stage-9'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        null-subquery1-subquery2:unionsrc-subquery1-subquery2:s2 '
'          TableScan'
'            alias: s2'
'            Select Operator'
'              Group By Operator'
'                aggregations:'
'                      expr: count(1)'
'                bucketGroup: false'
'                mode: hash'
'                outputColumnNames: _col0'
'                Reduce Output Operator'
'                  sort order: '
'                  tag: -1'
'                  value expressions:'
'                        expr: _col0'
'                        type: bigint'
'      Reduce Operator Tree:'
'        Group By Operator'
'          aggregations:'
'                expr: count(VALUE._col0)'
'          bucketGroup: false'
'          mode: mergepartial'
'          outputColumnNames: _col0'
'          Select Operator'
'            expressions:'
'                  expr: 'tst2''
'                  type: string'
'                  expr: _col0'
'                  type: bigint'
'            outputColumnNames: _col0, _col1'
'            File Output Operator'
'              compressed: false'
'              GlobalTableId: 0'
'              table:'
'                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
'                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
''
'  Stage: Stage-10'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        null-subquery2:unionsrc-subquery2:s3 '
'          TableScan'
'            alias: s3'
'            Select Operator'
'              Group By Operator'
'                aggregations:'
'                      expr: count(1)'
'                bucketGroup: false'
'                mode: hash'
'                outputColumnNames: _col0'
'                Reduce Output Operator'
'                  sort order: '
'                  tag: -1'
'                  value expressions:'
'                        expr: _col0'
'                        type: bigint'
'      Reduce Operator Tree:'
'        Group By Operator'
'          aggregations:'
'                expr: count(VALUE._col0)'
'          bucketGroup: false'
'          mode: mergepartial'
'          outputColumnNames: _col0'
'          Select Operator'
'            expressions:'
'                  expr: 'tst3''
'                  type: string'
'                  expr: _col0'
'                  type: bigint'
'            outputColumnNames: _col0, _col1'
'            File Output Operator'
'              compressed: false'
'              GlobalTableId: 0'
'              table:'
'                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
'                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
''
''
271 rows selected
>>> 
>>> 
>>> insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2 UNION ALL select 'tst3' as key, count(1) as value from src s3) unionsrc;
'_col0','_col1'
No rows selected
>>> 
>>> 
>>> select * from tmptable x sort by x.key;
'key','value'
'tst1','500'
'tst2','500'
'tst3','500'
3 rows selected
>>> 
>>> !record
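
For readers who want to replay the case outside the test harness, the statements below are a minimal standalone sketch of the session recorded above. It assumes the standard 500-row Hive test table src(key string, value string) is already loaded, which is why each union branch counts to 500; everything else is taken verbatim from the transcript.

    -- Sketch of the recorded session; assumes the standard 500-row
    -- Hive test table src(key string, value string) already exists.
    set hive.map.aggr = true;

    create table tmptable(key string, value int);

    -- Each branch is its own map-reduce job (a full count over src);
    -- the 3-way UNION ALL feeds a single filesink into tmptable.
    insert overwrite table tmptable
    select unionsrc.key, unionsrc.value
    from (
      select 'tst1' as key, count(1) as value from src s1
      union all
      select 'tst2' as key, count(1) as value from src s2
      union all
      select 'tst3' as key, count(1) as value from src s3
    ) unionsrc;

    -- Per the transcript, this returns tst1, tst2, tst3, each with value 500.
    select * from tmptable x sort by x.key;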