query: -- union case: both subqueries are map-reduce jobs on same input, followed by filesink drop table tmptable query: create table tmptable(key string, value int) query: explain insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2) unionsrc ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF src s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF src s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst2' key) (TOK_SELEXPR (TOK_FUNCTION count 1) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB tmptable)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-6 Stage-5 depends on stages: Stage-2 Stage-0 depends on stages: Stage-5 Stage-6 is a root stage Stage-2 depends on stages: Stage-1, Stage-6 Stage-5 depends on stages: Stage-2 Stage-0 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: null-subquery1:unionsrc-subquery1:s1 TableScan alias: s1 Select Operator Group By Operator aggregations: expr: count(1) mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator expressions: expr: 'tst1' type: string expr: _col0 type: bigint outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/tmp/793944135/10002 Union Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: tmptable file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/tmp/793944135/10004 Union Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: tmptable Stage: Stage-5 Conditional Operator list of dependent Tasks: Move Operator files: hdfs directory: true destination: file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/tmp/1820922097/10000 Map Reduce Alias -> Map Operator Tree: file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/tmp/793944135/10003 Reduce Output Operator sort order: Map-reduce partition columns: expr: rand() type: double tag: -1 value expressions: expr: key type: string expr: value type: int Reduce Operator Tree: Extract File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: tmptable Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: tmptable Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: null-subquery2:unionsrc-subquery2:s2 TableScan alias: s2 Select Operator Group By Operator aggregations: expr: count(1) mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 Select Operator expressions: expr: 'tst2' type: string expr: _col0 type: bigint outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL select 'tst2' as key, count(1) as value from src s2) unionsrc Input: default/src Output: default/tmptable query: select * from tmptable x sort by x.key Input: default/tmptable Output: file:/data/users/athusoo/commits/hive_trunk_ws1/build/ql/tmp/1401758833/10000 tst1 500 tst2 500 query: drop table tmptable