PREHOOK: query: -- union :map-reduce sub-queries followed by join explain SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2 where s2.key < 10) unionsrc1 JOIN (select 'tst1' as key, cast(count(1) as string) as value from src s3 UNION ALL select s4.key as key, s4.value as value from src s4 where s4.key < 10) unionsrc2 ON (unionsrc1.key = unionsrc2.key) PREHOOK: type: QUERY POSTHOOK: query: -- union :map-reduce sub-queries followed by join explain SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2 where s2.key < 10) unionsrc1 JOIN (select 'tst1' as key, cast(count(1) as string) as value from src s3 UNION ALL select s4.key as key, s4.value as value from src s4 where s4.key < 10) unionsrc2 ON (unionsrc1.key = unionsrc2.key) POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL s2) key) 10))))) unionsrc1) (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s3)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s4)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s4) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s4) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL s4) key) 10))))) unionsrc2) (= (. (TOK_TABLE_OR_COL unionsrc1) key) (. (TOK_TABLE_OR_COL unionsrc2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc1) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc1) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc2) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc2) value))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-3, Stage-5 Stage-3 is a root stage Stage-4 is a root stage Stage-5 depends on stages: Stage-4, Stage-6 Stage-6 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: null-subquery1:unionsrc2-subquery1:s3 TableScan alias: s3 Select Operator Group By Operator aggregations: expr: count(1) bucketGroup: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false mode: mergepartial outputColumnNames: _col0 Select Operator expressions: expr: 'tst1' type: string expr: UDFToString(_col0) type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: 0 value expressions: expr: _col0 type: string expr: _col1 type: string file:/tmp/sdong/hive_2011-02-10_17-50-58_273_8747453587292923422/-mr-10002 Union Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: 1 value expressions: expr: _col0 type: string expr: _col1 type: string file:/tmp/sdong/hive_2011-02-10_17-50-58_273_8747453587292923422/-mr-10003 Union Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: 1 value expressions: expr: _col0 type: string expr: _col1 type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: string expr: _col3 type: string outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: null-subquery2:unionsrc2-subquery2:s4 TableScan alias: s4 Filter Operator predicate: expr: (key < 10) type: boolean Select Operator expressions: expr: key type: string expr: value type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: null-subquery2:unionsrc1-subquery2:s2 TableScan alias: s2 Filter Operator predicate: expr: (key < 10) type: boolean Select Operator expressions: expr: key type: string expr: value type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-5 Map Reduce Alias -> Map Operator Tree: file:/tmp/sdong/hive_2011-02-10_17-50-58_273_8747453587292923422/-mr-10004 Union File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat file:/tmp/sdong/hive_2011-02-10_17-50-58_273_8747453587292923422/-mr-10006 Union File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-6 Map Reduce Alias -> Map Operator Tree: null-subquery1:unionsrc1-subquery1:s1 TableScan alias: s1 Select Operator Group By Operator aggregations: expr: count(1) bucketGroup: false mode: hash outputColumnNames: _col0 Reduce Output Operator sort order: tag: -1 value expressions: expr: _col0 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false mode: mergepartial outputColumnNames: _col0 Select Operator expressions: expr: 'tst1' type: string expr: UDFToString(_col0) type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2 where s2.key < 10) unionsrc1 JOIN (select 'tst1' as key, cast(count(1) as string) as value from src s3 UNION ALL select s4.key as key, s4.value as value from src s4 where s4.key < 10) unionsrc2 ON (unionsrc1.key = unionsrc2.key) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-50-58_427_7809802233568190458/-mr-10000 POSTHOOK: query: SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src s2 where s2.key < 10) unionsrc1 JOIN (select 'tst1' as key, cast(count(1) as string) as value from src s3 UNION ALL select s4.key as key, s4.value as value from src s4 where s4.key < 10) unionsrc2 ON (unionsrc1.key = unionsrc2.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: file:/tmp/sdong/hive_2011-02-10_17-50-58_427_7809802233568190458/-mr-10000 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 2 val_2 2 val_2 4 val_4 4 val_4 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 8 val_8 8 val_8 9 val_9 9 val_9 tst1 500 tst1 500