PREHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by filesink
create table tmptable(key string, value string)
PREHOOK: type: CREATETABLE
POSTHOOK: query: -- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by filesink
create table tmptable(key string, value string)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@tmptable
PREHOOK: query: explain insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src1 s2) unionsrc
PREHOOK: type: QUERY
POSTHOOK: query: explain insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src1 s2) unionsrc
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src) s1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'tst1' key) (TOK_SELEXPR (TOK_FUNCTION TOK_STRING (TOK_FUNCTION count 1)) value)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src1) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL s2) value) value))))) unionsrc)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME tmptable))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL unionsrc) value)))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
  Stage-5
  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
  Stage-3 depends on stages: Stage-0
  Stage-4
  Stage-6
  Stage-7 depends on stages: Stage-6

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        null-subquery1:unionsrc-subquery1:s1 
          TableScan
            alias: s1
            Select Operator
              Group By Operator
                aggregations:
                      expr: count(1)
                bucketGroup: false
                mode: hash
                outputColumnNames: _col0
                Reduce Output Operator
                  sort order: 
                  tag: -1
                  value expressions:
                        expr: _col0
                        type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: count(VALUE._col0)
          bucketGroup: false
          mode: mergepartial
          outputColumnNames: _col0
          Select Operator
            expressions:
                  expr: 'tst1'
                  type: string
                  expr: UDFToString(_col0)
                  type: string
            outputColumnNames: _col0, _col1
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
#### A masked pattern was here ####
          TableScan
            Union
              Select Operator
                expressions:
                      expr: _col0
                      type: string
                      expr: _col1
                      type: string
                outputColumnNames: _col0, _col1
                File Output Operator
                  compressed: false
                  GlobalTableId: 1
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.tmptable
        null-subquery2:unionsrc-subquery2:s2 
          TableScan
            alias: s2
            Select Operator
              expressions:
                    expr: key
                    type: string
                    expr: value
                    type: string
              outputColumnNames: _col0, _col1
              Union
                Select Operator
                  expressions:
                        expr: _col0
                        type: string
                        expr: _col1
                        type: string
                  outputColumnNames: _col0, _col1
                  File Output Operator
                    compressed: false
                    GlobalTableId: 1
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.tmptable

  Stage: Stage-8
    Conditional Operator

  Stage: Stage-5
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.tmptable

  Stage: Stage-3
    Stats-Aggr Operator

  Stage: Stage-4
    Map Reduce
      Alias -> Map Operator Tree:
#### A masked pattern was here ####
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.tmptable

  Stage: Stage-6
    Map Reduce
      Alias -> Map Operator Tree:
#### A masked pattern was here ####
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.tmptable

  Stage: Stage-7
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src1 s2) unionsrc
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Input: default@src1
PREHOOK: Output: default@tmptable
POSTHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL select s2.key as key, s2.value as value from src1 s2) unionsrc
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Input: default@src1
POSTHOOK: Output: default@tmptable
POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: select * from tmptable x sort by x.key, x.value
PREHOOK: type: QUERY
PREHOOK: Input: default@tmptable
#### A masked pattern was here ####
POSTHOOK: query: select * from tmptable x sort by x.key, x.value
POSTHOOK: type: QUERY
POSTHOOK: Input: default@tmptable
#### A masked pattern was here ####
POSTHOOK: Lineage: tmptable.key EXPRESSION [(src1)s2.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: tmptable.value EXPRESSION [(src)s1.null, (src1)s2.FieldSchema(name:value, type:string, comment:default), ]
	val_165
	val_193
	val_265
	val_27
	val_409
	val_484
128	
146	val_146
150	val_150
213	val_213
224	
238	val_238
255	val_255
273	val_273
278	val_278
311	val_311
369	
401	val_401
406	val_406
66	val_66
98	val_98
tst1	500