PREHOOK: query: CREATE TABLE T1(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE T1(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T1 PREHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 PREHOOK: type: LOAD PREHOOK: Output: default@t1 POSTHOOK: query: LOAD DATA LOCAL INPATH '../data/files/T1.txt' INTO TABLE T1 POSTHOOK: type: LOAD POSTHOOK: Output: default@t1 PREHOOK: query: -- perform an insert to make sure there are 2 files INSERT OVERWRITE TABLE T1 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t1 POSTHOOK: query: -- perform an insert to make sure there are 2 files INSERT OVERWRITE TABLE T1 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t1 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: CREATE TABLE outputTbl1(key int, cnt int) PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE outputTbl1(key int, cnt int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl1 POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key -- matches the sorted key -- addind a order by at the end to make the test results deterministic EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key -- matches the sorted key -- addind a order by at the end to make the test results deterministic EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 2 1 3 1 7 1 8 2 PREHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int) PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl2 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY POSTHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string expr: val type: string outputColumnNames: key, val Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string expr: val type: string mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator key expressions: expr: _col0 type: string expr: _col1 type: string sort order: ++ Map-reduce partition columns: expr: _col0 type: string expr: _col1 type: string tag: -1 value expressions: expr: _col2 type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: string expr: KEY._col1 type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: _col1 type: string expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,cnt columns.types int:string:int #### A masked pattern was here #### name default.outputtbl2 serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 TotalFiles: 1 GatherStats: true MultiFileSpray: false Truncated Path -> Alias: /t1 [t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,cnt columns.types int:string:int #### A masked pattern was here #### name default.outputtbl2 serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl2 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl2 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl2 ORDER BY key1, key2 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl2 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl2 ORDER BY key1, key2 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl2 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 11 1 2 12 1 3 13 1 7 17 1 8 18 1 8 28 1 PREHOOK: query: -- It should work for sub-queries EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key PREHOOK: type: QUERY POSTHOOK: query: -- It should work for sub-queries EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: subq1:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: _col0 type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [subq1:t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 2 1 3 1 7 1 8 2 PREHOOK: query: -- It should work for sub-queries with column aliases EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k PREHOOK: type: QUERY POSTHOOK: query: -- It should work for sub-queries with column aliases EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key) k) (TOK_SELEXPR (TOK_TABLE_OR_COL val) v)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL k)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: subq1:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: _col0 Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: _col0 type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [subq1:t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 2 1 3 1 7 1 8 2 PREHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int) PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl3 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed -- by a match to the sorted key EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed -- by a match to the sorted key EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl3))) (TOK_SELECT (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY 1 (TOK_TABLE_OR_COL key)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: 1 type: int expr: key type: string mode: final outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: int expr: UDFToInteger(_col1) type: int expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,cnt columns.types int:int:int #### A masked pattern was here #### name default.outputtbl3 serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,cnt columns.types int:int:int #### A masked pattern was here #### name default.outputtbl3 serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl3 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl3 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl3 ORDER BY key1, key2 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl3 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl3 ORDER BY key1, key2 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl3 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 1 1 2 1 1 3 1 1 7 1 1 8 2 PREHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int) PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl4 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val PREHOOK: type: QUERY POSTHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string expr: val type: string outputColumnNames: key, val Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string expr: 1 type: int expr: val type: string mode: hash outputColumnNames: _col0, _col1, _col2, _col3 Reduce Output Operator key expressions: expr: _col0 type: string expr: _col1 type: int expr: _col2 type: string sort order: +++ Map-reduce partition columns: expr: _col0 type: string expr: _col1 type: int expr: _col2 type: string tag: -1 value expressions: expr: _col3 type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: string expr: KEY._col1 type: int expr: KEY._col2 type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: _col0 type: string expr: _col1 type: int expr: _col2 type: string expr: _col3 type: bigint outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: _col1 type: int expr: _col2 type: string expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,cnt columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: true MultiFileSpray: false Truncated Path -> Alias: /t1 [t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,cnt columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl4 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl4 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl4 ORDER BY key1, key2, key3 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl4 ORDER BY key1, key2, key3 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 11 1 2 1 12 1 3 1 13 1 7 1 17 1 8 1 18 1 8 1 28 1 PREHOOK: query: -- no map-side group by if the group by key contains a function EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY POSTHOOK: query: -- no map-side group by if the group by key contains a function EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl3))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) 1)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (+ (TOK_TABLE_OR_COL key) 1)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string expr: (key + 1) type: double mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator key expressions: expr: _col0 type: string expr: _col1 type: double sort order: ++ Map-reduce partition columns: expr: _col0 type: string expr: _col1 type: double tag: -1 value expressions: expr: _col2 type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: string expr: KEY._col1 type: double mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: string expr: _col1 type: double expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,cnt columns.types int:int:int #### A masked pattern was here #### name default.outputtbl3 numFiles 1 numPartitions 0 numRows 5 rawDataSize 25 serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 TotalFiles: 1 GatherStats: true MultiFileSpray: false Truncated Path -> Alias: /t1 [t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,cnt columns.types int:int:int #### A masked pattern was here #### name default.outputtbl3 numFiles 1 numPartitions 0 numRows 5 rawDataSize 25 serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl3 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl3 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl3 ORDER BY key1, key2 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl3 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl3 ORDER BY key1, key2 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl3 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 2 1 2 3 1 3 4 1 7 8 1 8 9 2 PREHOOK: query: -- it should not matter what follows the group by -- test various cases -- group by followed by another group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY POSTHOOK: query: -- it should not matter what follows the group by -- test various cases -- group by followed by another group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_TABLE_OR_COL cnt)))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: subq1:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Group By Operator aggregations: expr: sum(_col1) bucketGroup: false keys: expr: (_col0 + _col0) type: double mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: double sort order: + Map-reduce partition columns: expr: _col0 type: double tag: -1 value expressions: expr: _col1 type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Reduce Operator Tree: Group By Operator aggregations: expr: sum(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: double mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: double expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Truncated Path -> Alias: /t1 [subq1:t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 2 1 4 1 6 1 14 1 16 2 PREHOOK: query: -- group by followed by a union EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a union EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1 POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: null-subquery1:subq1-subquery1:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Union Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false null-subquery2:subq1-subquery2:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Union Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:subq1-subquery1:t1, null-subquery2:subq1-subquery2:t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 1 1 2 1 2 1 3 1 3 1 7 1 7 1 8 2 8 2 PREHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key + key as key, count(1) FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key + key as key, count(1) FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)) key) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (+ (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL key)))))) subq1)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) STAGE DEPENDENCIES: Stage-4 is a root stage Stage-2 depends on stages: Stage-4 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: null-subquery2:subq1-subquery2:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: (key + key) type: double mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: double sort order: + Map-reduce partition columns: expr: _col0 type: double tag: -1 value expressions: expr: _col1 type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: double mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: double expr: _col1 type: bigint outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ TotalFiles: 1 GatherStats: false MultiFileSpray: false Truncated Path -> Alias: /t1 [null-subquery2:subq1-subquery2:t1] Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### TableScan GatherStats: false Union Select Operator expressions: expr: _col0 type: double expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false null-subquery1:subq1-subquery1:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToDouble(_col0) type: double expr: _col1 type: bigint outputColumnNames: _col0, _col1 Union Select Operator expressions: expr: _col0 type: double expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:subq1-subquery1:t1] #### A masked pattern was here #### Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 #### A masked pattern was here #### Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 2 1 2 1 3 1 4 1 6 1 7 1 8 2 14 1 16 2 PREHOOK: query: -- group by followed by a join EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a join EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) key)) (TOK_SELEXPR (+ (. (TOK_TABLE_OR_COL subq1) cnt) (. (TOK_TABLE_OR_COL subq2) cnt)))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: subq1:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: 0 value expressions: expr: _col0 type: string expr: _col1 type: bigint subq2:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: 1 value expressions: expr: _col1 type: bigint Needs Tagging: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 Select Operator expressions: expr: _col0 type: string expr: (_col1 + _col3) type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 2 numPartitions 0 numRows 10 rawDataSize 32 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 42 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Truncated Path -> Alias: /t1 [subq1:t1, subq2:t1] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 2 numPartitions 0 numRows 10 rawDataSize 32 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 42 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 2 2 2 3 2 7 2 8 4 PREHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper EXPLAIN EXTENDED SELECT * FROM (SELECT key, count(1) FROM T1 GROUP BY key) subq1 JOIN (SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper EXPLAIN EXTENDED SELECT * FROM (SELECT key, count(1) FROM T1 GROUP BY key) subq1 JOIN (SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) subq1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) subq2) (= (. (TOK_TABLE_OR_COL subq1) key) (. (TOK_TABLE_OR_COL subq2) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-0 is a root stage STAGE PLANS: Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: subq2:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string expr: val type: string outputColumnNames: key, val Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string expr: val type: string mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator key expressions: expr: _col0 type: string expr: _col1 type: string sort order: ++ Map-reduce partition columns: expr: _col0 type: string expr: _col1 type: string tag: -1 value expressions: expr: _col2 type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: string expr: KEY._col1 type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,string,bigint escape.delim \ TotalFiles: 1 GatherStats: false MultiFileSpray: false Truncated Path -> Alias: /t1 [subq2:t1] Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: 1 value expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: bigint subq1:t1 TableScan alias: t1 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string mode: final outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: 0 value expressions: expr: _col0 type: string expr: _col1 type: bigint Needs Tagging: true Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,string,bigint escape.delim \ input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,string,bigint escape.delim \ #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint expr: _col2 type: string expr: _col3 type: string expr: _col4 type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1,_col2,_col3,_col4 columns.types string:bigint:string:string:bigint escape.delim \ serialization.format 1 TotalFiles: 1 GatherStats: false MultiFileSpray: false Truncated Path -> Alias: /t1 [subq1:t1] #### A masked pattern was here #### Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@T2 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: -- perform an insert to make sure there are 2 files INSERT OVERWRITE TABLE T2 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t2 POSTHOOK: query: -- perform an insert to make sure there are 2 files INSERT OVERWRITE TABLE T2 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t2 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted key EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY POSTHOOK: query: -- no mapside sort group by if the group by is a prefix of the sorted key EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: t2 TableScan alias: t2 GatherStats: false Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: true keys: expr: key type: string mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: -1 value expressions: expr: _col1 type: bigint Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: string mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Truncated Path -> Alias: /t2 [t2] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,cnt columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numPartitions 0 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 2 1 3 1 7 1 8 2 PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: t2 TableScan alias: t2 GatherStats: false Select Operator expressions: expr: key type: string expr: val type: string outputColumnNames: key, val Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string expr: 1 type: int expr: val type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: _col0 type: string expr: _col1 type: int expr: _col2 type: string expr: _col3 type: bigint outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: _col1 type: int expr: _col2 type: string expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,cnt columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numPartitions 0 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Truncated Path -> Alias: /t2 [t2] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,cnt columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numPartitions 0 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl4 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl4 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl4 ORDER BY key1, key2, key3 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl4 ORDER BY key1, key2, key3 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 11 1 2 1 12 1 3 1 13 1 7 1 17 1 8 1 18 1 8 1 28 1 PREHOOK: query: CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int) PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@outputTbl5 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys followed by anything EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys followed by anything EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl5))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR 2) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 1 (TOK_TABLE_OR_COL val) 2))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: t2 TableScan alias: t2 GatherStats: false Select Operator expressions: expr: key type: string expr: val type: string outputColumnNames: key, val Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string expr: 1 type: int expr: val type: string expr: 2 type: int mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Operator expressions: expr: _col0 type: string expr: _col1 type: int expr: _col2 type: string expr: _col3 type: int expr: _col4 type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: _col1 type: int expr: _col2 type: string expr: _col3 type: int expr: UDFToInteger(_col4) type: int outputColumnNames: _col0, _col1, _col2, _col3, _col4 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.types int:int:string:int:int #### A masked pattern was here #### name default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Truncated Path -> Alias: /t2 [t2] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.types int:int:string:int:int #### A masked pattern was here #### name default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl5 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl5 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl5 ORDER BY key1, key2, key3, key4 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl5 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl5 ORDER BY key1, key2, key3, key4 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl5 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 11 2 1 2 1 12 2 1 3 1 13 2 1 7 1 17 2 1 8 1 18 2 1 8 1 28 2 1 PREHOOK: query: -- contants from sub-queries should work fine EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val PREHOOK: type: QUERY POSTHOOK: query: -- contants from sub-queries should work fine EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 constant) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL constant)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL constant) (TOK_TABLE_OR_COL val)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: subq:t2 TableScan alias: t2 GatherStats: false Select Operator expressions: expr: key type: string expr: 1 type: int expr: val type: string outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: string expr: _col1 type: int expr: _col2 type: string outputColumnNames: _col0, _col1, _col2 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: _col0 type: string expr: _col1 type: int expr: _col2 type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: _col0 type: string expr: _col1 type: int expr: _col2 type: string expr: _col3 type: bigint outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: _col1 type: int expr: _col2 type: string expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,cnt columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numPartitions 0 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Truncated Path -> Alias: /t2 [subq:t2] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,cnt columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numPartitions 0 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl4 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl4 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl4 ORDER BY key1, key2, key3 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl4 ORDER BY key1, key2, key3 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 11 1 2 1 12 1 3 1 13 1 7 1 17 1 8 1 18 1 8 1 28 1 PREHOOK: query: -- multiple levels of contants from sub-queries should work fine EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, constant3, val PREHOOK: type: QUERY POSTHOOK: query: -- multiple levels of contants from sub-queries should work fine EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, constant3, val POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR 1 constant) (TOK_SELEXPR (TOK_TABLE_OR_COL val))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL constant) constant2) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR 2 constant3)))) subq2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME outputTbl4))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL constant3)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL constant3) (TOK_TABLE_OR_COL val)))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: subq2:subq:t2 TableScan alias: t2 GatherStats: false Select Operator expressions: expr: key type: string expr: val type: string outputColumnNames: _col0, _col2 Select Operator expressions: expr: _col0 type: string expr: _col2 type: string expr: 2 type: int outputColumnNames: _col0, _col2, _col3 Select Operator expressions: expr: _col0 type: string expr: _col3 type: int expr: _col2 type: string outputColumnNames: _col0, _col3, _col2 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: _col0 type: string expr: _col3 type: int expr: _col2 type: string mode: final outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: _col0 type: string expr: _col1 type: int expr: _col2 type: string expr: _col3 type: bigint outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: _col1 type: int expr: _col2 type: string expr: UDFToInteger(_col3) type: int outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,cnt columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numPartitions 0 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: true MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numPartitions 0 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Truncated Path -> Alias: /t2 [subq2:subq:t2] Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,cnt columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numPartitions 0 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 #### A masked pattern was here #### Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, constant3, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl4 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, constant3, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl4 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl4 ORDER BY key1, key2, key3 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl4 ORDER BY key1, key2, key3 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 2 11 1 2 2 12 1 3 2 13 1 7 2 17 1 8 2 18 1 8 2 28 1 PREHOOK: query: CREATE TABLE DEST1(key INT, cnt INT) PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE DEST1(key INT, cnt INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@DEST1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: CREATE TABLE DEST2(key INT, val STRING, cnt INT) PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE DEST2(key INT, val STRING, cnt INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@DEST2 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: EXPLAIN FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: t2 TableScan alias: t2 Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(1) bucketGroup: true keys: expr: key type: string mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: -1 value expressions: expr: _col1 type: bigint Select Operator expressions: expr: key type: string expr: val type: string outputColumnNames: key, val Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: key type: string expr: val type: string mode: final outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: _col1 type: string expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: true GlobalTableId: 2 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: string mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: true GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-3 Stats-Aggr Operator Stage: Stage-1 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Stage: Stage-4 Stats-Aggr Operator PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@dest1 PREHOOK: Output: default@dest2 POSTHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: select * from DEST1 ORDER BY key, cnt PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### POSTHOOK: query: select * from DEST1 ORDER BY key, cnt POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 1 2 1 3 1 7 1 8 2 PREHOOK: query: select * from DEST2 ORDER BY key, val, val PREHOOK: type: QUERY PREHOOK: Input: default@dest2 #### A masked pattern was here #### POSTHOOK: query: select * from DEST2 ORDER BY key, val, val POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 1 11 1 2 12 1 3 13 1 7 17 1 8 18 1 8 28 1 PREHOOK: query: -- multi-table insert with a sub-query EXPLAIN FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY POSTHOOK: query: -- multi-table insert with a sub-query EXPLAIN FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val))) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 8)))) x)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST1))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME DEST2))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL val)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL val)))) STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: x:t2 TableScan alias: t2 Filter Operator predicate: expr: (key = 8.0) type: boolean Select Operator expressions: expr: key type: string expr: val type: string outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string outputColumnNames: _col0 Group By Operator aggregations: expr: count(1) bucketGroup: true keys: expr: _col0 type: string mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: -1 value expressions: expr: _col1 type: bigint Select Operator expressions: expr: _col0 type: string expr: _col1 type: string outputColumnNames: _col0, _col1 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: _col0 type: string expr: _col1 type: string mode: final outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: string expr: _col1 type: string expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: _col1 type: string expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: true GlobalTableId: 2 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: string mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: UDFToInteger(_col1) type: int outputColumnNames: _col0, _col1 File Output Operator compressed: true GlobalTableId: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-3 Stats-Aggr Operator Stage: Stage-1 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Stage: Stage-4 Stats-Aggr Operator PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@dest1 PREHOOK: Output: default@dest2 POSTHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: select * from DEST1 ORDER BY key, cnt PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### POSTHOOK: query: select * from DEST1 ORDER BY key, cnt POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 8 2 PREHOOK: query: select * from DEST2 ORDER BY key, val, cnt PREHOOK: type: QUERY PREHOOK: Input: default@dest2 #### A masked pattern was here #### POSTHOOK: query: select * from DEST2 ORDER BY key, val, cnt POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 SIMPLE [] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] 8 18 1 8 28 1