PREHOOK: query: CREATE TABLE tmp(c1 INT, c2 INT, c3 STRING, c4 STRING) PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE tmp(c1 INT, c2 INT, c3 STRING, c4 STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@tmp PREHOOK: query: INSERT OVERWRITE TABLE tmp SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@tmp POSTHOOK: query: INSERT OVERWRITE TABLE tmp SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@tmp POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: EXPLAIN SELECT xx.key, yy.key, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1) xx JOIN (SELECT x1.c2 AS key, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy ON (xx.key = yy.key) ORDER BY xx.key, yy.key, xx.cnt, yy.cnt PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT xx.key, yy.key, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1) xx JOIN (SELECT x1.c2 AS key, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy ON (xx.key = yy.key) ORDER BY xx.key, yy.key, xx.cnt, yy.cnt POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c1)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c2) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c2)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 Stage-3 depends on stages: Stage-2 Stage-4 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: yy:x1 TableScan alias: x1 Filter Operator predicate: expr: (c2 > 100) type: boolean Select Operator expressions: expr: c2 type: int outputColumnNames: c2 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: c2 type: int mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: int sort order: + Map-reduce partition columns: expr: _col0 type: int tag: -1 value expressions: expr: _col1 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: int mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: int expr: _col1 type: bigint outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col0 type: int sort order: + Map-reduce partition columns: expr: _col0 type: int tag: 1 value expressions: expr: _col0 type: int expr: _col1 type: bigint $INTNAME1 Reduce Output Operator key expressions: expr: _col0 type: int sort order: + Map-reduce partition columns: expr: _col0 type: int tag: 0 value expressions: expr: _col0 type: int expr: _col1 type: bigint Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: _col0 type: int expr: _col2 type: int expr: _col1 type: bigint expr: _col3 type: bigint outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: int expr: _col2 type: bigint expr: _col3 type: bigint sort order: ++++ tag: -1 value expressions: expr: _col0 type: int expr: _col1 type: int expr: _col2 type: bigint expr: _col3 type: bigint Reduce Operator Tree: Extract File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: xx:x TableScan alias: x Filter Operator predicate: expr: (c1 < 120) type: boolean Select Operator expressions: expr: c1 type: int outputColumnNames: c1 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: c1 type: int mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: int sort order: + Map-reduce partition columns: expr: _col0 type: int tag: -1 value expressions: expr: _col1 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: int mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: int expr: _col1 type: bigint outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1) xx JOIN (SELECT x1.c2 AS key, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy ON (xx.key = yy.key) ORDER BY xx.key, yy.key, xx.cnt, yy.cnt PREHOOK: type: QUERY PREHOOK: Input: default@tmp #### A masked pattern was here #### POSTHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1) xx JOIN (SELECT x1.c2 AS key, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy ON (xx.key = yy.key) ORDER BY xx.key, yy.key, xx.cnt, yy.cnt POSTHOOK: type: QUERY POSTHOOK: Input: default@tmp #### A masked pattern was here #### POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] 103 103 4 4 104 104 4 4 105 105 1 1 111 111 1 1 113 113 4 4 114 114 1 1 116 116 1 1 118 118 4 4 119 119 9 9 PREHOOK: query: -- The merged table scan should be able to load both c1 and c2 EXPLAIN SELECT xx.key, yy.key, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1) xx JOIN (SELECT x1.c2 AS key, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy ON (xx.key = yy.key) ORDER BY xx.key, yy.key, xx.cnt, yy.cnt PREHOOK: type: QUERY POSTHOOK: query: -- The merged table scan should be able to load both c1 and c2 EXPLAIN SELECT xx.key, yy.key, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1) xx JOIN (SELECT x1.c2 AS key, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy ON (xx.key = yy.key) ORDER BY xx.key, yy.key, xx.cnt, yy.cnt POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c1)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c2) key) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c2)))) yy) (= (. (TOK_TABLE_OR_COL xx) key) (. (TOK_TABLE_OR_COL yy) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: xx:x TableScan alias: x Filter Operator predicate: expr: (c1 < 120) type: boolean Select Operator expressions: expr: c1 type: int outputColumnNames: c1 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: c1 type: int mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: int sort order: + Map-reduce partition columns: expr: _col0 type: int tag: 0 value expressions: expr: _col1 type: bigint yy:x1 TableScan alias: x1 Filter Operator predicate: expr: (c2 > 100) type: boolean Select Operator expressions: expr: c2 type: int outputColumnNames: c2 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: c2 type: int mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: int sort order: + Map-reduce partition columns: expr: _col0 type: int tag: 1 value expressions: expr: _col1 type: bigint Reduce Operator Tree: Demux Operator Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: int mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: int expr: _col1 type: bigint outputColumnNames: _col0, _col1 Mux Operator Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: _col0 type: int expr: _col2 type: int expr: _col1 type: bigint expr: _col3 type: bigint outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: int mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: int expr: _col1 type: bigint outputColumnNames: _col0, _col1 Mux Operator Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3 Select Operator expressions: expr: _col0 type: int expr: _col2 type: int expr: _col1 type: bigint expr: _col3 type: bigint outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: int expr: _col2 type: bigint expr: _col3 type: bigint sort order: ++++ tag: -1 value expressions: expr: _col0 type: int expr: _col1 type: int expr: _col2 type: bigint expr: _col3 type: bigint Reduce Operator Tree: Extract File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1) xx JOIN (SELECT x1.c2 AS key, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy ON (xx.key = yy.key) ORDER BY xx.key, yy.key, xx.cnt, yy.cnt PREHOOK: type: QUERY PREHOOK: Input: default@tmp #### A masked pattern was here #### POSTHOOK: query: SELECT xx.key, yy.key, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1) xx JOIN (SELECT x1.c2 AS key, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c2) yy ON (xx.key = yy.key) ORDER BY xx.key, yy.key, xx.cnt, yy.cnt POSTHOOK: type: QUERY POSTHOOK: Input: default@tmp #### A masked pattern was here #### POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] 103 103 4 4 104 104 4 4 105 105 1 1 111 111 1 1 113 113 4 4 114 114 1 1 116 116 1 1 118 118 4 4 119 119 9 9 PREHOOK: query: EXPLAIN SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx JOIN (SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx JOIN (SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c1) (. (TOK_TABLE_OR_COL x) c3)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c1) (. (TOK_TABLE_OR_COL x1) c3)))) yy) (AND (= (. (TOK_TABLE_OR_COL xx) key1) (. (TOK_TABLE_OR_COL yy) key1)) (== (. (TOK_TABLE_OR_COL xx) key2) (. (TOK_TABLE_OR_COL yy) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1, Stage-4 Stage-3 depends on stages: Stage-2 Stage-4 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: yy:x1 TableScan alias: x1 Filter Operator predicate: expr: (c2 > 100) type: boolean Select Operator expressions: expr: c1 type: int expr: c3 type: string outputColumnNames: c1, c3 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: c1 type: int expr: c3 type: string mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: string sort order: ++ Map-reduce partition columns: expr: _col0 type: int expr: _col1 type: string tag: -1 value expressions: expr: _col2 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: int expr: KEY._col1 type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: string sort order: ++ Map-reduce partition columns: expr: _col0 type: int expr: _col1 type: string tag: 1 value expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: bigint $INTNAME1 Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: string sort order: ++ Map-reduce partition columns: expr: _col0 type: int expr: _col1 type: string tag: 0 value expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: bigint Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: _col3 type: int expr: _col4 type: string expr: _col2 type: bigint expr: _col5 type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: int expr: _col3 type: string expr: _col4 type: bigint expr: _col5 type: bigint sort order: ++++++ tag: -1 value expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: int expr: _col3 type: string expr: _col4 type: bigint expr: _col5 type: bigint Reduce Operator Tree: Extract File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: xx:x TableScan alias: x Filter Operator predicate: expr: (c1 < 120) type: boolean Select Operator expressions: expr: c1 type: int expr: c3 type: string outputColumnNames: c1, c3 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: c1 type: int expr: c3 type: string mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: string sort order: ++ Map-reduce partition columns: expr: _col0 type: int expr: _col1 type: string tag: -1 value expressions: expr: _col2 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: int expr: KEY._col1 type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx JOIN (SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt PREHOOK: type: QUERY PREHOOK: Input: default@tmp #### A masked pattern was here #### POSTHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx JOIN (SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt POSTHOOK: type: QUERY POSTHOOK: Input: default@tmp #### A masked pattern was here #### POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] 103 val_103 103 val_103 4 4 104 val_104 104 val_104 4 4 105 val_105 105 val_105 1 1 111 val_111 111 val_111 1 1 113 val_113 113 val_113 4 4 114 val_114 114 val_114 1 1 116 val_116 116 val_116 1 1 118 val_118 118 val_118 4 4 119 val_119 119 val_119 9 9 PREHOOK: query: EXPLAIN SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx JOIN (SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx JOIN (SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt POSTHOOK: type: QUERY POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c1) (. (TOK_TABLE_OR_COL x) c3)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c1) (. (TOK_TABLE_OR_COL x1) c3)))) yy) (AND (= (. (TOK_TABLE_OR_COL xx) key1) (. (TOK_TABLE_OR_COL yy) key1)) (== (. (TOK_TABLE_OR_COL xx) key2) (. (TOK_TABLE_OR_COL yy) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: xx:x TableScan alias: x Filter Operator predicate: expr: (c1 < 120) type: boolean Select Operator expressions: expr: c1 type: int expr: c3 type: string outputColumnNames: c1, c3 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: c1 type: int expr: c3 type: string mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: string sort order: ++ Map-reduce partition columns: expr: _col0 type: int expr: _col1 type: string tag: 0 value expressions: expr: _col2 type: bigint yy:x1 TableScan alias: x1 Filter Operator predicate: expr: (c2 > 100) type: boolean Select Operator expressions: expr: c1 type: int expr: c3 type: string outputColumnNames: c1, c3 Group By Operator aggregations: expr: count(1) bucketGroup: false keys: expr: c1 type: int expr: c3 type: string mode: hash outputColumnNames: _col0, _col1, _col2 Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: string sort order: ++ Map-reduce partition columns: expr: _col0 type: int expr: _col1 type: string tag: 1 value expressions: expr: _col2 type: bigint Reduce Operator Tree: Demux Operator Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: int expr: KEY._col1 type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 Mux Operator Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: _col3 type: int expr: _col4 type: string expr: _col2 type: bigint expr: _col5 type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: int expr: KEY._col1 type: string mode: mergepartial outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: bigint outputColumnNames: _col0, _col1, _col2 Mux Operator Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} {VALUE._col2} 1 {VALUE._col0} {VALUE._col1} {VALUE._col2} handleSkewJoin: false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: _col3 type: int expr: _col4 type: string expr: _col2 type: bigint expr: _col5 type: bigint outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: #### A masked pattern was here #### Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: int expr: _col3 type: string expr: _col4 type: bigint expr: _col5 type: bigint sort order: ++++++ tag: -1 value expressions: expr: _col0 type: int expr: _col1 type: string expr: _col2 type: int expr: _col3 type: string expr: _col4 type: bigint expr: _col5 type: bigint Reduce Operator Tree: Extract File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx JOIN (SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt PREHOOK: type: QUERY PREHOOK: Input: default@tmp #### A masked pattern was here #### POSTHOOK: query: SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt FROM (SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx JOIN (SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2) ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt POSTHOOK: type: QUERY POSTHOOK: Input: default@tmp #### A masked pattern was here #### POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] 103 val_103 103 val_103 4 4 104 val_104 104 val_104 4 4 105 val_105 105 val_105 1 1 111 val_111 111 val_111 1 1 113 val_113 113 val_113 4 4 114 val_114 114 val_114 1 1 116 val_116 116 val_116 1 1 118 val_118 118 val_118 4 4 119 val_119 119 val_119 9 9