PREHOOK: query: CREATE TABLE tmp(c1 INT, c2 INT, c3 STRING, c4 STRING)
PREHOOK: type: CREATETABLE
POSTHOOK: query: CREATE TABLE tmp(c1 INT, c2 INT, c3 STRING, c4 STRING)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@tmp
PREHOOK: query: INSERT OVERWRITE TABLE tmp
SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@tmp
POSTHOOK: query: INSERT OVERWRITE TABLE tmp
SELECT x.key, y.key, x.value, y.value FROM src x JOIN src y ON (x.key = y.key)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@tmp
POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: -- The query in this file has operators with the same set of keys
-- but different sorting orders.
-- The correlation optimizer currently does not optimize this case.
-- This case will be optimized later (needs a follow-up JIRA).
EXPLAIN
SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt
FROM
(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt
 FROM tmp x WHERE x.c1 < 120 GROUP BY x.c3, x.c1) xx
JOIN
(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt
 FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c3, x1.c1) yy
ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2)
ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt
PREHOOK: type: QUERY
POSTHOOK: query: -- The query in this file has operators with the same set of keys
-- but different sorting orders.
-- The correlation optimizer currently does not optimize this case.
-- This case will be optimized later (needs a follow-up JIRA).
EXPLAIN
SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt
FROM
(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt
 FROM tmp x WHERE x.c1 < 120 GROUP BY x.c3, x.c1) xx
JOIN
(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt
 FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c3, x1.c1) yy
ON (xx.key1 = yy.key1 AND xx.key2 == yy.key2)
ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt
POSTHOOK: type: QUERY
POSTHOOK: Lineage: tmp.c1 EXPRESSION [(src)x.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: tmp.c2 EXPRESSION [(src)y.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: tmp.c3 SIMPLE [(src)x.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: tmp.c4 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) c1) 120)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) c3) (. (TOK_TABLE_OR_COL x) c1)))) xx) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME tmp) x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c1) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) c3) key2) (TOK_SELEXPR (TOK_FUNCTION count 1) cnt)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) c2) 100)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x1) c3) (. (TOK_TABLE_OR_COL x1) c1)))) yy) (AND (= (. (TOK_TABLE_OR_COL xx) key1) (. (TOK_TABLE_OR_COL yy) key1)) (== (. (TOK_TABLE_OR_COL xx) key2) (. (TOK_TABLE_OR_COL yy) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) key2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL yy) cnt))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key1)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) key2)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL xx) cnt)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL yy) cnt)))))
STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1, Stage-4
  Stage-3 depends on stages: Stage-2
  Stage-4 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        yy:x1
          TableScan
            alias: x1
            Filter Operator
              predicate:
                  expr: (c2 > 100)
                  type: boolean
              Select Operator
                expressions:
                      expr: c3
                      type: string
                      expr: c1
                      type: int
                outputColumnNames: c3, c1
                Group By Operator
                  aggregations:
                        expr: count(1)
                  bucketGroup: false
                  keys:
                        expr: c3
                        type: string
                        expr: c1
                        type: int
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
                  Reduce Output Operator
                    key expressions:
                          expr: _col0
                          type: string
                          expr: _col1
                          type: int
                    sort order: ++
                    Map-reduce partition columns:
                          expr: _col0
                          type: string
                          expr: _col1
                          type: int
                    tag: -1
                    value expressions:
                          expr: _col2
                          type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: count(VALUE._col0)
          bucketGroup: false
          keys:
                expr: KEY._col0
                type: string
                expr: KEY._col1
                type: int
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Select Operator
            expressions:
                  expr: _col1
                  type: int
                  expr: _col0
                  type: string
                  expr: _col2
                  type: bigint
            outputColumnNames: _col0, _col1, _col2
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
        $INTNAME
            Reduce Output Operator
              key expressions:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
              sort order: ++
              Map-reduce partition columns:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
              tag: 1
              value expressions:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
                    expr: _col2
                    type: bigint
        $INTNAME1
            Reduce Output Operator
              key expressions:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
              sort order: ++
              Map-reduce partition columns:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
              tag: 0
              value expressions:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
                    expr: _col2
                    type: bigint
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
            0 {VALUE._col0} {VALUE._col1} {VALUE._col2}
            1 {VALUE._col0} {VALUE._col1} {VALUE._col2}
          handleSkewJoin: false
          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
          Select Operator
            expressions:
                  expr: _col0
                  type: int
                  expr: _col1
                  type: string
                  expr: _col3
                  type: int
                  expr: _col4
                  type: string
                  expr: _col2
                  type: bigint
                  expr: _col5
                  type: bigint
            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
  Stage: Stage-3
    Map Reduce
      Alias -> Map Operator Tree:
#### A masked pattern was here ####
            Reduce Output Operator
              key expressions:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
                    expr: _col2
                    type: int
                    expr: _col3
                    type: string
                    expr: _col4
                    type: bigint
                    expr: _col5
                    type: bigint
              sort order: ++++++
              tag: -1
              value expressions:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
                    expr: _col2
                    type: int
                    expr: _col3
                    type: string
                    expr: _col4
                    type: bigint
                    expr: _col5
                    type: bigint
      Reduce Operator Tree:
        Extract
          File Output Operator
            compressed: false
            GlobalTableId: 0
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-4
    Map Reduce
      Alias -> Map Operator Tree:
        xx:x
          TableScan
            alias: x
            Filter Operator
              predicate:
                  expr: (c1 < 120)
                  type: boolean
              Select Operator
                expressions:
                      expr: c3
                      type: string
                      expr: c1
                      type: int
                outputColumnNames: c3, c1
                Group By Operator
                  aggregations:
                        expr: count(1)
                  bucketGroup: false
                  keys:
                        expr: c3
                        type: string
                        expr: c1
                        type: int
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
                  Reduce Output Operator
                    key expressions:
                          expr: _col0
                          type: string
                          expr: _col1
                          type: int
                    sort order: ++
                    Map-reduce partition columns:
                          expr: _col0
                          type: string
                          expr: _col1
                          type: int
                    tag: -1
                    value expressions:
                          expr: _col2
                          type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: count(VALUE._col0)
          bucketGroup: false
          keys:
                expr: KEY._col0
                type: string
                expr: KEY._col1
                type: int
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2
          Select Operator
            expressions:
                  expr: _col1
                  type: int
                  expr: _col0
                  type: string
                  expr: _col2
                  type: bigint
            outputColumnNames: _col0, _col1, _col2
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1
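The comments in the query above explain why the plan keeps separate shuffles: the GROUP BY keys (c3, c1) and the join keys (c1, c3) are the same set in different orders, so the aggregation and join Reduce Output Operators sort on incompatible key orders. As a minimal sketch, and assuming the hive.optimize.correlation flag that enables the correlation optimizer, listing the GROUP BY keys in the same order as the join keys is the shape the optimizer can merge; the rewrite below is illustrative only and is not part of the captured golden output:

-- Hypothetical rewrite (not in this test): GROUP BY key order matches the join key order,
-- so both aggregations and the join can share one shuffle when the correlation
-- optimizer is enabled.
SET hive.optimize.correlation=true;

EXPLAIN
SELECT xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt
FROM
(SELECT x.c1 AS key1, x.c3 AS key2, count(1) AS cnt
 FROM tmp x WHERE x.c1 < 120 GROUP BY x.c1, x.c3) xx
JOIN
(SELECT x1.c1 AS key1, x1.c3 AS key2, count(1) AS cnt
 FROM tmp x1 WHERE x1.c2 > 100 GROUP BY x1.c1, x1.c3) yy
ON (xx.key1 = yy.key1 AND xx.key2 = yy.key2)
ORDER BY xx.key1, xx.key2, yy.key1, yy.key2, xx.cnt, yy.cnt;

With the key orders aligned, the optimizer could collapse the separate aggregation jobs (Stage-1, Stage-4) and the join job (Stage-2) into a single MapReduce job; the final ORDER BY stage would remain.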