PREHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are sorted on columns which is a superset
-- of join columns

-- Create bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
PREHOOK: type: CREATETABLE
POSTHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are sorted on columns which is a superset
-- of join columns

-- Create bucketed and sorted tables
CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@test_table1
PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
PREHOOK: type: CREATETABLE
POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@test_table2
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE test_table1 SELECT *
INSERT OVERWRITE TABLE test_table2 SELECT *
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@test_table1
PREHOOK: Output: default@test_table2
POSTHOOK: query: FROM src
INSERT OVERWRITE TABLE test_table1 SELECT *
INSERT OVERWRITE TABLE test_table2 SELECT *
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@test_table1
POSTHOOK: Output: default@test_table2
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: -- it should be converted to a sort-merge join, since the first sort column (#join columns = 1) contains the join columns
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
POSTHOOK: query: -- it should be converted to a sort-merge join, since the first sort column (#join columns = 1) contains the join columns
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        a 
          TableScan
            alias: a
            GatherStats: false
            Sorted Merge Bucket Map Join Operator
              condition map:
                   Inner Join 0 to 1
              condition expressions:
                0 {key} {value}
                1 {key} {value}
              handleSkewJoin: false
              keys:
                0 [Column[key]]
                1 [Column[key]]
              outputColumnNames: _col0, _col1, _col4, _col5
              Position of Big Table: 0
              Select Operator
                expressions:
                      expr: _col0
                      type: int
                      expr: _col1
                      type: string
                      expr: _col4
                      type: int
                      expr: _col5
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3
                Reduce Output Operator
                  key expressions:
                        expr: _col0
                        type: int
                  sort order: +
                  tag: -1
                  value expressions:
                        expr: _col0
                        type: int
                        expr: _col1
                        type: string
                        expr: _col2
                        type: int
                        expr: _col3
                        type: string
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: test_table1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
              columns key,value
              columns.types int:string
#### A masked pattern was here ####
              name default.test_table1
              numFiles 16
              numPartitions 0
              numRows 500
              rawDataSize 5312
              serialization.ddl struct test_table1 { i32 key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 5812
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 16
                bucket_field_name key
                columns key,value
                columns.types int:string
#### A masked pattern was here ####
                name default.test_table1
                numFiles 16
                numPartitions 0
                numRows 500
                rawDataSize 5312
                serialization.ddl struct test_table1 { i32 key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 5812
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.test_table1
            name: default.test_table1
      Truncated Path -> Alias:
        /test_table1 [a]
      Needs Tagging: false
      Reduce Operator Tree:
        Extract
          Limit
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    columns _col0,_col1,_col2,_col3
                    columns.types int:string:int:string
                    escape.delim \
                    hive.serialization.extend.nesting.levels true
                    serialization.format 1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
      limit: 10


PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1
PREHOOK: Input: default@test_table2
#### A masked pattern was here ####
POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_table1
POSTHOOK: Input: default@test_table2
#### A masked pattern was here ####
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
0	val_0	0	val_0
0	val_0	0	val_0
0	val_0	0	val_0
0	val_0	0	val_0
0	val_0	0	val_0
0	val_0	0	val_0
0	val_0	0	val_0
0	val_0	0	val_0
0	val_0	0	val_0
2	val_2	2	val_2
PREHOOK: query: DROP TABLE test_table1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@test_table1
PREHOOK: Output: default@test_table1
POSTHOOK: query: DROP TABLE test_table1
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@test_table1
POSTHOOK: Output: default@test_table1
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: DROP TABLE test_table2
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@test_table2
PREHOOK: Output: default@test_table2
POSTHOOK: query: DROP TABLE test_table2
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@test_table2
POSTHOOK: Output: default@test_table2
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: -- Create bucketed and sorted tables
CREATE TABLE test_table1 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS
PREHOOK: type: CREATETABLE
POSTHOOK: query: -- Create bucketed and sorted tables
CREATE TABLE test_table1 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@test_table1
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: CREATE TABLE test_table2 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS
PREHOOK: type: CREATETABLE
POSTHOOK: query: CREATE TABLE test_table2 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: default@test_table2
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE test_table1 SELECT key, key, value
INSERT OVERWRITE TABLE test_table2 SELECT key, key, value
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@test_table1
PREHOOK: Output: default@test_table2
POSTHOOK: query: FROM src
INSERT OVERWRITE TABLE test_table1 SELECT key, key, value
INSERT OVERWRITE TABLE test_table2 SELECT key, key, value
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@test_table1
POSTHOOK: Output: default@test_table2
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
POSTHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        a 
          TableScan
            alias: a
            GatherStats: false
            Sorted Merge Bucket Map Join Operator
              condition map:
                   Inner Join 0 to 1
              condition expressions:
                0 {key} {key2} {value}
                1 {key} {key2} {value}
              handleSkewJoin: false
              keys:
                0 [Column[key], Column[key2]]
                1 [Column[key], Column[key2]]
              outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
              Position of Big Table: 0
              Select Operator
                expressions:
                      expr: _col0
                      type: int
                      expr: _col1
                      type: int
                      expr: _col2
                      type: string
                      expr: _col5
                      type: int
                      expr: _col6
                      type: int
                      expr: _col7
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Reduce Output Operator
                  key expressions:
                        expr: _col0
                        type: int
                  sort order: +
                  tag: -1
                  value expressions:
                        expr: _col0
                        type: int
                        expr: _col1
                        type: int
                        expr: _col2
                        type: string
                        expr: _col3
                        type: int
                        expr: _col4
                        type: int
                        expr: _col5
                        type: string
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: test_table1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
              columns key,key2,value
              columns.types int:int:string
#### A masked pattern was here ####
              name default.test_table1
              numFiles 16
              numPartitions 0
              numRows 500
              rawDataSize 7218
              serialization.ddl struct test_table1 { i32 key, i32 key2, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 7718
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 16
                bucket_field_name key
                columns key,key2,value
                columns.types int:int:string
#### A masked pattern was here ####
                name default.test_table1
                numFiles 16
                numPartitions 0
                numRows 500
                rawDataSize 7218
                serialization.ddl struct test_table1 { i32 key, i32 key2, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 7718
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.test_table1
            name: default.test_table1
      Truncated Path -> Alias:
        /test_table1 [a]
      Needs Tagging: false
      Reduce Operator Tree:
        Extract
          Limit
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    columns _col0,_col1,_col2,_col3,_col4,_col5
                    columns.types int:int:string:int:int:string
                    escape.delim \
                    hive.serialization.extend.nesting.levels true
                    serialization.format 1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
      limit: 10


PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1
PREHOOK: Input: default@test_table2
#### A masked pattern was here ####
POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_table1
POSTHOOK: Input: default@test_table2
#### A masked pattern was here ####
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
2	2	val_2	2	2	val_2
PREHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns
-- even if the order is not the same
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
POSTHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns
-- even if the order is not the same
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key2) (. (TOK_TABLE_OR_COL b) key2)) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        a 
          TableScan
            alias: a
            GatherStats: false
            Sorted Merge Bucket Map Join Operator
              condition map:
                   Inner Join 0 to 1
              condition expressions:
                0 {key} {key2} {value}
                1 {key} {key2} {value}
              handleSkewJoin: false
              keys:
                0 [Column[key2], Column[key]]
                1 [Column[key2], Column[key]]
              outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
              Position of Big Table: 0
              Select Operator
                expressions:
                      expr: _col0
                      type: int
                      expr: _col1
                      type: int
                      expr: _col2
                      type: string
                      expr: _col5
                      type: int
                      expr: _col6
                      type: int
                      expr: _col7
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Reduce Output Operator
                  key expressions:
                        expr: _col0
                        type: int
                  sort order: +
                  tag: -1
                  value expressions:
                        expr: _col0
                        type: int
                        expr: _col1
                        type: int
                        expr: _col2
                        type: string
                        expr: _col3
                        type: int
                        expr: _col4
                        type: int
                        expr: _col5
                        type: string
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: test_table1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
              columns key,key2,value
              columns.types int:int:string
#### A masked pattern was here ####
              name default.test_table1
              numFiles 16
              numPartitions 0
              numRows 500
              rawDataSize 7218
              serialization.ddl struct test_table1 { i32 key, i32 key2, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 7718
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 16
                bucket_field_name key
                columns key,key2,value
                columns.types int:int:string
#### A masked pattern was here ####
                name default.test_table1
                numFiles 16
                numPartitions 0
                numRows 500
                rawDataSize 7218
                serialization.ddl struct test_table1 { i32 key, i32 key2, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 7718
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.test_table1
            name: default.test_table1
      Truncated Path -> Alias:
        /test_table1 [a]
      Needs Tagging: false
      Reduce Operator Tree:
        Extract
          Limit
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    columns _col0,_col1,_col2,_col3,_col4,_col5
                    columns.types int:int:string:int:int:string
                    escape.delim \
                    hive.serialization.extend.nesting.levels true
                    serialization.format 1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
      limit: 10


PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1
PREHOOK: Input: default@test_table2
#### A masked pattern was here ####
POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_table1
POSTHOOK: Input: default@test_table2
#### A masked pattern was here ####
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
2	2	val_2	2	2	val_2
PREHOOK: query: -- it should not be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) do not contain all 
-- the join columns
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
POSTHOOK: query: -- it should not be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) do not contain all 
-- the join columns
EXPLAIN EXTENDED
SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME test_table1) a) (TOK_TABREF (TOK_TABNAME test_table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))) (TOK_LIMIT 10)))

STAGE DEPENDENCIES:
  Stage-3 is a root stage
  Stage-1 depends on stages: Stage-3
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-3
    Map Reduce Local Work
      Alias -> Map Local Tables:
        b 
          Fetch Operator
            limit: -1
      Alias -> Map Local Operator Tree:
        b 
          TableScan
            alias: b
            GatherStats: false
            HashTable Sink Operator
              condition expressions:
                0 {key} {key2} {value}
                1 {key} {key2} {value}
              handleSkewJoin: false
              keys:
                0 [Column[key], Column[value]]
                1 [Column[key], Column[value]]
              Position of Big Table: 0
      Bucket Mapjoin Context:
          Alias Bucket Base File Name Mapping:
            b {000000_0=[000000_0], 000001_0=[000001_0], 000002_0=[000002_0], 000003_0=[000003_0], 000004_0=[000004_0], 000005_0=[000005_0], 000006_0=[000006_0], 000007_0=[000007_0], 000008_0=[000008_0], 000009_0=[000009_0], 000010_0=[000010_0], 000011_0=[000011_0], 000012_0=[000012_0], 000013_0=[000013_0], 000014_0=[000014_0], 000015_0=[000015_0]}
          Alias Bucket File Name Mapping:
#### A masked pattern was here ####
          Alias Bucket Output File Name Mapping:
#### A masked pattern was here ####

  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        a 
          TableScan
            alias: a
            GatherStats: false
            Map Join Operator
              condition map:
                   Inner Join 0 to 1
              condition expressions:
                0 {key} {key2} {value}
                1 {key} {key2} {value}
              handleSkewJoin: false
              keys:
                0 [Column[key], Column[value]]
                1 [Column[key], Column[value]]
              outputColumnNames: _col0, _col1, _col2, _col5, _col6, _col7
              Position of Big Table: 0
              BucketMapJoin: true
              Select Operator
                expressions:
                      expr: _col0
                      type: int
                      expr: _col1
                      type: int
                      expr: _col2
                      type: string
                      expr: _col5
                      type: int
                      expr: _col6
                      type: int
                      expr: _col7
                      type: string
                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                Reduce Output Operator
                  key expressions:
                        expr: _col0
                        type: int
                  sort order: +
                  tag: -1
                  value expressions:
                        expr: _col0
                        type: int
                        expr: _col1
                        type: int
                        expr: _col2
                        type: string
                        expr: _col3
                        type: int
                        expr: _col4
                        type: int
                        expr: _col5
                        type: string
      Local Work:
        Map Reduce Local Work
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: test_table1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
              columns key,key2,value
              columns.types int:int:string
#### A masked pattern was here ####
              name default.test_table1
              numFiles 16
              numPartitions 0
              numRows 500
              rawDataSize 7218
              serialization.ddl struct test_table1 { i32 key, i32 key2, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 7718
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 16
                bucket_field_name key
                columns key,key2,value
                columns.types int:int:string
#### A masked pattern was here ####
                name default.test_table1
                numFiles 16
                numPartitions 0
                numRows 500
                rawDataSize 7218
                serialization.ddl struct test_table1 { i32 key, i32 key2, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 7718
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.test_table1
            name: default.test_table1
      Truncated Path -> Alias:
        /test_table1 [a]
      Needs Tagging: false
      Reduce Operator Tree:
        Extract
          Limit
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    columns _col0,_col1,_col2,_col3,_col4,_col5
                    columns.types int:int:string:int:int:string
                    escape.delim \
                    hive.serialization.extend.nesting.levels true
                    serialization.format 1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
      limit: 10


PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1
PREHOOK: Input: default@test_table2
#### A masked pattern was here ####
POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@test_table1
POSTHOOK: Input: default@test_table2
#### A masked pattern was here ####
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
0	0	val_0	0	0	val_0
2	2	val_2	2	2	val_2
PREHOOK: query: DROP TABLE test_table1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@test_table1
PREHOOK: Output: default@test_table1
POSTHOOK: query: DROP TABLE test_table1
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@test_table1
POSTHOOK: Output: default@test_table1
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: DROP TABLE test_table2
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@test_table2
PREHOOK: Output: default@test_table2
POSTHOOK: query: DROP TABLE test_table2
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@test_table2
POSTHOOK: Output: default@test_table2
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]