PREHOOK: query: -- small 2 part, 2 bucket & big 2 part, 4 bucket
CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucket_small
POSTHOOK: query: -- small 2 part, 2 bucket & big 2 part, 4 bucket
CREATE TABLE bucket_small (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket_small
PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_small
POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_small
POSTHOOK: Output: default@bucket_small@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_small@ds=2008-04-08
POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_small@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_small
POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket1outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_small
POSTHOOK: Output: default@bucket_small@ds=2008-04-09
PREHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_small@ds=2008-04-09
POSTHOOK: query: load data local inpath '../../data/files/smallsrcsortbucket2outof4.txt' INTO TABLE bucket_small partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_small@ds=2008-04-09
PREHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@bucket_big
POSTHOOK: query: CREATE TABLE bucket_big (key string, value string) partitioned by (ds string) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@bucket_big
PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_big
POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_big
POSTHOOK: Output: default@bucket_big@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_big@ds=2008-04-08
POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_big@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_big@ds=2008-04-08
POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_big@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_big@ds=2008-04-08
POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-08')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_big@ds=2008-04-08
PREHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_big
POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket1outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_big
POSTHOOK: Output: default@bucket_big@ds=2008-04-09
PREHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_big@ds=2008-04-09
POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket2outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_big@ds=2008-04-09
PREHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_big@ds=2008-04-09
POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket3outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_big@ds=2008-04-09
PREHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09')
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@bucket_big@ds=2008-04-09
POSTHOOK: query: load data local inpath '../../data/files/srcsortbucket4outof4.txt' INTO TABLE bucket_big partition(ds='2008-04-09')
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@bucket_big@ds=2008-04-09
PREHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: -- Since size is being used to find the big table, the order of the tables in the join does not matter
explain extended select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
  
TOK_QUERY
   TOK_FROM
      TOK_JOIN
         TOK_TABREF
            TOK_TABNAME
               bucket_small
            a
         TOK_TABREF
            TOK_TABNAME
               bucket_big
            b
         =
            .
               TOK_TABLE_OR_COL
                  a
               key
            .
               TOK_TABLE_OR_COL
                  b
               key
   TOK_INSERT
      TOK_DESTINATION
         TOK_DIR
            TOK_TMP_FILE
      TOK_SELECT
         TOK_SELEXPR
            TOK_FUNCTIONSTAR
               count


STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: key (type: string)
                      sort order: +
                      Map-reduce partition columns: key (type: string)
                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
                      tag: 0
                      auto parallelism: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-08
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-08
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 2
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_small
                    numFiles 2
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_small { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 114
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 2
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_small
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_small { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_small
                  name: default.bucket_small
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-09
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-09
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 2
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_small
                    numFiles 2
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_small { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 114
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 2
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_small
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_small { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_small
                  name: default.bucket_small
            Truncated Path -> Alias:
              /bucket_small/ds=2008-04-08 [a]
              /bucket_small/ds=2008-04-09 [a]
        Map 2 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
                      Estimated key counts: Map 1 => 1
                      keys:
                        0 key (type: string)
                        1 key (type: string)
                      input vertices:
                        0 Map 1
                      Position of Big Table: 1
                      Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        aggregations: count()
                        mode: hash
                        outputColumnNames: _col0
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          sort order: 
                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                          tag: -1
                          value expressions: _col0 (type: bigint)
                          auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-08
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-08
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 4
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_big
                    numFiles 4
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_big { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 5812
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 4
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_big
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_big { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_big
                  name: default.bucket_big
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-09
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-09
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 4
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_big
                    numFiles 4
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_big { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 5812
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 4
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_big
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_big { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_big
                  name: default.bucket_big
            Truncated Path -> Alias:
              /bucket_big/ds=2008-04-08 [b]
              /bucket_big/ds=2008-04-09 [b]
        Reducer 3 
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        properties:
                          columns _col0
                          columns.types bigint
                          escape.delim \
                          hive.serialization.extend.nesting.levels true
                          serialization.format 1
                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    TotalFiles: 1
                    GatherStats: false
                    MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucket_big
PREHOOK: Input: default@bucket_big@ds=2008-04-08
PREHOOK: Input: default@bucket_big@ds=2008-04-09
PREHOOK: Input: default@bucket_small
PREHOOK: Input: default@bucket_small@ds=2008-04-08
PREHOOK: Input: default@bucket_small@ds=2008-04-09
#### A masked pattern was here ####
POSTHOOK: query: select count(*) FROM bucket_small a JOIN bucket_big b ON a.key = b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucket_big
POSTHOOK: Input: default@bucket_big@ds=2008-04-08
POSTHOOK: Input: default@bucket_big@ds=2008-04-09
POSTHOOK: Input: default@bucket_small
POSTHOOK: Input: default@bucket_small@ds=2008-04-08
POSTHOOK: Input: default@bucket_small@ds=2008-04-09
#### A masked pattern was here ####
76
PREHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
  
TOK_QUERY
   TOK_FROM
      TOK_JOIN
         TOK_TABREF
            TOK_TABNAME
               bucket_big
            a
         TOK_TABREF
            TOK_TABNAME
               bucket_small
            b
         =
            .
               TOK_TABLE_OR_COL
                  a
               key
            .
               TOK_TABLE_OR_COL
                  b
               key
   TOK_INSERT
      TOK_DESTINATION
         TOK_DIR
            TOK_TMP_FILE
      TOK_SELECT
         TOK_SELEXPR
            TOK_FUNCTIONSTAR
               count


STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
        Map 1 <- Map 3 (BROADCAST_EDGE)
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
                      Estimated key counts: Map 3 => 1
                      keys:
                        0 key (type: string)
                        1 key (type: string)
                      input vertices:
                        1 Map 3
                      Position of Big Table: 0
                      Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        aggregations: count()
                        mode: hash
                        outputColumnNames: _col0
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          sort order: 
                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                          tag: -1
                          value expressions: _col0 (type: bigint)
                          auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-08
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-08
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 4
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_big
                    numFiles 4
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_big { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 5812
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 4
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_big
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_big { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_big
                  name: default.bucket_big
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-09
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-09
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 4
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_big
                    numFiles 4
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_big { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 5812
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 4
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_big
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_big { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_big
                  name: default.bucket_big
            Truncated Path -> Alias:
              /bucket_big/ds=2008-04-08 [a]
              /bucket_big/ds=2008-04-09 [a]
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: key (type: string)
                      sort order: +
                      Map-reduce partition columns: key (type: string)
                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
                      tag: 1
                      auto parallelism: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-08
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-08
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 2
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_small
                    numFiles 2
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_small { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 114
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 2
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_small
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_small { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_small
                  name: default.bucket_small
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-09
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-09
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 2
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_small
                    numFiles 2
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_small { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 114
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 2
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_small
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_small { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_small
                  name: default.bucket_small
            Truncated Path -> Alias:
              /bucket_small/ds=2008-04-08 [b]
              /bucket_small/ds=2008-04-09 [b]
        Reducer 2 
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        properties:
                          columns _col0
                          columns.types bigint
                          escape.delim \
                          hive.serialization.extend.nesting.levels true
                          serialization.format 1
                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    TotalFiles: 1
                    GatherStats: false
                    MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucket_big
PREHOOK: Input: default@bucket_big@ds=2008-04-08
PREHOOK: Input: default@bucket_big@ds=2008-04-09
PREHOOK: Input: default@bucket_small
PREHOOK: Input: default@bucket_small@ds=2008-04-08
PREHOOK: Input: default@bucket_small@ds=2008-04-09
#### A masked pattern was here ####
POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucket_big
POSTHOOK: Input: default@bucket_big@ds=2008-04-08
POSTHOOK: Input: default@bucket_big@ds=2008-04-09
POSTHOOK: Input: default@bucket_small
POSTHOOK: Input: default@bucket_small@ds=2008-04-08
POSTHOOK: Input: default@bucket_small@ds=2008-04-09
#### A masked pattern was here ####
76
PREHOOK: query: -- The mapjoin should fail resulting in the sort-merge join
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
PREHOOK: type: QUERY
POSTHOOK: query: -- The mapjoin should fail resulting in the sort-merge join
explain extended select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
  
TOK_QUERY
   TOK_FROM
      TOK_JOIN
         TOK_TABREF
            TOK_TABNAME
               bucket_big
            a
         TOK_TABREF
            TOK_TABNAME
               bucket_small
            b
         =
            .
               TOK_TABLE_OR_COL
                  a
               key
            .
               TOK_TABLE_OR_COL
                  b
               key
   TOK_INSERT
      TOK_DESTINATION
         TOK_DIR
            TOK_TMP_FILE
      TOK_SELECT
         TOK_SELEXPR
            TOK_FUNCTIONSTAR
               count


STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
      Edges:
        Map 1 <- Map 3 (BROADCAST_EDGE)
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1 
            Map Operator Tree:
                TableScan
                  alias: a
                  Statistics: Num rows: 116 Data size: 11624 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE
                    Map Join Operator
                      condition map:
                           Inner Join 0 to 1
                      Estimated key counts: Map 3 => 1
                      keys:
                        0 key (type: string)
                        1 key (type: string)
                      input vertices:
                        1 Map 3
                      Position of Big Table: 0
                      Statistics: Num rows: 63 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
                      Group By Operator
                        aggregations: count()
                        mode: hash
                        outputColumnNames: _col0
                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                        Reduce Output Operator
                          sort order: 
                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                          tag: -1
                          value expressions: _col0 (type: bigint)
                          auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-08
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-08
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 4
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_big
                    numFiles 4
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_big { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 5812
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 4
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_big
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_big { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_big
                  name: default.bucket_big
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-09
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-09
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 4
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_big
                    numFiles 4
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_big { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 5812
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 4
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_big
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_big { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_big
                  name: default.bucket_big
            Truncated Path -> Alias:
              /bucket_big/ds=2008-04-08 [a]
              /bucket_big/ds=2008-04-09 [a]
        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
                  Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: false
                    predicate: key is not null (type: boolean)
                    Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
                    Reduce Output Operator
                      key expressions: key (type: string)
                      sort order: +
                      Map-reduce partition columns: key (type: string)
                      Statistics: Num rows: 1 Data size: 114 Basic stats: COMPLETE Column stats: NONE
                      tag: 1
                      auto parallelism: true
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-08
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-08
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 2
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_small
                    numFiles 2
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_small { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 114
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 2
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_small
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_small { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_small
                  name: default.bucket_small
#### A masked pattern was here ####
                Partition
                  base file name: ds=2008-04-09
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  partition values:
                    ds 2008-04-09
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count 2
                    bucket_field_name key
                    columns key,value
                    columns.comments 
                    columns.types string:string
#### A masked pattern was here ####
                    name default.bucket_small
                    numFiles 2
                    numRows 0
                    partition_columns ds
                    partition_columns.types string
                    rawDataSize 0
                    serialization.ddl struct bucket_small { string key, string value}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 114
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
                      SORTBUCKETCOLSPREFIX TRUE
                      bucket_count 2
                      bucket_field_name key
                      columns key,value
                      columns.comments 
                      columns.types string:string
#### A masked pattern was here ####
                      name default.bucket_small
                      partition_columns ds
                      partition_columns.types string
                      serialization.ddl struct bucket_small { string key, string value}
                      serialization.format 1
                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    name: default.bucket_small
                  name: default.bucket_small
            Truncated Path -> Alias:
              /bucket_small/ds=2008-04-08 [b]
              /bucket_small/ds=2008-04-09 [b]
        Reducer 2 
            Needs Tagging: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: bigint)
                  outputColumnNames: _col0
                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 0
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        properties:
                          columns _col0
                          columns.types bigint
                          escape.delim \
                          hive.serialization.extend.nesting.levels true
                          serialization.format 1
                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    TotalFiles: 1
                    GatherStats: false
                    MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
PREHOOK: type: QUERY
PREHOOK: Input: default@bucket_big
PREHOOK: Input: default@bucket_big@ds=2008-04-08
PREHOOK: Input: default@bucket_big@ds=2008-04-09
PREHOOK: Input: default@bucket_small
PREHOOK: Input: default@bucket_small@ds=2008-04-08
PREHOOK: Input: default@bucket_small@ds=2008-04-09
#### A masked pattern was here ####
POSTHOOK: query: select count(*) FROM bucket_big a JOIN bucket_small b ON a.key = b.key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@bucket_big
POSTHOOK: Input: default@bucket_big@ds=2008-04-08
POSTHOOK: Input: default@bucket_big@ds=2008-04-09
POSTHOOK: Input: default@bucket_small
POSTHOOK: Input: default@bucket_small@ds=2008-04-08
POSTHOOK: Input: default@bucket_small@ds=2008-04-09
#### A masked pattern was here ####
76