PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@srcbucket_mapjoin POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_mapjoin PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 3 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@srcbucket_mapjoin_part PREHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: query: load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 POSTHOOK: query: load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@bucketmapjoin_tmp_result POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@bucketmapjoin_tmp_result PREHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b on a.key=b.key where b.ds="2008-04-08" PREHOOK: type: QUERY POSTHOOK: query: explain extended insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b on a.key=b.key where b.ds="2008-04-08" POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_TABREF TOK_TABNAME srcbucket_mapjoin a TOK_TABREF TOK_TABNAME srcbucket_mapjoin_part b = . TOK_TABLE_OR_COL a key . TOK_TABLE_OR_COL b key TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME bucketmapjoin_tmp_result TOK_SELECT TOK_HINTLIST TOK_HINT TOK_MAPJOIN TOK_HINTARGLIST b TOK_SELEXPR . TOK_TABLE_OR_COL a key TOK_SELEXPR . TOK_TABLE_OR_COL a value TOK_SELEXPR . TOK_TABLE_OR_COL b value TOK_WHERE = . TOK_TABLE_OR_COL b ds "2008-04-08" STAGE DEPENDENCIES: Stage-9 is a root stage Stage-1 depends on stages: Stage-9 Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: b Fetch Operator limit: -1 Partition Description: Partition input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 2008-04-08 properties: COLUMN_STATS_ACCURATE true bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### name default.srcbucket_mapjoin_part numFiles 3 numRows 0 partition_columns ds partition_columns.types string rawDataSize 0 serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 4200 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count 3 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### name default.srcbucket_mapjoin_part partition_columns ds partition_columns.types string serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin_part name: default.srcbucket_mapjoin_part Alias -> Map Local Operator Tree: b TableScan alias: b Statistics: Num rows: 40 Data size: 4200 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 20 Data size: 2100 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: int) 1 key (type: int) Position of Big Table: 0 Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: a Statistics: Num rows: 26 Data size: 2750 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 13 Data size: 1375 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) outputColumnNames: _col0, _col1, _col6 Position of Big Table: 0 Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 22 Data size: 2310 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,value1,value2 columns.comments columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result TotalFiles: 1 GatherStats: true MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### name default.srcbucket_mapjoin numFiles 2 serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count 2 bucket_field_name key columns key,value columns.comments columns.types int:string #### A masked pattern was here #### name default.srcbucket_mapjoin numFiles 2 serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 2750 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.srcbucket_mapjoin name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] Stage: Stage-7 Conditional Operator Stage: Stage-4 Move Operator files: hdfs directory: true #### A masked pattern was here #### Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,value1,value2 columns.comments columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,value1,value2 columns.comments columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,value1,value2 columns.comments columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,value1,value2 columns.comments columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result name: default.bucketmapjoin_tmp_result Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-5 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,value1,value2 columns.comments columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,value1,value2 columns.comments columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key,value1,value2 columns.comments columns.types string:string:string #### A masked pattern was here #### name default.bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucketmapjoin_tmp_result name: default.bucketmapjoin_tmp_result Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here ####