Saving all output to "!!{outputDirectory}!!/bucket_map_join_2.q.raw". Enter "record" with no arguments to stop it.
>>>  !run !!{qFileDirectory}!!/bucket_map_join_2.q
>>>  drop table table1;
No rows affected 
>>>  drop table table2;
No rows affected 
>>>  
>>>  set hive.enforce.bucketing = true;
No rows affected 
>>>  set hive.enforce.sorting = true;
No rows affected 
>>>  
>>>  create table table1(key string, value string) clustered by (key, value) 
sorted by (key desc, value desc) into 1 BUCKETS stored as textfile;
No rows affected 
>>>  create table table2(key string, value string) clustered by (value, key) 
sorted by (value desc, key desc) into 1 BUCKETS stored as textfile;
No rows affected 
>>>  
>>>  load data local inpath '../data/files/SortCol1Col2.txt' overwrite into table table1;
No rows affected 
>>>  load data local inpath '../data/files/SortCol2Col1.txt' overwrite into table table2;
No rows affected 
>>>  
>>>  set hive.optimize.bucketmapjoin = true;
No rows affected 
>>>  set hive.optimize.bucketmapjoin.sortedmerge = true;
No rows affected 
>>>  set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;
No rows affected 
>>>  
>>>  -- The tables are bucketed in same columns in different order,
>>>  -- but sorted in different column orders
>>>  -- Neither bucketed map-join, nor sort-merge join should be performed
>>>  
>>>  explain extended 
select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value;
'Explain'
'ABSTRACT SYNTAX TREE:'
'  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME table1) a) (TOK_TABREF (TOK_TABNAME table2) b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count)))))'
''
'STAGE DEPENDENCIES:'
'  Stage-4 is a root stage'
'  Stage-1 depends on stages: Stage-4'
'  Stage-2 depends on stages: Stage-1'
'  Stage-0 is a root stage'
''
'STAGE PLANS:'
'  Stage: Stage-4'
'    Map Reduce Local Work'
'      Alias -> Map Local Tables:'
'        b '
'          Fetch Operator'
'            limit: -1'
'      Alias -> Map Local Operator Tree:'
'        b '
'          TableScan'
'            alias: b'
'            GatherStats: false'
'            HashTable Sink Operator'
'              condition expressions:'
'                0 '
'                1 '
'              handleSkewJoin: false'
'              keys:'
'                0 [Column[key], Column[value]]'
'                1 [Column[key], Column[value]]'
'              Position of Big Table: 0'
''
'  Stage: Stage-1'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        a '
'          TableScan'
'            alias: a'
'            GatherStats: false'
'            Map Join Operator'
'              condition map:'
'                   Inner Join 0 to 1'
'              condition expressions:'
'                0 '
'                1 '
'              handleSkewJoin: false'
'              keys:'
'                0 [Column[key], Column[value]]'
'                1 [Column[key], Column[value]]'
'              Position of Big Table: 0'
'              File Output Operator'
'                compressed: false'
'                GlobalTableId: 0'
'                directory: file:!!{hive.exec.scratchdir}!!'
'                NumFilesPerFileSink: 1'
'                table:'
'                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
'                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
'                    properties:'
'                      columns '
'                      columns.types '
'                      escape.delim \'
'                TotalFiles: 1'
'                GatherStats: false'
'                MultiFileSpray: false'
'      Local Work:'
'        Map Reduce Local Work'
'      Needs Tagging: false'
'      Path -> Alias:'
'        !!{hive.metastore.warehouse.dir}!!/bucket_map_join_2.db/table1 [a]'
'      Path -> Partition:'
'        !!{hive.metastore.warehouse.dir}!!/bucket_map_join_2.db/table1 '
'          Partition'
'            base file name: table1'
'            input format: org.apache.hadoop.mapred.TextInputFormat'
'            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'            properties:'
'              SORTBUCKETCOLSPREFIX TRUE'
'              bucket_count 1'
'              bucket_field_name key'
'              columns key,value'
'              columns.types string:string'
'              file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              location !!{hive.metastore.warehouse.dir}!!/bucket_map_join_2.db/table1'
'              name bucket_map_join_2.table1'
'              numFiles 1'
'              numPartitions 0'
'              numRows 0'
'              rawDataSize 0'
'              serialization.ddl struct table1 { string key, string value}'
'              serialization.format 1'
'              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              totalSize 20'
'              transient_lastDdlTime !!UNIXTIME!!'
'            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'          '
'              input format: org.apache.hadoop.mapred.TextInputFormat'
'              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              properties:'
'                SORTBUCKETCOLSPREFIX TRUE'
'                bucket_count 1'
'                bucket_field_name key'
'                columns key,value'
'                columns.types string:string'
'                file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                location !!{hive.metastore.warehouse.dir}!!/bucket_map_join_2.db/table1'
'                name bucket_map_join_2.table1'
'                numFiles 1'
'                numPartitions 0'
'                numRows 0'
'                rawDataSize 0'
'                serialization.ddl struct table1 { string key, string value}'
'                serialization.format 1'
'                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                totalSize 20'
'                transient_lastDdlTime !!UNIXTIME!!'
'              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              name: bucket_map_join_2.table1'
'            name: bucket_map_join_2.table1'
''
'  Stage: Stage-2'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        file:!!{hive.exec.scratchdir}!! '
'          Select Operator'
'            Select Operator'
'              Group By Operator'
'                aggregations:'
'                      expr: count()'
'                bucketGroup: false'
'                mode: hash'
'                outputColumnNames: _col0'
'                Reduce Output Operator'
'                  sort order: '
'                  tag: -1'
'                  value expressions:'
'                        expr: _col0'
'                        type: bigint'
'      Needs Tagging: false'
'      Path -> Alias:'
'        file:!!{hive.exec.scratchdir}!! [file:!!{hive.exec.scratchdir}!!]'
'      Path -> Partition:'
'        file:!!{hive.exec.scratchdir}!! '
'          Partition'
'            base file name: -mr-10002'
'            input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
'            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
'            properties:'
'              columns '
'              columns.types '
'              escape.delim \'
'          '
'              input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
'              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
'              properties:'
'                columns '
'                columns.types '
'                escape.delim \'
'      Reduce Operator Tree:'
'        Group By Operator'
'          aggregations:'
'                expr: count(VALUE._col0)'
'          bucketGroup: false'
'          mode: mergepartial'
'          outputColumnNames: _col0'
'          Select Operator'
'            expressions:'
'                  expr: _col0'
'                  type: bigint'
'            outputColumnNames: _col0'
'            File Output Operator'
'              compressed: false'
'              GlobalTableId: 0'
'              directory: file:!!{hive.exec.scratchdir}!!'
'              NumFilesPerFileSink: 1'
'              Stats Publishing Key Prefix: file:!!{hive.exec.scratchdir}!!'
'              table:'
'                  input format: org.apache.hadoop.mapred.TextInputFormat'
'                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                  properties:'
'                    columns _col0'
'                    columns.types bigint'
'                    escape.delim \'
'                    serialization.format 1'
'              TotalFiles: 1'
'              GatherStats: false'
'              MultiFileSpray: false'
''
'  Stage: Stage-0'
'    Fetch Operator'
'      limit: -1'
''
''
194 rows selected 
>>>  
>>>  select /*+ mapjoin(b) */ count(*) from table1 a join table2 b on a.key=b.key and a.value=b.value;
'_c1'
'4'
1 row selected 
>>>  
>>>  !record