Saving all output to "!!{outputDirectory}!!/smb_mapjoin_10.q.raw". Enter "record" with no arguments to stop it. >>> !run !!{qFileDirectory}!!/smb_mapjoin_10.q >>> >>> create table tmp_smb_bucket_10(userid int, pageid int, postid int, type string) partitioned by (ds string) CLUSTERED BY (userid) SORTED BY (pageid, postid, type, userid) INTO 2 BUCKETS STORED AS RCFILE; No rows affected >>> >>> alter table tmp_smb_bucket_10 add partition (ds = '1'); No rows affected >>> alter table tmp_smb_bucket_10 add partition (ds = '2'); No rows affected >>> >>> -- add dummy files to make sure that the number of files in each partition is same as number of buckets >>> >>> load data local inpath '../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); No rows affected >>> load data local inpath '../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='1'); No rows affected >>> >>> load data local inpath '../data/files/smbbucket_1.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); No rows affected >>> load data local inpath '../data/files/smbbucket_2.rc' INTO TABLE tmp_smb_bucket_10 partition(ds='2'); No rows affected >>> >>> set hive.optimize.bucketmapjoin = true; No rows affected >>> set hive.optimize.bucketmapjoin.sortedmerge = true; No rows affected >>> set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; No rows affected >>> >>> explain select /*+mapjoin(a)*/ * from tmp_smb_bucket_10 a join tmp_smb_bucket_10 b on (a.ds = '1' and b.ds = '2' and a.userid = b.userid and a.pageid = b.pageid and a.postid = b.postid and a.type = b.type); 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME tmp_smb_bucket_10) a) (TOK_TABREF (TOK_TABNAME tmp_smb_bucket_10) b) (and (and (and (and (and (= (. (TOK_TABLE_OR_COL a) ds) '1') (= (. (TOK_TABLE_OR_COL b) ds) '2')) (= (. (TOK_TABLE_OR_COL a) userid) (. (TOK_TABLE_OR_COL b) userid))) (= (. (TOK_TABLE_OR_COL a) pageid) (. (TOK_TABLE_OR_COL b) pageid))) (= (. (TOK_TABLE_OR_COL a) postid) (. (TOK_TABLE_OR_COL b) postid))) (= (. (TOK_TABLE_OR_COL a) type) (. (TOK_TABLE_OR_COL b) type))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR TOK_ALLCOLREF))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 is a root stage' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' b ' ' TableScan' ' alias: b' ' Sorted Merge Bucket Map Join Operator' ' condition map:' ' Inner Join 0 to 1' ' condition expressions:' ' 0 {userid} {pageid} {postid} {type} {ds}' ' 1 {userid} {pageid} {postid} {type} {ds}' ' handleSkewJoin: false' ' keys:' ' 0 [Column[userid], Column[pageid], Column[postid], Column[type]]' ' 1 [Column[userid], Column[pageid], Column[postid], Column[type]]' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10, _col11' ' Position of Big Table: 1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: int' ' expr: _col1' ' type: int' ' expr: _col2' ' type: int' ' expr: _col3' ' type: string' ' expr: _col4' ' type: string' ' expr: _col7' ' type: int' ' expr: _col8' ' type: int' ' expr: _col9' ' type: int' ' expr: _col10' ' type: string' ' expr: _col11' ' type: string' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col7, _col8, _col9, _col10, _col11' ' Select Operator' ' expressions:' ' expr: _col0' ' type: int' ' expr: _col1' ' type: int' ' expr: _col2' ' type: int' ' expr: _col3' ' type: string' ' expr: _col4' ' type: string' ' expr: _col7' ' type: int' ' expr: _col8' ' type: int' ' expr: _col9' ' type: int' ' expr: _col10' ' type: string' ' expr: _col11' ' type: string' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' '' ' Stage: Stage-0' ' Fetch Operator' ' limit: -1' '' '' 84 rows selected >>> >>> !record