set hive.enforce.bucketing = true; set hive.enforce.sorting = true; set hive.exec.reducers.max = 1; CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS; insert overwrite table tbl1 select * from src where key < 10; insert overwrite table tbl2 select * from src where key < 10; set hive.optimize.bucketmapjoin = true; set hive.optimize.bucketmapjoin.sortedmerge = true; set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; -- A join is being performed across different sub-queries, where a mapjoin is being performed in each of them. -- Each sub-query should be converted to a sort-merge join. -- A join followed by mapjoin is not allowed, so this query should fail. -- Once HIVE-3403 is in, this should be automatically converted to a sort-merge join without the hint explain select src1.key, src1.cnt1, src2.cnt1 from ( select key, count(*) as cnt1 from ( select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq1 group by key ) src1 join ( select key, count(*) as cnt1 from ( select /*+mapjoin(a)*/ a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key ) subq2 group by key ) src2 on src1.key = src2.key order by src1.key, src1.cnt1, src2.cnt1;