drop table if exists staging;
drop table if exists parquet_jointable1;
drop table if exists parquet_jointable2;
drop table if exists parquet_jointable1_bucketed_sorted;
drop table if exists parquet_jointable2_bucketed_sorted;

create table staging (key int, value string) stored as textfile;
insert into table staging select distinct key, value from src order by key limit 2;

create table parquet_jointable1 stored as parquet as select * from staging;
create table parquet_jointable2 stored as parquet as select key, key+1, concat(value,"value") as myvalue from staging;

-- SORT_QUERY_RESULTS

-- MR join

explain select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key;
select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key;

set hive.auto.convert.join=true;

-- The two tables involved in the join have a differing number of columns (table1: 2, table2: 3). In the case of map and SMB joins,
-- when the second table is loaded, the column indices in hive.io.file.readcolumn.ids refer to columns of both the first and the second table,
-- so the Parquet schema/types passed to ParquetInputSplit should contain only the column indexes belonging to the second (current) table.

-- Map join

explain select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key;
select p2.myvalue from parquet_jointable1 p1 join parquet_jointable2 p2 on p1.key=p2.key;

set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;
set hive.auto.convert.sortmerge.join=true;
set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

-- SMB join

create table parquet_jointable1_bucketed_sorted (key int, value string) clustered by (key) sorted by (key ASC) into 1 buckets stored as parquet;
insert overwrite table parquet_jointable1_bucketed_sorted select key, concat(value,"value1") as value from staging cluster by key;
create table parquet_jointable2_bucketed_sorted (key int, value1 string, value2 string) clustered by (key) sorted by (key ASC) into 1 buckets stored as parquet;
insert overwrite table parquet_jointable2_bucketed_sorted select key, concat(value,"value2-1") as value1, concat(value,"value2-2") as value2 from staging cluster by key;

explain select p1.value,p2.value2 from parquet_jointable1_bucketed_sorted p1 join parquet_jointable2_bucketed_sorted p2 on p1.key=p2.key;
select p1.value,p2.value2 from parquet_jointable1_bucketed_sorted p1 join parquet_jointable2_bucketed_sorted p2 on p1.key=p2.key;
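-- Illustrative sketch, not part of the original test: under the same SMB settings, project columns
-- from BOTH sides of the join so that each table's Parquet reader must prune
-- hive.io.file.readcolumn.ids down to its own column indexes, exercising the
-- cross-table projection issue described in the comment above. Column names here
-- come from the bucketed/sorted tables already created in this script.
explain select p1.value, p2.value1, p2.value2 from parquet_jointable1_bucketed_sorted p1 join parquet_jointable2_bucketed_sorted p2 on p1.key=p2.key;
select p1.value, p2.value1, p2.value2 from parquet_jointable1_bucketed_sorted p1 join parquet_jointable2_bucketed_sorted p2 on p1.key=p2.key;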