set hive.exec.infer.bucket.sort=true;
set hive.enforce.bucketing = true;
set hive.enforce.sorting = true;

-- This tests inferring how data is bucketed/sorted from the operators in the reducer
-- and populating that information in partitions' metadata, in particular, this tests
-- that operators in the mapper have no effect

CREATE TABLE test_table1 (key STRING, value STRING)
CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS;

CREATE TABLE test_table2 (key STRING, value STRING)
CLUSTERED BY (key) SORTED BY (key DESC) INTO 2 BUCKETS;

INSERT OVERWRITE TABLE test_table1 SELECT key, value FROM src;

INSERT OVERWRITE TABLE test_table2 SELECT key, value FROM src;

CREATE TABLE test_table_out (key STRING, value STRING) PARTITIONED BY (part STRING);

set hive.map.groupby.sorted=true;

-- Test map group by doesn't affect inference, should not be bucketed or sorted
EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') 
SELECT key, count(*) FROM test_table1 GROUP BY key;

INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') 
SELECT key, count(*) FROM test_table1 GROUP BY key;

DESCRIBE FORMATTED test_table_out PARTITION (part = '1');

-- Test map group by doesn't affect inference, should be bucketed and sorted by value
EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') 
SELECT a.key, a.value FROM (
	SELECT key, count(*) AS value FROM test_table1 GROUP BY key
) a JOIN (
 	SELECT key, value FROM src
) b
ON (a.value = b.value);

INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') 
SELECT a.key, a.value FROM (
	SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key
) a JOIN (
 	SELECT key, value FROM src
) b
ON (a.value = b.value);

DESCRIBE FORMATTED test_table_out PARTITION (part = '1');

set hive.map.groupby.sorted=false;
set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;

-- Test SMB join doesn't affect inference, should not be bucketed or sorted
EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key;

INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key;

DESCRIBE FORMATTED test_table_out PARTITION (part = '1');

-- Test SMB join doesn't affect inference, should be bucketed and sorted by key
EXPLAIN INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
GROUP BY b.value;

INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1')
SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key
GROUP BY b.value;

DESCRIBE FORMATTED test_table_out PARTITION (part = '1');