-- SORT_QUERY_RESULTS

create table orc_split_elim (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc;

load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim;

SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
SET mapred.min.split.size=1000;
SET mapred.max.split.size=5000;
SET hive.optimize.index.filter=false;

-- The above table will have 5 splits with the followings stats
--  Stripe 1:
--    Column 0: count: 5000
--    Column 1: count: 5000 min: 2 max: 100 sum: 499902
--    Column 2: count: 5000 min: foo max: zebra sum: 24998
--    Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
--    Column 4: count: 5000 min: 0 max: 1.2 sum: 1.2
--    Column 5: count: 5000
--  Stripe 2:
--    Column 0: count: 5000
--    Column 1: count: 5000 min: 13 max: 100 sum: 499913
--    Column 2: count: 5000 min: bar max: zebra sum: 24998
--    Column 3: count: 5000 min: 8.0 max: 80.0 sum: 40072.0
--    Column 4: count: 5000 min: 0 max: 2.2 sum: 2.2
--    Column 5: count: 5000
--  Stripe 3:
--    Column 0: count: 5000
--    Column 1: count: 5000 min: 29 max: 100 sum: 499929
--    Column 2: count: 5000 min: cat max: zebra sum: 24998
--    Column 3: count: 5000 min: 8.0 max: 8.0 sum: 40000.0
--    Column 4: count: 5000 min: 0 max: 3.3 sum: 3.3
--    Column 5: count: 5000
--  Stripe 4:
--    Column 0: count: 5000
--    Column 1: count: 5000 min: 70 max: 100 sum: 499970
--    Column 2: count: 5000 min: dog max: zebra sum: 24998
--    Column 3: count: 5000 min: 1.8 max: 8.0 sum: 39993.8
--    Column 4: count: 5000 min: 0 max: 4.4 sum: 4.4
--    Column 5: count: 5000
--  Stripe 5:
--    Column 0: count: 5000
--    Column 1: count: 5000 min: 5 max: 100 sum: 499905
--    Column 2: count: 5000 min: eat max: zebra sum: 24998
--    Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
--    Column 4: count: 5000 min: 0 max: 5.5 sum: 5.5
--    Column 5: count: 5000

-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0;

SET hive.optimize.index.filter=true;
-- 0 mapper
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0;
SET hive.optimize.index.filter=false;

-- 5 mappers. count should be 0
select count(*) from orc_split_elim where userid<=0;

SET hive.optimize.index.filter=true;
-- 0 mapper
select count(*) from orc_split_elim where userid<=0;
SET hive.optimize.index.filter=false;

-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2;

SET hive.optimize.index.filter=true;
-- 1 mapper
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2;
SET hive.optimize.index.filter=false;

-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5;

SET hive.optimize.index.filter=true;
-- 2 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5;
SET hive.optimize.index.filter=false;

-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13;

SET hive.optimize.index.filter=true;
-- 3 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13;
SET hive.optimize.index.filter=false;

-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29;

SET hive.optimize.index.filter=true;
-- 4 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29;
SET hive.optimize.index.filter=false;

-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70;

SET hive.optimize.index.filter=true;
-- 5 mappers
select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70;
SET hive.optimize.index.filter=false;

-- partitioned table
create table orc_split_elim_part (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) partitioned by (country string, year int) stored as orc;

alter table orc_split_elim_part add partition(country='us', year=2000);
alter table orc_split_elim_part add partition(country='us', year=2001);

load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim_part partition(country='us', year=2000);
load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim_part partition(country='us', year=2001);

-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us';

SET hive.optimize.index.filter=true;
-- 2 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us';
SET hive.optimize.index.filter=false;

-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us' and (year=2000 or year=2001);

SET hive.optimize.index.filter=true;
-- 2 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us' and (year=2000 or year=2001);
SET hive.optimize.index.filter=false;

-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us' and year=2000;

SET hive.optimize.index.filter=true;
-- 1 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=2 and country='us' and year=2000;
SET hive.optimize.index.filter=false;

-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us';

SET hive.optimize.index.filter=true;
-- 4 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us';
SET hive.optimize.index.filter=false;

-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us' and (year=2000 or year=2001);

SET hive.optimize.index.filter=true;
-- 4 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us' and (year=2000 or year=2001);
SET hive.optimize.index.filter=false;

-- 10 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us' and year=2000;

SET hive.optimize.index.filter=true;
-- 2 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=5 and country='us' and year=2000;
SET hive.optimize.index.filter=false;

-- 0 mapper - no split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=70 and country='in';
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=70 and country='us' and year=2002;

SET hive.optimize.index.filter=true;
-- 0 mapper - split elimination
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=70 and country='in';
select userid,string1,subtype,decimal1,ts from orc_split_elim_part where userid<=70 and country='us' and year=2002;
SET hive.optimize.index.filter=false;