-- union10.q

 
-- union case: all subqueries are a map-reduce jobs, 3 way union, same input for all sub-queries, followed by filesink

create table tmptable(key string, value int);

explain 
insert overwrite table tmptable
  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                        UNION DISTINCT  
                                            select 'tst2' as key, count(1) as value from src s2
                                        UNION DISTINCT
                                            select 'tst3' as key, count(1) as value from src s3) unionsrc;


insert overwrite table tmptable
  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                        UNION DISTINCT  
                                            select 'tst2' as key, count(1) as value from src s2
                                        UNION DISTINCT
                                            select 'tst3' as key, count(1) as value from src s3) unionsrc;


select * from tmptable x sort by x.key;

-- union11.q

 
-- SORT_BEFORE_DIFF
-- union case: all subqueries are a map-reduce jobs, 3 way union, same input for all sub-queries, followed by reducesink

explain 
  select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
                                        UNION DISTINCT  
                                            select 'tst2' as key, count(1) as value from src s2
                                        UNION DISTINCT
                                            select 'tst3' as key, count(1) as value from src s3) unionsrc group by unionsrc.key;


  select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
                                        UNION DISTINCT  
                                            select 'tst2' as key, count(1) as value from src s2
                                        UNION DISTINCT
                                            select 'tst3' as key, count(1) as value from src s3) unionsrc group by unionsrc.key;


-- union12.q

 
-- union case: all subqueries are a map-reduce jobs, 3 way union, different inputs for all sub-queries, followed by filesink

create table tmptable12(key string, value int);

explain 
insert overwrite table tmptable12
  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                        UNION DISTINCT  
                                            select 'tst2' as key, count(1) as value from src1 s2
                                        UNION DISTINCT
                                            select 'tst3' as key, count(1) as value from srcbucket s3) unionsrc;


insert overwrite table tmptable12
  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                        UNION DISTINCT  
                                            select 'tst2' as key, count(1) as value from src1 s2
                                        UNION DISTINCT
                                            select 'tst3' as key, count(1) as value from srcbucket s3) unionsrc;

select * from tmptable12 x sort by x.key;
-- union13.q

-- SORT_BEFORE_DIFF
-- union case: both subqueries are a map-only jobs, same input, followed by filesink

explain 
  select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION DISTINCT  
                                            select s2.key as key, s2.value as value from src s2) unionsrc;

select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION DISTINCT  
                                          select s2.key as key, s2.value as value from src s2) unionsrc;
-- union14.q

 
-- SORT_BEFORE_DIFF
-- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink

explain 
  select unionsrc.key, count(1) FROM (select s2.key as key, s2.value as value from src1 s2
                                        UNION DISTINCT  
                                      select 'tst1' as key, cast(count(1) as string) as value from src s1) 
  unionsrc group by unionsrc.key;


  select unionsrc.key, count(1) FROM (select s2.key as key, s2.value as value from src1 s2
                                        UNION DISTINCT  
                                      select 'tst1' as key, cast(count(1) as string) as value from src s1) 
  unionsrc group by unionsrc.key;
-- union15.q

 
-- SORT_BEFORE_DIFF
-- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink

explain 
  select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                        UNION DISTINCT  
                                            select s2.key as key, s2.value as value from src1 s2
                                        UNION DISTINCT  
                                            select s3.key as key, s3.value as value from src1 s3) unionsrc group by unionsrc.key;

  select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                        UNION DISTINCT  
                                            select s2.key as key, s2.value as value from src1 s2
                                        UNION DISTINCT  
                                            select s3.key as key, s3.value as value from src1 s3) unionsrc group by unionsrc.key;


-- union16.q

-- SORT_BEFORE_DIFF
EXPLAIN
SELECT count(1) FROM (
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT

  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT

  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT

  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT

  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src) src;


SELECT count(1) FROM (
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT

  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT

  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT

  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT

  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src UNION DISTINCT
  SELECT key, value FROM src) src;
-- union17.q

CREATE TABLE DEST1(key STRING, value STRING) STORED AS TEXTFILE;
CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE;

-- SORT_BEFORE_DIFF
-- union case:map-reduce sub-queries followed by multi-table insert

explain 
FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                         UNION DISTINCT  
      select s2.key as key, s2.value as value from src s2) unionsrc
INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key
INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value;

FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                         UNION DISTINCT  
      select s2.key as key, s2.value as value from src s2) unionsrc
INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key
INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value;

SELECT DEST1.* FROM DEST1;
SELECT DEST2.* FROM DEST2;
-- union18.q

CREATE TABLE DEST118(key STRING, value STRING) STORED AS TEXTFILE;
CREATE TABLE DEST218(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE;

-- union case:map-reduce sub-queries followed by multi-table insert 

explain 
FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                         UNION DISTINCT  
      select s2.key as key, s2.value as value from src s2) unionsrc
INSERT OVERWRITE TABLE DEST118 SELECT unionsrc.key, unionsrc.value
INSERT OVERWRITE TABLE DEST218 SELECT unionsrc.key, unionsrc.value, unionsrc.value;

FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                         UNION DISTINCT  
      select s2.key as key, s2.value as value from src s2) unionsrc
INSERT OVERWRITE TABLE DEST118 SELECT unionsrc.key, unionsrc.value
INSERT OVERWRITE TABLE DEST218 SELECT unionsrc.key, unionsrc.value, unionsrc.value;

SELECT DEST118.* FROM DEST118 SORT BY DEST118.key, DEST118.value;
SELECT DEST218.* FROM DEST218 SORT BY DEST218.key, DEST218.val1, DEST218.val2;
-- union19.q


CREATE TABLE DEST119(key STRING, value STRING) STORED AS TEXTFILE;
CREATE TABLE DEST219(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE;

-- union case:map-reduce sub-queries followed by multi-table insert

explain 
FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                         UNION DISTINCT  
      select s2.key as key, s2.value as value from src s2) unionsrc
INSERT OVERWRITE TABLE DEST119 SELECT unionsrc.key, count(unionsrc.value) group by unionsrc.key
INSERT OVERWRITE TABLE DEST219 SELECT unionsrc.key, unionsrc.value, unionsrc.value;

FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                         UNION DISTINCT  
      select s2.key as key, s2.value as value from src s2) unionsrc
INSERT OVERWRITE TABLE DEST119 SELECT unionsrc.key, count(unionsrc.value) group by unionsrc.key
INSERT OVERWRITE TABLE DEST219 SELECT unionsrc.key, unionsrc.value, unionsrc.value;

SELECT DEST119.* FROM DEST119 SORT BY DEST119.key, DEST119.value;
SELECT DEST219.* FROM DEST219 SORT BY DEST219.key, DEST219.val1, DEST219.val2;


-- union2.q

-- SORT_BEFORE_DIFF
-- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink

explain 
  select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION DISTINCT  
                        select s2.key as key, s2.value as value from src s2) unionsrc;

select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION DISTINCT  
                      select s2.key as key, s2.value as value from src s2) unionsrc;
-- union20.q

-- SORT_BEFORE_DIFF
-- union :map-reduce sub-queries followed by join

explain 
SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value
FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                         UNION DISTINCT  
      select s2.key as key, s2.value as value from src s2 where s2.key < 10) unionsrc1 
JOIN 
     (select 'tst1' as key, cast(count(1) as string) as value from src s3
                         UNION DISTINCT  
      select s4.key as key, s4.value as value from src s4 where s4.key < 10) unionsrc2
ON (unionsrc1.key = unionsrc2.key);

SELECT unionsrc1.key, unionsrc1.value, unionsrc2.key, unionsrc2.value
FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                         UNION DISTINCT  
      select s2.key as key, s2.value as value from src s2 where s2.key < 10) unionsrc1 
JOIN 
     (select 'tst1' as key, cast(count(1) as string) as value from src s3
                         UNION DISTINCT  
      select s4.key as key, s4.value as value from src s4 where s4.key < 10) unionsrc2
ON (unionsrc1.key = unionsrc2.key);
-- union21.q

-- SORT_BEFORE_DIFF
-- union of constants, udf outputs, and columns from text table and thrift table

explain
SELECT key, count(1)
FROM (
  SELECT '1' as key from src
  UNION DISTINCT
  SELECT reverse(key) as key from src
  UNION DISTINCT
  SELECT key as key from src
  UNION DISTINCT
  SELECT astring as key from src_thrift
  UNION DISTINCT
  SELECT lstring[0] as key from src_thrift
) union_output
GROUP BY key;

SELECT key, count(1)
FROM (
  SELECT '1' as key from src
  UNION DISTINCT
  SELECT reverse(key) as key from src
  UNION DISTINCT
  SELECT key as key from src
  UNION DISTINCT
  SELECT astring as key from src_thrift
  UNION DISTINCT
  SELECT lstring[0] as key from src_thrift
) union_output
GROUP BY key;
-- union22.q

-- SORT_QUERY_RESULTS

create table dst_union22(k1 string, k2 string, k3 string, k4 string) partitioned by (ds string);
create table dst_union22_delta(k0 string, k1 string, k2 string, k3 string, k4 string, k5 string) partitioned by (ds string);

insert overwrite table dst_union22 partition (ds='1')
select key, value, key , value from src;

insert overwrite table dst_union22_delta partition (ds='1')
select key, key, value, key, value, value from src;

set hive.merge.mapfiles=false;

set hive.auto.convert.join=true;
set hive.auto.convert.join.noconditionaltask=true;
set hive.auto.convert.join.noconditionaltask.size=10000;

-- Since the inputs are small, it should be automatically converted to mapjoin

explain extended
insert overwrite table dst_union22 partition (ds='2')
select * from
(
select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50
UNION DISTINCT
select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on
a.k1 = b.k1 and a.ds='1'
where a.k1 > 20
)
subq;

insert overwrite table dst_union22 partition (ds='2')
select * from
(
select k1 as k1, k2 as k2, k3 as k3, k4 as k4 from dst_union22_delta where ds = '1' and k0 <= 50
UNION DISTINCT
select a.k1 as k1, a.k2 as k2, b.k3 as k3, b.k4 as k4
from dst_union22 a left outer join (select * from dst_union22_delta where ds = '1' and k0 > 50) b on
a.k1 = b.k1 and a.ds='1'
where a.k1 > 20
)
subq;

select * from dst_union22 where ds = '2';
-- union23.q

explain
select s.key2, s.value2
from (
  select transform(key, value) using 'cat' as (key2, value2)
  from src
  UNION DISTINCT 
  select key as key2, value as value2 from src) s
order by s.key2, s.value2;

select s.key2, s.value2
from (
  select transform(key, value) using 'cat' as (key2, value2)
  from src
  UNION DISTINCT 
  select key as key2, value as value2 from src) s
order by s.key2, s.value2;

-- union24.q

-- SORT_QUERY_RESULTS

create table src2 as select key, count(1) as count from src group by key;
create table src3 as select * from src2;
create table src4 as select * from src2;
create table src5 as select * from src2;


set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;


explain extended
select s.key, s.count from (
  select key, count from src2  where key < 10
  UNION DISTINCT
  select key, count from src3  where key < 10
  UNION DISTINCT
  select key, count from src4  where key < 10
  UNION DISTINCT
  select key, count(1) as count from src5 where key < 10 group by key
)s
;

select s.key, s.count from (
  select key, count from src2  where key < 10
  UNION DISTINCT
  select key, count from src3  where key < 10
  UNION DISTINCT
  select key, count from src4  where key < 10
  UNION DISTINCT
  select key, count(1) as count from src5 where key < 10 group by key
)s
;

explain extended
select s.key, s.count from (
  select key, count from src2  where key < 10
  UNION DISTINCT
  select key, count from src3  where key < 10
  UNION DISTINCT
  select a.key as key, b.count as count from src4 a join src5 b on a.key=b.key where a.key < 10
)s
;

select s.key, s.count from (
  select key, count from src2  where key < 10
  UNION DISTINCT
  select key, count from src3  where key < 10
  UNION DISTINCT
  select a.key as key, b.count as count from src4 a join src5 b on a.key=b.key where a.key < 10
)s
;

explain extended
select s.key, s.count from (
  select key, count from src2  where key < 10
  UNION DISTINCT
  select key, count from src3  where key < 10
  UNION DISTINCT
  select a.key as key, count(1) as count from src4 a join src5 b on a.key=b.key where a.key < 10 group by a.key
)s
;

select s.key, s.count from (
  select key, count from src2  where key < 10
  UNION DISTINCT
  select key, count from src3  where key < 10
  UNION DISTINCT
  select a.key as key, count(1) as count from src4 a join src5 b on a.key=b.key where a.key < 10 group by a.key
)s
;
-- union25.q

create table tmp_srcpart like srcpart;

insert overwrite table tmp_srcpart partition (ds='2008-04-08', hr='11')
select key, value from srcpart where ds='2008-04-08' and hr='11';

explain
create table tmp_unionall as
SELECT count(1) as counts, key, value
FROM
(
  SELECT key, value FROM srcpart a WHERE a.ds='2008-04-08' and a.hr='11'

    UNION DISTINCT

  SELECT key, key as value FROM (
    SELECT distinct key FROM (
      SELECT key, value FROM tmp_srcpart a WHERE a.ds='2008-04-08' and a.hr='11'
        UNION DISTINCT
      SELECT key, value FROM tmp_srcpart b WHERE b.ds='2008-04-08' and b.hr='11'
    )t
  ) master_table
) a GROUP BY key, value
;
-- union26.q

-- SORT_QUERY_RESULTS

EXPLAIN
SELECT 
count(1) as counts,
key,
value
FROM
(

SELECT
a.key, a.value
FROM srcpart a JOIN srcpart b 
ON a.ds='2008-04-08' and a.hr='11' and b.ds='2008-04-08' and b.hr='12'
AND a.key = b.key 

UNION DISTINCT

select key, value 
FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol
WHERE ds='2008-04-08' and hr='11'
) a
group by key, value
;

SELECT 
count(1) as counts,
key,
value
FROM
(

SELECT
a.key, a.value
FROM srcpart a JOIN srcpart b 
ON a.ds='2008-04-08' and a.hr='11' and b.ds='2008-04-08' and b.hr='12'
AND a.key = b.key 

UNION DISTINCT

select key, value 
FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol
WHERE ds='2008-04-08' and hr='11'
) a
group by key, value
;


SELECT 
count(1) as counts,
key,
value
FROM
(

SELECT
a.key, a.value
FROM srcpart a JOIN srcpart b 
ON a.ds='2008-04-08' and a.hr='11' and b.ds='2008-04-08' and b.hr='12'
AND a.key = b.key 

UNION DISTINCT

select key, value 
FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol
WHERE ds='2008-04-08' and hr='11'
) a
group by key, value
;

SELECT 
count(1) as counts,
key,
value
FROM
(

SELECT
a.key, a.value
FROM srcpart a JOIN srcpart b 
ON a.ds='2008-04-08' and a.hr='11' and b.ds='2008-04-08' and b.hr='12'
AND a.key = b.key 

UNION DISTINCT

select key, value 
FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol
WHERE ds='2008-04-08' and hr='11'
) a
group by key, value
;
-- union27.q

-- SORT_BEFORE_DIFF
create table jackson_sev_same as select * from src;
create table dim_pho as select * from src;
create table jackson_sev_add as select * from src;
explain select b.* from jackson_sev_same a join (select * from dim_pho UNION DISTINCT select * from jackson_sev_add)b on a.key=b.key and b.key=97;
select b.* from jackson_sev_same a join (select * from dim_pho UNION DISTINCT select * from jackson_sev_add)b on a.key=b.key and b.key=97;
-- union28.q

create table union_subq_union(key int, value string);

explain
insert overwrite table union_subq_union 
select * from (
  select key, value from src 
  UNION DISTINCT 
  select key, value from 
  (
    select key, value, count(1) from src group by key, value
    UNION DISTINCT
    select key, value, count(1) from src group by key, value
  ) subq
) a
;

insert overwrite table union_subq_union 
select * from (
  select key, value from src 
  UNION DISTINCT 
  select key, value from 
  (
    select key, value, count(1) from src group by key, value
    UNION DISTINCT
    select key, value, count(1) from src group by key, value
  ) subq
) a
;

select * from union_subq_union order by key, value limit 20;
-- union29.q

create table union_subq_union29(key int, value string);

explain
insert overwrite table union_subq_union29 
select * from (
  select key, value from src 
  UNION DISTINCT 
  select key, value from 
  (
    select key, value from src 
    UNION DISTINCT
    select key, value from src
  ) subq
) a
;

insert overwrite table union_subq_union29 
select * from (
  select key, value from src 
  UNION DISTINCT 
  select key, value from 
  (
    select key, value from src 
    UNION DISTINCT
    select key, value from src
  ) subq
) a
;

select * from union_subq_union29 order by key, value limit 20;
-- union3.q

-- SORT_BEFORE_DIFF

explain
SELECT *
FROM (
  SELECT 1 AS id
  FROM (SELECT * FROM src LIMIT 1) s1
  UNION DISTINCT
  SELECT 2 AS id
  FROM (SELECT * FROM src LIMIT 1) s1
  UNION DISTINCT
  SELECT 3 AS id
  FROM (SELECT * FROM src LIMIT 1) s2
  UNION DISTINCT
  SELECT 4 AS id
  FROM (SELECT * FROM src LIMIT 1) s2
  CLUSTER BY id
) a;


CREATE TABLE union_out (id int);

insert overwrite table union_out 
SELECT *
FROM (
  SELECT 1 AS id
  FROM (SELECT * FROM src LIMIT 1) s1
  UNION DISTINCT
  SELECT 2 AS id
  FROM (SELECT * FROM src LIMIT 1) s1
  UNION DISTINCT
  SELECT 3 AS id
  FROM (SELECT * FROM src LIMIT 1) s2
  UNION DISTINCT
  SELECT 4 AS id
  FROM (SELECT * FROM src LIMIT 1) s2
  CLUSTER BY id
) a;

select * from union_out;
-- union30.q

create table union_subq_union30(key int, value string);

explain
insert overwrite table union_subq_union30 
select * from (

select * from (
  select key, value from src 
  UNION DISTINCT 
  select key, value from 
  (
    select key, value, count(1) from src group by key, value
    UNION DISTINCT
    select key, value, count(1) from src group by key, value
  ) subq
) a

UNION DISTINCT

select key, value from src
) aa
;

insert overwrite table union_subq_union30 
select * from (

select * from (
  select key, value from src 
  UNION DISTINCT 
  select key, value from 
  (
    select key, value, count(1) from src group by key, value
    UNION DISTINCT
    select key, value, count(1) from src group by key, value
  ) subq
) a

UNION DISTINCT

select key, value from src
) aa
;

select * from union_subq_union30 order by key, value limit 20;
-- union31.q

-- SORT_QUERY_RESULTS

drop table t1;
drop table t2;


create table t1 as select * from src where key < 10;
create table t2 as select * from src where key < 10;

create table t3(key string, cnt int);
create table t4(value string, cnt int);

explain
from
(select * from t1
 UNION DISTINCT
 select * from t2
) x
insert overwrite table t3
  select key, count(1) group by key
insert overwrite table t4
  select value, count(1) group by value;

from
(select * from t1
 UNION DISTINCT
 select * from t2
) x
insert overwrite table t3
  select key, count(1) group by key
insert overwrite table t4
  select value, count(1) group by value;

select * from t3;
select * from t4;

create table t5(c1 string, cnt int);
create table t6(c1 string, cnt int);

explain
from
(
 select key as c1, count(1) as cnt from t1 group by key
   UNION DISTINCT
 select key as c1, count(1) as cnt from t2 group by key
) x
insert overwrite table t5
  select c1, sum(cnt) group by c1
insert overwrite table t6
  select c1, sum(cnt) group by c1;

from
(
 select key as c1, count(1) as cnt from t1 group by key
   UNION DISTINCT
 select key as c1, count(1) as cnt from t2 group by key
) x
insert overwrite table t5
  select c1, sum(cnt) group by c1
insert overwrite table t6
  select c1, sum(cnt) group by c1;

select * from t5;
select * from t6;

drop table t1;
drop table t2;

create table t1 as select * from src where key < 10;
create table t2 as select key, count(1) as cnt from src where key < 10 group by key;

create table t7(c1 string, cnt int);
create table t8(c1 string, cnt int);

explain
from
(
 select key as c1, count(1) as cnt from t1 group by key
   UNION DISTINCT
 select key as c1, cnt from t2
) x
insert overwrite table t7
  select c1, count(1) group by c1
insert overwrite table t8
  select c1, count(1) group by c1;

from
(
 select key as c1, count(1) as cnt from t1 group by key
   UNION DISTINCT
 select key as c1, cnt from t2
) x
insert overwrite table t7
  select c1, count(1) group by c1
insert overwrite table t8
  select c1, count(1) group by c1;

select * from t7;
select * from t8;
-- union32.q

-- SORT_QUERY_RESULTS

-- This tests various union queries which have columns on one side of the query
-- being of double type and those on the other side another

drop table if exists t1;

drop table if exists t2;

CREATE TABLE t1 AS SELECT * FROM src WHERE key < 10;
CREATE TABLE t2 AS SELECT * FROM src WHERE key < 10;

-- Test simple union with double
EXPLAIN
SELECT * FROM 
(SELECT CAST(key AS DOUBLE) AS key FROM t1
UNION DISTINCT
SELECT CAST(key AS BIGINT) AS key FROM t2) a
ORDER BY key;

SELECT * FROM 
(SELECT CAST(key AS DOUBLE) AS key FROM t1
UNION DISTINCT
SELECT CAST(key AS BIGINT) AS key FROM t2) a
;

-- Test union with join on the left
EXPLAIN
SELECT * FROM 
(SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key FROM t2) a
;

SELECT * FROM 
(SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key FROM t2) a
;

-- Test union with join on the right
EXPLAIN
SELECT * FROM 
(SELECT CAST(key AS DOUBLE) AS key FROM t2
UNION DISTINCT
SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a
;

SELECT * FROM 
(SELECT CAST(key AS DOUBLE) AS key FROM t2
UNION DISTINCT
SELECT CAST(a.key AS BIGINT) AS key FROM t1 a JOIN t2 b ON a.key = b.key) a
;

-- Test union with join on the left selecting multiple columns
EXPLAIN
SELECT * FROM 
(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
;

SELECT * FROM 
(SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key
UNION DISTINCT
SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2) a
;

-- Test union with join on the right selecting multiple columns
EXPLAIN
SELECT * FROM 
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION DISTINCT
SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
;

SELECT * FROM 
(SELECT CAST(key AS DOUBLE) AS key, CAST(key AS STRING) AS value FROM t2
UNION DISTINCT
SELECT CAST(a.key AS BIGINT) AS key, CAST(b.key AS DOUBLE) AS value FROM t1 a JOIN t2 b ON a.key = b.key) a
;
-- union33.q

-- SORT_BEFORE_DIFF
-- This tests that a UNION DISTINCT with a map only subquery on one side and a 
-- subquery involving two map reduce jobs on the other runs correctly.

drop table if exists test_src;

CREATE TABLE test_src (key STRING, value STRING);

EXPLAIN INSERT OVERWRITE TABLE test_src 
SELECT key, value FROM (
	SELECT key, value FROM src 
	WHERE key = 0
UNION DISTINCT
 	SELECT key, COUNT(*) AS value FROM src
 	GROUP BY key
)a;
 
INSERT OVERWRITE TABLE test_src 
SELECT key, value FROM (
	SELECT key, value FROM src 
	WHERE key = 0
UNION DISTINCT
 	SELECT key, COUNT(*) AS value FROM src
 	GROUP BY key
)a;
 
SELECT COUNT(*) FROM test_src;
 
EXPLAIN INSERT OVERWRITE TABLE test_src 
SELECT key, value FROM (
	SELECT key, COUNT(*) AS value FROM src
 	GROUP BY key
UNION DISTINCT
 	SELECT key, value FROM src 
	WHERE key = 0
)a;
 
INSERT OVERWRITE TABLE test_src 
SELECT key, value FROM (
	SELECT key, COUNT(*) AS value FROM src
 	GROUP BY key
UNION DISTINCT
 	SELECT key, value FROM src 
	WHERE key = 0
)a;
 
SELECT COUNT(*) FROM test_src;
 -- union34.q

create table src10_1 (key string, value string);
create table src10_2 (key string, value string);
create table src10_3 (key string, value string);
create table src10_4 (key string, value string);

from (select * from src tablesample (10 rows)) a
insert overwrite table src10_1 select *
insert overwrite table src10_2 select *
insert overwrite table src10_3 select *
insert overwrite table src10_4 select *;

set hive.auto.convert.join=true;
-- When we convert the Join of sub1 and sub0 into a MapJoin,
-- we can use a single MR job to evaluate this entire query.
explain
SELECT * FROM (
  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
  UNION DISTINCT
  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0
) alias1 order by key;

SELECT * FROM (
  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
  UNION DISTINCT
  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0
) alias1 order by key;

set hive.auto.convert.join=false;
-- When we do not convert the Join of sub1 and sub0 into a MapJoin,
-- we need to use two MR jobs to evaluate this query.
-- The first job is for the Join of sub1 and sub2. The second job
-- is for the UNION DISTINCT and ORDER BY.
explain
SELECT * FROM (
  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
  UNION DISTINCT
  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0
) alias1 order by key;

SELECT * FROM (
  SELECT sub1.key,sub1.value FROM (SELECT * FROM src10_1) sub1 JOIN (SELECT * FROM src10_2) sub0 ON (sub0.key = sub1.key)
  UNION DISTINCT
  SELECT key,value FROM (SELECT * FROM (SELECT * FROM src10_3) sub2 UNION DISTINCT SELECT * FROM src10_4 ) alias0
) alias1 order by key;
-- union4.q

 
-- union case: both subqueries are map-reduce jobs on same input, followed by filesink

drop table if exists tmptable;

create table tmptable(key string, value int);

explain 
insert overwrite table tmptable
  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                        UNION DISTINCT  
                                            select 'tst2' as key, count(1) as value from src s2) unionsrc;

insert overwrite table tmptable
select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1
                                        UNION DISTINCT  
                                          select 'tst2' as key, count(1) as value from src s2) unionsrc;

select * from tmptable x sort by x.key;


-- union5.q

 
-- SORT_BEFORE_DIFF
-- union case: both subqueries are map-reduce jobs on same input, followed by reduce sink

explain 
  select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
                                    UNION DISTINCT  
                                      select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key;

select unionsrc.key, count(1) FROM (select 'tst1' as key, count(1) as value from src s1
                                  UNION DISTINCT  
                                    select 'tst2' as key, count(1) as value from src s2) unionsrc group by unionsrc.key;
-- union6.q

 
-- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by filesink

drop table if exists tmptable;

create table tmptable(key string, value string);

explain 
insert overwrite table tmptable
  select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                        UNION DISTINCT  
                                            select s2.key as key, s2.value as value from src1 s2) unionsrc;

insert overwrite table tmptable
select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                      UNION DISTINCT  
                                          select s2.key as key, s2.value as value from src1 s2) unionsrc;

select * from tmptable x sort by x.key, x.value;


-- union7.q

 
-- SORT_BEFORE_DIFF
-- union case: 1 subquery is a map-reduce job, different inputs for sub-queries, followed by reducesink

explain 
  select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                        UNION DISTINCT  
                                            select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key;

select unionsrc.key, count(1) FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                                      UNION DISTINCT  
                                    select s2.key as key, s2.value as value from src1 s2) unionsrc group by unionsrc.key;


-- union8.q

-- SORT_BEFORE_DIFF
-- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by filesink

explain 
  select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION DISTINCT  
                                            select s2.key as key, s2.value as value from src s2 UNION DISTINCT  
                                            select s3.key as key, s3.value as value from src s3) unionsrc;

select unionsrc.key, unionsrc.value FROM (select s1.key as key, s1.value as value from src s1 UNION DISTINCT  
                                          select s2.key as key, s2.value as value from src s2 UNION DISTINCT  
                                          select s3.key as key, s3.value as value from src s3) unionsrc;
-- union9.q

-- SORT_BEFORE_DIFF
-- union case: all subqueries are a map-only jobs, 3 way union, same input for all sub-queries, followed by reducesink

explain 
  select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION DISTINCT  
                        select s2.key as key, s2.value as value from src s2 UNION DISTINCT
                        select s3.key as key, s3.value as value from src s3) unionsrc;

  select count(1) FROM (select s1.key as key, s1.value as value from src s1 UNION DISTINCT  
                        select s2.key as key, s2.value as value from src s2 UNION DISTINCT
                        select s3.key as key, s3.value as value from src s3) unionsrc;