-- remove any hbsort / hbpartition tables left behind by an earlier run so
-- that the create statements further down start from a clean slate
drop table hbsort;
drop table hbpartition;

-- hbsort is a dummy table that this script only ever writes to. Its sole
-- purpose is to route inserted rows through HiveHFileOutputFormat, with
-- the hfile.family.path property naming the directory where the HFiles
-- are expected to land
create table hbsort (
  key string,
  val string,
  val2 string
)
stored as
  inputformat 'org.apache.hadoop.mapred.TextInputFormat'
  outputformat 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat'
tblproperties ('hfile.family.path' = '/tmp/hbsort/cf');

-- hbpartition is a dummy table as well. Rows inserted into it are
-- serialized with BinarySortableSerDe and written under /tmp/hbpartitions,
-- which is how the input file for TotalOrderPartitioner gets created
create external table hbpartition (part_break string)
row format serde 'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe'
stored as
  inputformat 'org.apache.hadoop.mapred.TextInputFormat'
  outputformat 'org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat'
location '/tmp/hbpartitions';

-- write the two break keys that split the data into three ranges.
-- TotalOrderPartitioner needs its break keys sorted and in a single file,
-- so the order by states both requirements explicitly instead of relying
-- on the distinct happening to run in exactly one reducer.
-- this should produce one file in /tmp/hbpartitions, but we do not
-- know what it will be called, so we will copy it to a well known
-- filename /tmp/hbpartition.lst
insert overwrite table hbpartition
select distinct value
from src
where value='val_100' or value='val_200'
order by value;

dfs -count /tmp/hbpartitions;
dfs -cp /tmp/hbpartitions/* /tmp/hbpartition.lst;

set hive.mapred.partitioner=org.apache.hadoop.mapred.lib.TotalOrderPartitioner;
set total.order.partitioner.natural.order=false;
set total.order.partitioner.path=/tmp/hbpartition.lst;
set mapred.reduce.tasks=3;

-- with three reducers partitioned on the break keys in
-- /tmp/hbpartition.lst, this should produce three files in /tmp/hbsort/cf
-- the row for key 103 gets a null val and an empty val2 so that nulls and
-- trailing blanks are exercised too
insert overwrite table hbsort
select distinct
  value,
  case
    when key=103 then cast(null as string)
    else key
  end,
  case
    when key=103 then ''
    else cast(key+1 as string)
  end
from src
cluster by value;

dfs -count /tmp/hbsort/cf;

-- Optional manual step: to get the generated files out to your local
-- filesystem for loading into HBase, first run mkdir -p /tmp/blah/cf,
-- then uncomment the line below and terminate it with a semicolon
-- before running this test:
-- dfs -copyToLocal /tmp/hbsort/cf/* /tmp/blah/cf

-- drop the two helper tables created by this script
-- NOTE(review): the drops presumably leave /tmp/hbsort/cf, /tmp/hbpartitions
-- and /tmp/hbpartition.lst behind (external table and plain copies) -
-- confirm whether the test harness cleans these paths up
drop table hbsort;
drop table hbpartition;