-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements.  See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License") you may not use this file except in compliance with
-- the License.  You may obtain a copy of the License at
--
--     http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
DROP TABLE hbase_table_1

CREATE TABLE hbase_table_1(key int, value string) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string")
TBLPROPERTIES ("hbase.table.name" = "hbase_table_0")


DESCRIBE EXTENDED hbase_table_1
key	int	from deserializer
value	string	from deserializer
	 	 
Detailed Table Information	Table(tableName:hbase_table_1, dbName:default, owner:testuser1@MINOTAUR.CLOUDERA.COM, createTime:1301900428, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:hdfs://minotaur01.sf.cloudera.com:17020/user/hive/warehouse/hbase_table_1, inputFormat:org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat, outputFormat:org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.hbase.HBaseSerDe, parameters:{serialization.format=1, hbase.columns.mapping=cf:string}), bucketCols:[], sortCols:[], parameters:{}), partitionKeys:[], parameters:{hbase.table.name=hbase_table_0, transient_lastDdlTime=1301900428, storage_handler=org.apache.hadoop.hive.hbase.HBaseStorageHandler}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)	


select * from hbase_table_1


EXPLAIN FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME hbase_table_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (% (TOK_TABLE_OR_COL key) 2) 0))))

STAGE DEPENDENCIES:
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-0
    Map Reduce
      Alias -> Map Operator Tree:
        src 
          TableScan
            alias: src
            Filter Operator
              predicate:
                  expr: ((key % 2) = 0)
                  type: boolean
              Filter Operator
                predicate:
                    expr: ((key % 2) = 0)
                    type: boolean
                Select Operator
                  expressions:
                        expr: key
                        type: string
                        expr: value
                        type: string
                  outputColumnNames: _col0, _col1
                  Select Operator
                    expressions:
                          expr: UDFToInteger(_col0)
                          type: int
                          expr: _col1
                          type: string
                    outputColumnNames: _col0, _col1
                    File Output Operator
                      compressed: false
                      GlobalTableId: 1
                      table:
                          input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat
                          output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat
                          serde: org.apache.hadoop.hive.hbase.HBaseSerDe
                          name: default.hbase_table_1


FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0


DROP TABLE hbase_table_2

CREATE EXTERNAL TABLE hbase_table_2(key int, value string) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string")
TBLPROPERTIES ("hbase.table.name" = "hbase_table_0")


EXPLAIN 
SELECT Y.* 
FROM 
(SELECT hbase_table_1.* FROM hbase_table_1) x
JOIN 
(SELECT src.* FROM src) Y
ON (x.key = Y.key)
ORDER BY key, value LIMIT 20
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 20)))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        x:hbase_table_1 
          TableScan
            alias: hbase_table_1
            Select Operator
              expressions:
                    expr: key
                    type: int
              outputColumnNames: _col0
              Reduce Output Operator
                key expressions:
                      expr: UDFToDouble(_col0)
                      type: double
                sort order: +
                Map-reduce partition columns:
                      expr: UDFToDouble(_col0)
                      type: double
                tag: 0
        y:src 
          TableScan
            alias: src
            Select Operator
              expressions:
                    expr: key
                    type: string
                    expr: value
                    type: string
              outputColumnNames: _col0, _col1
              Reduce Output Operator
                key expressions:
                      expr: UDFToDouble(_col0)
                      type: double
                sort order: +
                Map-reduce partition columns:
                      expr: UDFToDouble(_col0)
                      type: double
                tag: 1
                value expressions:
                      expr: _col0
                      type: string
                      expr: _col1
                      type: string
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
            0 
            1 {VALUE._col0} {VALUE._col1}
          handleSkewJoin: false
          outputColumnNames: _col2, _col3
          Select Operator
            expressions:
                  expr: _col2
                  type: string
                  expr: _col3
                  type: string
            outputColumnNames: _col0, _col1
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
        hdfs://minotaur01.sf.cloudera.com:17020/tmp/hive-testuser1/hive_2011-04-04_00-00-37_447_311787050586995300/-mr-10002 
            Reduce Output Operator
              key expressions:
                    expr: _col0
                    type: string
                    expr: _col1
                    type: string
              sort order: ++
              tag: -1
              value expressions:
                    expr: _col0
                    type: string
                    expr: _col1
                    type: string
      Reduce Operator Tree:
        Extract
          Limit
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: 20


SELECT Y.* 
FROM 
(SELECT hbase_table_1.* FROM hbase_table_1) x
JOIN 
(SELECT src.* FROM src) Y
ON (x.key = Y.key)
ORDER BY key, value LIMIT 20
0	val_0
0	val_0
0	val_0
10	val_10
100	val_100
100	val_100
104	val_104
104	val_104
114	val_114
116	val_116
118	val_118
118	val_118
12	val_12
12	val_12
120	val_120
120	val_120
126	val_126
128	val_128
128	val_128
128	val_128


EXPLAIN 
SELECT Y.*
FROM 
(SELECT hbase_table_1.* FROM hbase_table_1 WHERE hbase_table_1.key > 100) x
JOIN 
(SELECT hbase_table_2.* FROM hbase_table_2 WHERE hbase_table_2.key < 120) Y
ON (x.key = Y.key)
ORDER BY key, value
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL hbase_table_1) key) 100)))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_2)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL hbase_table_2) key) 120)))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value)))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        x:hbase_table_1 
          TableScan
            alias: hbase_table_1
            Filter Operator
              predicate:
                  expr: (key > 100)
                  type: boolean
              Filter Operator
                predicate:
                    expr: (key > 100)
                    type: boolean
                Select Operator
                  expressions:
                        expr: key
                        type: int
                  outputColumnNames: _col0
                  Reduce Output Operator
                    key expressions:
                          expr: _col0
                          type: int
                    sort order: +
                    Map-reduce partition columns:
                          expr: _col0
                          type: int
                    tag: 0
        y:hbase_table_2 
          TableScan
            alias: hbase_table_2
            Filter Operator
              predicate:
                  expr: (key < 120)
                  type: boolean
              Filter Operator
                predicate:
                    expr: (key < 120)
                    type: boolean
                Select Operator
                  expressions:
                        expr: key
                        type: int
                        expr: value
                        type: string
                  outputColumnNames: _col0, _col1
                  Reduce Output Operator
                    key expressions:
                          expr: _col0
                          type: int
                    sort order: +
                    Map-reduce partition columns:
                          expr: _col0
                          type: int
                    tag: 1
                    value expressions:
                          expr: _col0
                          type: int
                          expr: _col1
                          type: string
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
            0 
            1 {VALUE._col0} {VALUE._col1}
          handleSkewJoin: false
          outputColumnNames: _col2, _col3
          Select Operator
            expressions:
                  expr: _col2
                  type: int
                  expr: _col3
                  type: string
            outputColumnNames: _col0, _col1
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
        hdfs://minotaur01.sf.cloudera.com:17020/tmp/hive-testuser1/hive_2011-04-04_00-01-03_920_5397268077686778739/-mr-10002 
            Reduce Output Operator
              key expressions:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
              sort order: ++
              tag: -1
              value expressions:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
      Reduce Operator Tree:
        Extract
          File Output Operator
            compressed: false
            GlobalTableId: 0
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1


SELECT Y.*
FROM 
(SELECT hbase_table_1.* FROM hbase_table_1 WHERE hbase_table_1.key > 100) x
JOIN 
(SELECT hbase_table_2.* FROM hbase_table_2 WHERE hbase_table_2.key < 120) Y
ON (x.key = Y.key)
ORDER BY key,value
104	val_104
114	val_114
116	val_116
118	val_118


DROP TABLE empty_hbase_table

CREATE TABLE empty_hbase_table(key int, value string) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string")


DROP TABLE empty_normal_table

CREATE TABLE empty_normal_table(key int, value string)


select * from (select count(1) as c from empty_normal_table union all select count(1) as c from empty_hbase_table) x order by c
0
0

select * from (select count(1) c from empty_normal_table union all select count(1) as c from hbase_table_1) x order by c
0
155

select * from (select count(1) c from src union all select count(1) as c from empty_hbase_table) x order by c
0
500

select * from (select count(1) c from src union all select count(1) as c from hbase_table_1) x order by c
155
500


CREATE TABLE hbase_table_3(key int, value string, count int) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = "cf:val,cf2:count"
)


EXPLAIN 
INSERT OVERWRITE TABLE hbase_table_3
SELECT x.key, x.value, Y.count 
FROM 
(SELECT hbase_table_1.* FROM hbase_table_1) x
JOIN 
(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y
ON (x.key = Y.key)
ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key)) count)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME hbase_table_3))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL Y) count)))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        y:src 
          TableScan
            alias: src
            Select Operator
              expressions:
                    expr: key
                    type: string
              outputColumnNames: key
              Group By Operator
                aggregations:
                      expr: count(key)
                bucketGroup: false
                keys:
                      expr: key
                      type: string
                mode: hash
                outputColumnNames: _col0, _col1
                Reduce Output Operator
                  key expressions:
                        expr: _col0
                        type: string
                  sort order: +
                  Map-reduce partition columns:
                        expr: _col0
                        type: string
                  tag: -1
                  value expressions:
                        expr: _col1
                        type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: count(VALUE._col0)
          bucketGroup: false
          keys:
                expr: KEY._col0
                type: string
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Select Operator
            expressions:
                  expr: _col0
                  type: string
                  expr: _col1
                  type: bigint
            outputColumnNames: _col0, _col1
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-0
    Map Reduce
      Alias -> Map Operator Tree:
        $INTNAME 
            Reduce Output Operator
              key expressions:
                    expr: UDFToDouble(_col0)
                    type: double
              sort order: +
              Map-reduce partition columns:
                    expr: UDFToDouble(_col0)
                    type: double
              tag: 1
              value expressions:
                    expr: _col1
                    type: bigint
        x:hbase_table_1 
          TableScan
            alias: hbase_table_1
            Select Operator
              expressions:
                    expr: key
                    type: int
                    expr: value
                    type: string
              outputColumnNames: _col0, _col1
              Reduce Output Operator
                key expressions:
                      expr: UDFToDouble(_col0)
                      type: double
                sort order: +
                Map-reduce partition columns:
                      expr: UDFToDouble(_col0)
                      type: double
                tag: 0
                value expressions:
                      expr: _col0
                      type: int
                      expr: _col1
                      type: string
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
            0 {VALUE._col0} {VALUE._col1}
            1 {VALUE._col1}
          handleSkewJoin: false
          outputColumnNames: _col0, _col1, _col3
          Select Operator
            expressions:
                  expr: _col0
                  type: int
                  expr: _col1
                  type: string
                  expr: _col3
                  type: bigint
            outputColumnNames: _col0, _col1, _col2
            Select Operator
              expressions:
                    expr: _col0
                    type: int
                    expr: _col1
                    type: string
                    expr: UDFToInteger(_col2)
                    type: int
              outputColumnNames: _col0, _col1, _col2
              File Output Operator
                compressed: false
                GlobalTableId: 1
                table:
                    input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat
                    output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat
                    serde: org.apache.hadoop.hive.hbase.HBaseSerDe
                    name: default.hbase_table_3


INSERT OVERWRITE TABLE hbase_table_3
SELECT x.key, x.value, Y.count 
FROM 
(SELECT hbase_table_1.* FROM hbase_table_1) x
JOIN 
(SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y
ON (x.key = Y.key)


select count(1) from hbase_table_3
155

select * from hbase_table_3 order by key, value limit 5
0	val_0	3
2	val_2	1
4	val_4	1
8	val_8	1
10	val_10	1

select key, count from hbase_table_3 order by key, count desc limit 5
0	3
2	1
4	1
8	1
10	1


DROP TABLE hbase_table_4

CREATE TABLE hbase_table_4(key int, value1 string, value2 int, value3 int) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = "a:b,a:c,d:e"
)


INSERT OVERWRITE TABLE hbase_table_4 SELECT key, value, key+1, key+2 
FROM src WHERE key=98 OR key=100


SELECT * FROM hbase_table_4 ORDER BY key
98	val_98	99	100
100	val_100	101	102


DROP TABLE hbase_table_5

CREATE EXTERNAL TABLE hbase_table_5(key int, value map<string,string>) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = "a:")
TBLPROPERTIES ("hbase.table.name" = "hbase_table_4")


SELECT * FROM hbase_table_5 ORDER BY key
98	{"b":"val_98","c":"99"}
100	{"b":"val_100","c":"101"}


DROP TABLE hbase_table_6

CREATE TABLE hbase_table_6(key int, value map<string,string>) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = ":key,cf:"
)

INSERT OVERWRITE TABLE hbase_table_6 SELECT key, map(value, key) FROM src
WHERE key=98 OR key=100


SELECT * FROM hbase_table_6 ORDER BY key
98	{"val_98":"98"}
100	{"val_100":"100"}


DROP TABLE hbase_table_7

CREATE TABLE hbase_table_7(value map<string,string>, key int) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = "cf:,:key"
)

INSERT OVERWRITE TABLE hbase_table_7 
SELECT map(value, key, upper(value), key+1), key FROM src
WHERE key=98 OR key=100


SELECT * FROM hbase_table_7 ORDER BY key
{"VAL_98":"99.0","val_98":"98"}	98
{"VAL_100":"101.0","val_100":"100"}	100
set hive.hbase.wal.enabled=false


DROP TABLE hbase_table_8

CREATE TABLE hbase_table_8(key int, value1 string, value2 int, value3 int) 
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES (
"hbase.columns.mapping" = "a:b,a:c,d:e"
)


INSERT OVERWRITE TABLE hbase_table_8 SELECT key, value, key+1, key+2 
FROM src WHERE key=98 OR key=100


SELECT * FROM hbase_table_8 ORDER BY key
98	val_98	99	100
100	val_100	101	102


DROP TABLE hbase_table_1

DROP TABLE hbase_table_2

DROP TABLE hbase_table_3

DROP TABLE hbase_table_4

DROP TABLE hbase_table_5

DROP TABLE hbase_table_6

DROP TABLE hbase_table_7

DROP TABLE hbase_table_8

DROP TABLE empty_hbase_table

DROP TABLE empty_normal_table