-- Licensed to the Apache Software Foundation (ASF) under one or more -- contributor license agreements. See the NOTICE file distributed with -- this work for additional information regarding copyright ownership. -- The ASF licenses this file to You under the Apache License, Version 2.0 -- (the "License") you may not use this file except in compliance with -- the License. You may obtain a copy of the License at -- -- http://www.apache.org/licenses/LICENSE-2.0 -- -- Unless required by applicable law or agreed to in writing, software -- distributed under the License is distributed on an "AS IS" BASIS, -- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -- See the License for the specific language governing permissions and -- limitations under the License. DROP TABLE hbase_table_1 CREATE TABLE hbase_table_1(key int, value string) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string") TBLPROPERTIES ("hbase.table.name" = "hbase_table_0") DESCRIBE EXTENDED hbase_table_1 key int from deserializer value string from deserializer Detailed Table Information Table(tableName:hbase_table_1, dbName:default, owner:testuser1@MINOTAUR.CLOUDERA.COM, createTime:1301900428, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:hdfs://minotaur01.sf.cloudera.com:17020/user/hive/warehouse/hbase_table_1, inputFormat:org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat, outputFormat:org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.hbase.HBaseSerDe, parameters:{serialization.format=1, hbase.columns.mapping=cf:string}), bucketCols:[], sortCols:[], parameters:{}), partitionKeys:[], parameters:{hbase.table.name=hbase_table_0, transient_lastDdlTime=1301900428, storage_handler=org.apache.hadoop.hive.hbase.HBaseStorageHandler}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE) select * from hbase_table_1 EXPLAIN FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0 ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME hbase_table_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (% (TOK_TABLE_OR_COL key) 2) 0)))) STAGE DEPENDENCIES: Stage-0 is a root stage STAGE PLANS: Stage: Stage-0 Map Reduce Alias -> Map Operator Tree: src TableScan alias: src Filter Operator predicate: expr: ((key % 2) = 0) type: boolean Filter Operator predicate: expr: ((key % 2) = 0) type: boolean Select Operator expressions: expr: key type: string expr: value type: string outputColumnNames: _col0, _col1 Select Operator expressions: expr: UDFToInteger(_col0) type: int expr: _col1 type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 table: input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_1 FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0 DROP TABLE hbase_table_2 CREATE EXTERNAL TABLE hbase_table_2(key int, value string) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string") TBLPROPERTIES ("hbase.table.name" = "hbase_table_0") EXPLAIN SELECT Y.* FROM (SELECT hbase_table_1.* FROM hbase_table_1) x JOIN (SELECT src.* FROM src) Y ON (x.key = Y.key) ORDER BY key, value LIMIT 20 ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME src)))))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))) (TOK_LIMIT 20))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: x:hbase_table_1 TableScan alias: hbase_table_1 Select Operator expressions: expr: key type: int outputColumnNames: _col0 Reduce Output Operator key expressions: expr: UDFToDouble(_col0) type: double sort order: + Map-reduce partition columns: expr: UDFToDouble(_col0) type: double tag: 0 y:src TableScan alias: src Select Operator expressions: expr: key type: string expr: value type: string outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: UDFToDouble(_col0) type: double sort order: + Map-reduce partition columns: expr: UDFToDouble(_col0) type: double tag: 1 value expressions: expr: _col0 type: string expr: _col1 type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 {VALUE._col0} {VALUE._col1} handleSkewJoin: false outputColumnNames: _col2, _col3 Select Operator expressions: expr: _col2 type: string expr: _col3 type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: hdfs://minotaur01.sf.cloudera.com:17020/tmp/hive-testuser1/hive_2011-04-04_00-00-37_447_311787050586995300/-mr-10002 Reduce Output Operator key expressions: expr: _col0 type: string expr: _col1 type: string sort order: ++ tag: -1 value expressions: expr: _col0 type: string expr: _col1 type: string Reduce Operator Tree: Extract Limit File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: 20 SELECT Y.* FROM (SELECT hbase_table_1.* FROM hbase_table_1) x JOIN (SELECT src.* FROM src) Y ON (x.key = Y.key) ORDER BY key, value LIMIT 20 0 val_0 0 val_0 0 val_0 10 val_10 100 val_100 100 val_100 104 val_104 104 val_104 114 val_114 116 val_116 118 val_118 118 val_118 12 val_12 12 val_12 120 val_120 120 val_120 126 val_126 128 val_128 128 val_128 128 val_128 EXPLAIN SELECT Y.* FROM (SELECT hbase_table_1.* FROM hbase_table_1 WHERE hbase_table_1.key > 100) x JOIN (SELECT hbase_table_2.* FROM hbase_table_2 WHERE hbase_table_2.key < 120) Y ON (x.key = Y.key) ORDER BY key, value ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))) (TOK_WHERE (> (. (TOK_TABLE_OR_COL hbase_table_1) key) 100)))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_2)))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL hbase_table_2) key) 120)))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME Y)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL key)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL value))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: x:hbase_table_1 TableScan alias: hbase_table_1 Filter Operator predicate: expr: (key > 100) type: boolean Filter Operator predicate: expr: (key > 100) type: boolean Select Operator expressions: expr: key type: int outputColumnNames: _col0 Reduce Output Operator key expressions: expr: _col0 type: int sort order: + Map-reduce partition columns: expr: _col0 type: int tag: 0 y:hbase_table_2 TableScan alias: hbase_table_2 Filter Operator predicate: expr: (key < 120) type: boolean Filter Operator predicate: expr: (key < 120) type: boolean Select Operator expressions: expr: key type: int expr: value type: string outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: int sort order: + Map-reduce partition columns: expr: _col0 type: int tag: 1 value expressions: expr: _col0 type: int expr: _col1 type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 1 {VALUE._col0} {VALUE._col1} handleSkewJoin: false outputColumnNames: _col2, _col3 Select Operator expressions: expr: _col2 type: int expr: _col3 type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: hdfs://minotaur01.sf.cloudera.com:17020/tmp/hive-testuser1/hive_2011-04-04_00-01-03_920_5397268077686778739/-mr-10002 Reduce Output Operator key expressions: expr: _col0 type: int expr: _col1 type: string sort order: ++ tag: -1 value expressions: expr: _col0 type: int expr: _col1 type: string Reduce Operator Tree: Extract File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: -1 SELECT Y.* FROM (SELECT hbase_table_1.* FROM hbase_table_1 WHERE hbase_table_1.key > 100) x JOIN (SELECT hbase_table_2.* FROM hbase_table_2 WHERE hbase_table_2.key < 120) Y ON (x.key = Y.key) ORDER BY key,value 104 val_104 114 val_114 116 val_116 118 val_118 DROP TABLE empty_hbase_table CREATE TABLE empty_hbase_table(key int, value string) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "cf:string") DROP TABLE empty_normal_table CREATE TABLE empty_normal_table(key int, value string) select * from (select count(1) as c from empty_normal_table union all select count(1) as c from empty_hbase_table) x order by c 0 0 select * from (select count(1) c from empty_normal_table union all select count(1) as c from hbase_table_1) x order by c 0 155 select * from (select count(1) c from src union all select count(1) as c from empty_hbase_table) x order by c 0 500 select * from (select count(1) c from src union all select count(1) as c from hbase_table_1) x order by c 155 500 CREATE TABLE hbase_table_3(key int, value string, count int) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( "hbase.columns.mapping" = "cf:val,cf2:count" ) EXPLAIN INSERT OVERWRITE TABLE hbase_table_3 SELECT x.key, x.value, Y.count FROM (SELECT hbase_table_1.* FROM hbase_table_1) x JOIN (SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y ON (x.key = Y.key) ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME hbase_table_1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME hbase_table_1)))))) x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL src) key)) (TOK_SELEXPR (TOK_FUNCTION count (. (TOK_TABLE_OR_COL src) key)) count)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL src) key)))) Y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL Y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME hbase_table_3))) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL Y) count))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: y:src TableScan alias: src Select Operator expressions: expr: key type: string outputColumnNames: key Group By Operator aggregations: expr: count(key) bucketGroup: false keys: expr: key type: string mode: hash outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: _col0 type: string sort order: + Map-reduce partition columns: expr: _col0 type: string tag: -1 value expressions: expr: _col1 type: bigint Reduce Operator Tree: Group By Operator aggregations: expr: count(VALUE._col0) bucketGroup: false keys: expr: KEY._col0 type: string mode: mergepartial outputColumnNames: _col0, _col1 Select Operator expressions: expr: _col0 type: string expr: _col1 type: bigint outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-0 Map Reduce Alias -> Map Operator Tree: $INTNAME Reduce Output Operator key expressions: expr: UDFToDouble(_col0) type: double sort order: + Map-reduce partition columns: expr: UDFToDouble(_col0) type: double tag: 1 value expressions: expr: _col1 type: bigint x:hbase_table_1 TableScan alias: hbase_table_1 Select Operator expressions: expr: key type: int expr: value type: string outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: expr: UDFToDouble(_col0) type: double sort order: + Map-reduce partition columns: expr: UDFToDouble(_col0) type: double tag: 0 value expressions: expr: _col0 type: int expr: _col1 type: string Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col1} handleSkewJoin: false outputColumnNames: _col0, _col1, _col3 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: _col3 type: bigint outputColumnNames: _col0, _col1, _col2 Select Operator expressions: expr: _col0 type: int expr: _col1 type: string expr: UDFToInteger(_col2) type: int outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false GlobalTableId: 1 table: input format: org.apache.hadoop.hive.hbase.HiveHBaseTableInputFormat output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_3 INSERT OVERWRITE TABLE hbase_table_3 SELECT x.key, x.value, Y.count FROM (SELECT hbase_table_1.* FROM hbase_table_1) x JOIN (SELECT src.key, count(src.key) as count FROM src GROUP BY src.key) Y ON (x.key = Y.key) select count(1) from hbase_table_3 155 select * from hbase_table_3 order by key, value limit 5 0 val_0 3 2 val_2 1 4 val_4 1 8 val_8 1 10 val_10 1 select key, count from hbase_table_3 order by key, count desc limit 5 0 3 2 1 4 1 8 1 10 1 DROP TABLE hbase_table_4 CREATE TABLE hbase_table_4(key int, value1 string, value2 int, value3 int) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( "hbase.columns.mapping" = "a:b,a:c,d:e" ) INSERT OVERWRITE TABLE hbase_table_4 SELECT key, value, key+1, key+2 FROM src WHERE key=98 OR key=100 SELECT * FROM hbase_table_4 ORDER BY key 98 val_98 99 100 100 val_100 101 102 DROP TABLE hbase_table_5 CREATE EXTERNAL TABLE hbase_table_5(key int, value map) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "a:") TBLPROPERTIES ("hbase.table.name" = "hbase_table_4") SELECT * FROM hbase_table_5 ORDER BY key 98 {"b":"val_98","c":"99"} 100 {"b":"val_100","c":"101"} DROP TABLE hbase_table_6 CREATE TABLE hbase_table_6(key int, value map) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( "hbase.columns.mapping" = ":key,cf:" ) INSERT OVERWRITE TABLE hbase_table_6 SELECT key, map(value, key) FROM src WHERE key=98 OR key=100 SELECT * FROM hbase_table_6 ORDER BY key 98 {"val_98":"98"} 100 {"val_100":"100"} DROP TABLE hbase_table_7 CREATE TABLE hbase_table_7(value map, key int) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( "hbase.columns.mapping" = "cf:,:key" ) INSERT OVERWRITE TABLE hbase_table_7 SELECT map(value, key, upper(value), key+1), key FROM src WHERE key=98 OR key=100 SELECT * FROM hbase_table_7 ORDER BY key {"VAL_98":"99.0","val_98":"98"} 98 {"VAL_100":"101.0","val_100":"100"} 100 set hive.hbase.wal.enabled=false DROP TABLE hbase_table_8 CREATE TABLE hbase_table_8(key int, value1 string, value2 int, value3 int) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ( "hbase.columns.mapping" = "a:b,a:c,d:e" ) INSERT OVERWRITE TABLE hbase_table_8 SELECT key, value, key+1, key+2 FROM src WHERE key=98 OR key=100 SELECT * FROM hbase_table_8 ORDER BY key 98 val_98 99 100 100 val_100 101 102 DROP TABLE hbase_table_1 DROP TABLE hbase_table_2 DROP TABLE hbase_table_3 DROP TABLE hbase_table_4 DROP TABLE hbase_table_5 DROP TABLE hbase_table_6 DROP TABLE hbase_table_7 DROP TABLE hbase_table_8 DROP TABLE empty_hbase_table DROP TABLE empty_normal_table