PREHOOK: query: -- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the -- a number of files equal to the number of buckets CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING) CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@test_table_bucketed POSTHOOK: query: -- Test writing to a bucketed table, the output should be bucketed by the bucketing key into the -- a number of files equal to the number of buckets CREATE TABLE test_table_bucketed (key STRING, value STRING) PARTITIONED BY (part STRING) CLUSTERED BY (value) SORTED BY (value) INTO 3 BUCKETS POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table_bucketed PREHOOK: query: -- Despite the fact that normally inferring would say this table is bucketed and sorted on key, -- this should be bucketed and sorted by value into 3 buckets INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1') SELECT key, count(1) FROM src GROUP BY KEY PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table_bucketed@part=1 POSTHOOK: query: -- Despite the fact that normally inferring would say this table is bucketed and sorted on key, -- this should be bucketed and sorted by value into 3 buckets INSERT OVERWRITE TABLE test_table_bucketed PARTITION (part = '1') SELECT key, count(1) FROM src GROUP BY KEY POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table_bucketed@part=1 POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table_bucketed PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table_bucketed POSTHOOK: query: DESCRIBE FORMATTED test_table_bucketed PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table_bucketed # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table_bucketed #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 3 numRows 0 rawDataSize 0 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 3 Bucket Columns: [value] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- If the count(*) from sampling the buckets matches the count(*) from each file, the table is -- bucketed SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table_bucketed PREHOOK: Input: default@test_table_bucketed@part=1 #### A masked pattern was here #### POSTHOOK: query: -- If the count(*) from sampling the buckets matches the count(*) from each file, the table is -- bucketed SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 1 OUT OF 3) WHERE part = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table_bucketed POSTHOOK: Input: default@test_table_bucketed@part=1 #### A masked pattern was here #### 31 PREHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table_bucketed PREHOOK: Input: default@test_table_bucketed@part=1 #### A masked pattern was here #### POSTHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 2 OUT OF 3) WHERE part = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table_bucketed POSTHOOK: Input: default@test_table_bucketed@part=1 #### A masked pattern was here #### 179 PREHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table_bucketed PREHOOK: Input: default@test_table_bucketed@part=1 #### A masked pattern was here #### POSTHOOK: query: SELECT COUNT(*) FROM test_table_bucketed TABLESAMPLE (BUCKET 3 OUT OF 3) WHERE part = '1' POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table_bucketed POSTHOOK: Input: default@test_table_bucketed@part=1 #### A masked pattern was here #### 99 PREHOOK: query: SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1' GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a PREHOOK: type: QUERY PREHOOK: Input: default@test_table_bucketed PREHOOK: Input: default@test_table_bucketed@part=1 #### A masked pattern was here #### POSTHOOK: query: SELECT cnt FROM (SELECT INPUT__FILE__NAME, COUNT(*) cnt FROM test_table_bucketed WHERE part = '1' GROUP BY INPUT__FILE__NAME ORDER BY INPUT__FILE__NAME ASC LIMIT 3) a POSTHOOK: type: QUERY POSTHOOK: Input: default@test_table_bucketed POSTHOOK: Input: default@test_table_bucketed@part=1 #### A masked pattern was here #### 31 179 99