PREHOOK: query: -- This tests that bucketing/sorting metadata is not inferred for tables with list bucketing -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- create a skewed table CREATE TABLE list_bucketing_table (key STRING, value STRING) PARTITIONED BY (part STRING) SKEWED BY (key) ON ("484") STORED AS DIRECTORIES PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@list_bucketing_table POSTHOOK: query: -- This tests that bucketing/sorting metadata is not inferred for tables with list bucketing -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- create a skewed table CREATE TABLE list_bucketing_table (key STRING, value STRING) PARTITIONED BY (part STRING) SKEWED BY (key) ON ("484") STORED AS DIRECTORIES POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@list_bucketing_table PREHOOK: query: -- Tests group by, the output should neither be bucketed nor sorted INSERT OVERWRITE TABLE list_bucketing_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@list_bucketing_table@part=1 POSTHOOK: query: -- Tests group by, the output should neither be bucketed nor sorted INSERT OVERWRITE TABLE list_bucketing_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@list_bucketing_table@part=1 POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: list_bucketing_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESC FORMATTED list_bucketing_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@list_bucketing_table POSTHOOK: query: DESC FORMATTED list_bucketing_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@list_bucketing_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: list_bucketing_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 2 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Stored As SubDirectories: Yes Skewed Columns: [key] Skewed Values: [[484]] #### A masked pattern was here #### Skewed Value to Truncated Path: {[484]=/list_bucketing_table/part=1/key=484} Storage Desc Params: serialization.format 1 PREHOOK: query: -- create a table skewed on a key which doesnt exist in the data CREATE TABLE list_bucketing_table2 (key STRING, value STRING) PARTITIONED BY (part STRING) SKEWED BY (key) ON ("abc") STORED AS DIRECTORIES PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@list_bucketing_table2 POSTHOOK: query: -- create a table skewed on a key which doesnt exist in the data CREATE TABLE list_bucketing_table2 (key STRING, value STRING) PARTITIONED BY (part STRING) SKEWED BY (key) ON ("abc") STORED AS DIRECTORIES POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@list_bucketing_table2 PREHOOK: query: -- should not be bucketed or sorted INSERT OVERWRITE TABLE list_bucketing_table2 PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@list_bucketing_table2@part=1 POSTHOOK: query: -- should not be bucketed or sorted INSERT OVERWRITE TABLE list_bucketing_table2 PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@list_bucketing_table2@part=1 POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: list_bucketing_table2 PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESC FORMATTED list_bucketing_table2 PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@list_bucketing_table2 POSTHOOK: query: DESC FORMATTED list_bucketing_table2 PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@list_bucketing_table2 # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: list_bucketing_table2 #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Stored As SubDirectories: Yes Skewed Columns: [key] Skewed Values: [[abc]] Storage Desc Params: serialization.format 1