PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer -- and populating that information in partitions' metadata, it also verifies that the -- number of reducers chosen will be a power of two CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@test_table POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer -- and populating that information in partitions' metadata, it also verifies that the -- number of reducers chosen will be a power of two CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 4 numRows 0 rawDataSize 0 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 4 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join, should be bucketed and sorted by join key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join, should be bucketed and sorted by join key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 4 numRows 0 rawDataSize 0 totalSize 11996 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 4 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 4 numRows 0 rawDataSize 0 totalSize 11996 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 4 Bucket Columns: [key, value] Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 4 numRows 0 rawDataSize 0 totalSize 31120 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 4 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 16 numRows 0 rawDataSize 0 totalSize 31120 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 16 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the -- key of the outer group by INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the -- key of the outer group by INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 0 rawDataSize 0 totalSize 24 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [value] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1