PREHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer -- and populating that information in partitions' metadata CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@test_table POSTHOOK: query: -- This tests inferring how data is bucketed/sorted from the operators in the reducer -- and populating that information in partitions' metadata CREATE TABLE test_table (key STRING, value STRING) PARTITIONED BY (part STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table PREHOOK: query: -- Test group by, should be bucketed and sorted by group by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by, should be bucketed and sorted by group by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key, value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by where a key isn't selected, should not be bucketed or sorted INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, count(*) FROM src GROUP BY key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join, should be bucketed and sorted by join key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join, should be bucketed and sorted by join key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 1028 rawDataSize 10968 totalSize 11996 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join with two keys, should be bucketed and sorted by join keys INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, a.value FROM src a JOIN src b ON a.key = b.key AND a.value = b.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 1028 rawDataSize 10968 totalSize 11996 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key, value] Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join with two keys and only one selected, should not be bucketed or sorted INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, '1' FROM src a JOIN src b ON a.key = b.key AND a.value = b.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 1028 rawDataSize 4970 totalSize 5998 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join on three tables on same key, should be bucketed and sorted by join key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.key = c.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 2654 rawDataSize 28466 totalSize 31120 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join on three tables on different keys, should be bucketed and sorted by latter key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, c.value FROM src a JOIN src b ON (a.key = b.key) JOIN src c ON (b.value = c.value) POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 2654 rawDataSize 28466 totalSize 31120 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test distribute by, should only be bucketed by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src DISTRIBUTE BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test distribute by, should only be bucketed by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src DISTRIBUTE BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test sort by, should be sorted by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src SORT BY key ASC PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test sort by, should be sorted by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src SORT BY key ASC POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test sort by desc, should be sorted by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src SORT BY key DESC PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test sort by desc, should be sorted by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src SORT BY key DESC POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [Order(col:key, order:0)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test cluster by, should be bucketed and sorted by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src CLUSTER BY key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test cluster by, should be bucketed and sorted by key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src CLUSTER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src DISTRIBUTE BY key SORT BY value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test distribute by and sort by different keys, should be bucketed by one key sorted by the other INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM src DISTRIBUTE BY key SORT BY value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join in simple subquery, should be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join in simple subquery, should be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value from (SELECT a.key, b.value FROM src a JOIN src b ON (a.key = b.key)) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 1028 rawDataSize 10968 totalSize 11996 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test join in simple subquery renaming key column, should be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test join in simple subquery renaming key column, should be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT k, value FROM (SELECT a.key as k, b.value FROM src a JOIN src b ON (a.key = b.key)) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)a.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)a.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 1028 rawDataSize 10968 totalSize 11996 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in simple subquery, should be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in simple subquery, should be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, cnt from (SELECT key, count(*) as cnt FROM src GROUP BY key) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in simple subquery renaming key column, should be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in simple subquery renaming key column, should be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT k, cnt FROM (SELECT key as k, count(*) as cnt FROM src GROUP BY key) subq POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in subquery with where outside, should still be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 6 rawDataSize 18 totalSize 24 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery with expression on value, should still be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in subquery with expression on value, should still be bucketed and sorted on key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value + 1 FROM (SELECT key, count(1) AS value FROM src group by key) a where key < 10 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 6 rawDataSize 18 totalSize 24 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in subquery with lateral view outside, should still be bucketed and sorted INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key FROM src group by key) a lateral view explode(array(1, 2)) value as value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 618 rawDataSize 2964 totalSize 3582 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the -- key of the outer group by INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in subquery with another group by outside, should be bucketed and sorted by the -- key of the outer group by INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT count(1), value FROM (SELECT key, count(1) as value FROM src group by key) a group by value POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 5 rawDataSize 19 totalSize 24 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [value] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and -- sorted by the column the group by key ends up in INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in subquery with select on outside reordering the columns, should be bucketed and -- sorted by the column the group by key ends up in INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT value, key FROM (SELECT key, count(1) as value FROM src group by key) a POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [value] Sort Columns: [Order(col:value, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in subquery followed by distribute by, should only be bucketed by the distribute key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a distribute by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in subquery followed by sort by, should only be sorted by the sort key INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT key, count(1) as value FROM src group by key) a sort by key POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value EXPRESSION [(src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key] Sort Columns: [Order(col:key, order:1)] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by in subquery followed by transform script, should not be bucketed or sorted INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by in subquery followed by transform script, should not be bucketed or sorted INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT TRANSFORM (a.key, a.value) USING 'cat' AS (key, value) FROM (SELECT key, count(1) AS value FROM src GROUP BY KEY) a POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.null, ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 1482 totalSize 1791 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 POSTHOOK: query: -- Test group by on function, should be bucketed and sorted by key and value because the function is applied in the mapper INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT key, value FROM (SELECT concat(key, "a") AS key, value, count(*) FROM src GROUP BY concat(key, "a"), value) a POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@test_table@part=1 POSTHOOK: Lineage: test_table PARTITION(part=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: test_table PARTITION(part=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@test_table POSTHOOK: query: DESCRIBE FORMATTED test_table PARTITION (part = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@test_table # col_name data_type comment key string value string # Partition Information # col_name data_type comment part string # Detailed Partition Information Partition Value: [1] Database: default Table: test_table #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 309 rawDataSize 3582 totalSize 3891 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: 1 Bucket Columns: [key, value] Sort Columns: [Order(col:key, order:1), Order(col:value, order:1)] Storage Desc Params: serialization.format 1