PREHOOK: query: -- Note, its important that the partitions created below have a name greater than 16 characters in -- length since KeyVerifyingStatsAggregator depends on checking that a keyPrefix is hashed by the -- length of the keyPrefix, having a partition name greather than 16 characters guarantees no false -- positives. create table stats_part like srcpart PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@stats_part POSTHOOK: query: -- Note, its important that the partitions created below have a name greater than 16 characters in -- length since KeyVerifyingStatsAggregator depends on checking that a keyPrefix is hashed by the -- length of the keyPrefix, having a partition name greather than 16 characters guarantees no false -- positives. create table stats_part like srcpart POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@stats_part PREHOOK: query: -- The stats key should be hashed since the max length is too small insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 Stats prefix is hashed: true Stats prefix is hashed: true POSTHOOK: query: -- The stats key should be hashed since the max length is too small insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- The stats key should not be hashed since the max length is large enough insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 Stats prefix is hashed: false Stats prefix is hashed: false POSTHOOK: query: -- The stats key should not be hashed since the max length is large enough insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 Stats prefix is hashed: false Stats prefix is hashed: false POSTHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- Run the tests again and verify the stats are correct, this should verify that the stats publisher -- is hashing as well where appropriate -- The stats key should be hashed since the max length is too small insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: query: -- Run the tests again and verify the stats are correct, this should verify that the stats publisher -- is hashing as well where appropriate -- The stats key should be hashed since the max length is too small insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') PREHOOK: type: DESCTABLE PREHOOK: Input: default@stats_part POSTHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_part # col_name data_type comment key string default value string default # Partition Information # col_name data_type comment ds string hr string # Detailed Partition Information Partition Value: [2010-04-08, 13] Database: default Table: stats_part #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- The stats key should not be hashed since the max length is large enough insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: query: -- The stats key should not be hashed since the max length is large enough insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') PREHOOK: type: DESCTABLE PREHOOK: Input: default@stats_part POSTHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_part # col_name data_type comment key string default value string default # Partition Information # col_name data_type comment ds string hr string # Detailed Partition Information Partition Value: [2010-04-08, 13] Database: default Table: stats_part #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off insert overwrite table stats_part partition (ds='2010-04-08', hr = '13') select key, value from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') PREHOOK: type: DESCTABLE PREHOOK: Input: default@stats_part POSTHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_part # col_name data_type comment key string default value string default # Partition Information # col_name data_type comment ds string hr string # Detailed Partition Information Partition Value: [2010-04-08, 13] Database: default Table: stats_part #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- Do the same for dynamic partitions -- The stats key should be hashed since the max length is too small insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 Stats prefix is hashed: true Stats prefix is hashed: true POSTHOOK: query: -- Do the same for dynamic partitions -- The stats key should be hashed since the max length is too small insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- The stats key should not be hashed since the max length is large enough insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 Stats prefix is hashed: false Stats prefix is hashed: false POSTHOOK: query: -- The stats key should not be hashed since the max length is large enough insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 Stats prefix is hashed: false Stats prefix is hashed: false POSTHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- Run the tests again and verify the stats are correct, this should verify that the stats publisher -- is hashing as well where appropriate -- The stats key should be hashed since the max length is too small insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 POSTHOOK: query: -- Run the tests again and verify the stats are correct, this should verify that the stats publisher -- is hashing as well where appropriate -- The stats key should be hashed since the max length is too small insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') PREHOOK: type: DESCTABLE PREHOOK: Input: default@stats_part POSTHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_part # col_name data_type comment key string default value string default # Partition Information # col_name data_type comment ds string hr string # Detailed Partition Information Partition Value: [2010-04-08, 13] Database: default Table: stats_part #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- The stats key should not be hashed since the max length is large enough insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 POSTHOOK: query: -- The stats key should not be hashed since the max length is large enough insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') PREHOOK: type: DESCTABLE PREHOOK: Input: default@stats_part POSTHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_part # col_name data_type comment key string default value string default # Partition Information # col_name data_type comment ds string hr string # Detailed Partition Information Partition Value: [2010-04-08, 13] Database: default Table: stats_part #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1 PREHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@stats_part@ds=2010-04-08 POSTHOOK: query: -- The stats key should not be hashed since negative values should imply hashing is turned off insert overwrite table stats_part partition (ds='2010-04-08', hr) select key, value, '13' from src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@stats_part@ds=2010-04-08/hr=13 POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: stats_part PARTITION(ds=2010-04-08,hr=13).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') PREHOOK: type: DESCTABLE PREHOOK: Input: default@stats_part POSTHOOK: query: desc formatted stats_part partition (ds='2010-04-08', hr = '13') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@stats_part # col_name data_type comment key string default value string default # Partition Information # col_name data_type comment ds string hr string # Detailed Partition Information Partition Value: [2010-04-08, 13] Database: default Table: stats_part #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE true numFiles 1 numRows 500 rawDataSize 5312 totalSize 5812 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Storage Desc Params: serialization.format 1