Saving all output to "!!{outputDirectory}!!/bucket_groupby.q.raw". Enter "record" with no arguments to stop it.
>>> !run !!{qFileDirectory}!!/bucket_groupby.q
>>> create table clustergroupby(key string, value string) partitioned by(ds string);
No rows affected
>>> describe extended clustergroupby;
'col_name','data_type','comment'
'key','string',''
'value','string',''
'ds','string',''
'','',''
'Detailed Table Information','Table(tableName:clustergroupby, dbName:bucket_groupby, owner:!!{user.name}!!, createTime:!!UNIXTIME!!, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:!!{hive.metastore.warehouse.dir}!!/bucket_groupby.db/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{transient_lastDdlTime=!!UNIXTIME!!}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)',''
5 rows selected
>>> alter table clustergroupby clustered by (key) into 1 buckets;
No rows affected
>>> 
>>> insert overwrite table clustergroupby partition (ds='100') select key, value from src sort by key;
'key','value'
No rows selected
>>> 
>>> explain select key, count(1) from clustergroupby where ds='100' group by key limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '100')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: key'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
70 rows selected
>>> select key, count(1) from clustergroupby where ds='100' group by key limit 10;
'key','_c1'
'0','3'
'10','1'
'100','2'
'103','2'
'104','2'
'105','1'
'11','1'
'111','1'
'113','2'
'114','1'
10 rows selected
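Note: the plan above reports bucketGroup: false even though the table was just bucketed by key. At this point the metadata still shows sortCols:[] (the ALTER only set bucketCols), and the map-side Group By Operator appears to flip bucketGroup only when the table also declares matching sort columns, as the ds='102' and ds='103' cases later in this log show. A table meant to qualify from the start could declare both properties in one statement; a minimal sketch (table name assumed, same columns as above):

create table clustergroupby_sorted(key string, value string)
partitioned by (ds string)
clustered by (key) sorted by (key) into 1 buckets;  -- bucket and sort metadata declared together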
>>> 
>>> describe extended clustergroupby;
'col_name','data_type','comment'
'key','string',''
'value','string',''
'ds','string',''
'','',''
'Detailed Table Information','Table(tableName:clustergroupby, dbName:bucket_groupby, owner:!!{user.name}!!, createTime:!!UNIXTIME!!, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:!!{hive.metastore.warehouse.dir}!!/bucket_groupby.db/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[key], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{numPartitions=1, numFiles=1, last_modified_by=!!ELIDED!!, last_modified_time=!!UNIXTIME!!, transient_lastDdlTime=!!UNIXTIME!!, numRows=500, totalSize=5812, rawDataSize=5312}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)',''
5 rows selected
>>> insert overwrite table clustergroupby partition (ds='101') select key, value from src distribute by key;
'key','value'
No rows selected
>>> 
>>> --normal--
>>> explain select key, count(1) from clustergroupby where ds='101' group by key limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: key'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
70 rows selected
>>> select key, count(1) from clustergroupby where ds='101' group by key limit 10;
'key','_c1'
'0','3'
'10','1'
'100','2'
'103','2'
'104','2'
'105','1'
'11','1'
'111','1'
'113','2'
'114','1'
10 rows selected
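Note: partition ds='100' was loaded with a bare SORT BY key (per-reducer ordering, arbitrary distribution), while ds='101' used DISTRIBUTE BY key (key co-location, no ordering). CLUSTER BY is the standard HiveQL shorthand that does both at once; a sketch of the combined form:

insert overwrite table clustergroupby partition (ds='101')
select key, value from src cluster by key;  -- equivalent to: distribute by key sort by key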
>>> 
>>> --function--
>>> explain select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION length (TOK_TABLE_OR_COL key))) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')) (TOK_GROUPBY (TOK_FUNCTION length (TOK_TABLE_OR_COL key))) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: length(key)'
' type: int'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: int'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: int'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: int'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: int'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
70 rows selected
>>> select length(key), count(1) from clustergroupby where ds='101' group by length(key) limit 10;
'_c0','_c1'
'1','10'
'2','74'
'3','416'
3 rows selected
>>> explain select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION abs (TOK_FUNCTION length (TOK_TABLE_OR_COL key)))) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')) (TOK_GROUPBY (TOK_FUNCTION abs (TOK_FUNCTION length (TOK_TABLE_OR_COL key)))) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: abs(length(key))'
' type: int'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: int'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: int'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: int'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: int'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
70 rows selected
>>> select abs(length(key)), count(1) from clustergroupby where ds='101' group by abs(length(key)) limit 10;
'_c0','_c1'
'1','10'
'2','74'
'3','416'
3 rows selected
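Note: when the grouping key is an expression, the map-side Group By Operator evaluates it directly (expr: length(key), type: int) as the hash key; no separate projection stage is added. The two queries above return identical rows because key lengths are non-negative, so abs(length(key)) = length(key). An equivalent formulation with an explicit subquery, as a sketch (alias names assumed):

select len, count(1)
from (select length(key) as len from clustergroupby where ds='101') t
group by len;  -- same three groups: lengths 1, 2, 3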
>>> 
>>> --constant--
>>> explain select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 3) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: key'
' type: string'
' expr: 3'
' type: int'
' mode: hash'
' outputColumnNames: _col0, _col1, _col2'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: int'
' sort order: ++'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: int'
' tag: -1'
' value expressions:'
' expr: _col2'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' expr: KEY._col1'
' type: int'
' mode: mergepartial'
' outputColumnNames: _col0, _col1, _col2'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col2'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
78 rows selected
>>> select key, count(1) from clustergroupby where ds='101' group by key,3 limit 10;
'key','_c1'
'0','3'
'10','1'
'100','2'
'103','2'
'104','2'
'105','1'
'11','1'
'111','1'
'113','2'
'114','1'
10 rows selected
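Note: the constant 3 is carried as a second grouping key through the whole plan (expr: 3, type: int, output column _col1) and only dropped in the final Select Operator. Since a constant can never split a group, the result is identical to grouping by key alone, as the matching 10 rows above and below confirm:

-- equivalent result, without the constant key:
select key, count(1) from clustergroupby where ds='101' group by key limit 10;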
>>> 
>>> --subquery--
>>> explain select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value) key) (TOK_SELEXPR (TOK_TABLE_OR_COL key) value)) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '101')))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' subq:clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: value'
' type: string'
' outputColumnNames: _col0'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' outputColumnNames: _col0'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: _col0'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
75 rows selected
>>> select key, count(1) from (select value as key, key as value from clustergroupby where ds='101')subq group by key limit 10;
'key','_c1'
'val_0','3'
'val_10','1'
'val_100','2'
'val_103','2'
'val_104','2'
'val_105','1'
'val_11','1'
'val_111','1'
'val_113','2'
'val_114','1'
10 rows selected
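Note: the subquery swaps the column names (value as key), so the outer GROUP BY key actually groups on the table's value column; the plan shows this as an extra Select Operator for the subquery projection, with the aggregation otherwise unchanged. A direct formulation without the renaming, as a sketch:

select value, count(1) from clustergroupby where ds='101' group by value limit 10;  -- same 'val_*' groups and counts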
>>> 
>>> explain select key, count(1) from clustergroupby group by key;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key))))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: key'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: -1'
''
''
69 rows selected
>>> select key, count(1) from clustergroupby group by key;
'key','_c1'
'0','6'
'10','2'
'100','4'
'103','4'
'104','4'
'105','2'
'11','2'
'111','2'
'113','4'
'114','2'
'116','2'
'118','4'
'119','6'
'12','4'
'120','4'
'125','4'
'126','2'
'128','6'
'129','4'
'131','2'
'133','2'
'134','4'
'136','2'
'137','4'
'138','8'
'143','2'
'145','2'
'146','4'
'149','4'
'15','4'
'150','2'
'152','4'
'153','2'
'155','2'
'156','2'
'157','2'
'158','2'
'160','2'
'162','2'
'163','2'
'164','4'
'165','4'
'166','2'
'167','6'
'168','2'
'169','8'
'17','2'
'170','2'
'172','4'
'174','4'
'175','4'
'176','4'
'177','2'
'178','2'
'179','4'
'18','4'
'180','2'
'181','2'
'183','2'
'186','2'
'187','6'
'189','2'
'19','2'
'190','2'
'191','4'
'192','2'
'193','6'
'194','2'
'195','4'
'196','2'
'197','4'
'199','6'
'2','2'
'20','2'
'200','4'
'201','2'
'202','2'
'203','4'
'205','4'
'207','4'
'208','6'
'209','4'
'213','4'
'214','2'
'216','4'
'217','4'
'218','2'
'219','4'
'221','4'
'222','2'
'223','4'
'224','4'
'226','2'
'228','2'
'229','4'
'230','10'
'233','4'
'235','2'
'237','4'
'238','4'
'239','4'
'24','4'
'241','2'
'242','4'
'244','2'
'247','2'
'248','2'
'249','2'
'252','2'
'255','4'
'256','4'
'257','2'
'258','2'
'26','4'
'260','2'
'262','2'
'263','2'
'265','4'
'266','2'
'27','2'
'272','4'
'273','6'
'274','2'
'275','2'
'277','8'
'278','4'
'28','2'
'280','4'
'281','4'
'282','4'
'283','2'
'284','2'
'285','2'
'286','2'
'287','2'
'288','4'
'289','2'
'291','2'
'292','2'
'296','2'
'298','6'
'30','2'
'302','2'
'305','2'
'306','2'
'307','4'
'308','2'
'309','4'
'310','2'
'311','6'
'315','2'
'316','6'
'317','4'
'318','6'
'321','4'
'322','4'
'323','2'
'325','4'
'327','6'
'33','2'
'331','4'
'332','2'
'333','4'
'335','2'
'336','2'
'338','2'
'339','2'
'34','2'
'341','2'
'342','4'
'344','4'
'345','2'
'348','10'
'35','6'
'351','2'
'353','4'
'356','2'
'360','2'
'362','2'
'364','2'
'365','2'
'366','2'
'367','4'
'368','2'
'369','6'
'37','4'
'373','2'
'374','2'
'375','2'
'377','2'
'378','2'
'379','2'
'382','4'
'384','6'
'386','2'
'389','2'
'392','2'
'393','2'
'394','2'
'395','4'
'396','6'
'397','4'
'399','4'
'4','2'
'400','2'
'401','10'
'402','2'
'403','6'
'404','4'
'406','8'
'407','2'
'409','6'
'41','2'
'411','2'
'413','4'
'414','4'
'417','6'
'418','2'
'419','2'
'42','4'
'421','2'
'424','4'
'427','2'
'429','4'
'43','2'
'430','6'
'431','6'
'432','2'
'435','2'
'436','2'
'437','2'
'438','6'
'439','4'
'44','2'
'443','2'
'444','2'
'446','2'
'448','2'
'449','2'
'452','2'
'453','2'
'454','6'
'455','2'
'457','2'
'458','4'
'459','4'
'460','2'
'462','4'
'463','4'
'466','6'
'467','2'
'468','8'
'469','10'
'47','2'
'470','2'
'472','2'
'475','2'
'477','2'
'478','4'
'479','2'
'480','6'
'481','2'
'482','2'
'483','2'
'484','2'
'485','2'
'487','2'
'489','8'
'490','2'
'491','2'
'492','4'
'493','2'
'494','2'
'495','2'
'496','2'
'497','2'
'498','6'
'5','6'
'51','4'
'53','2'
'54','2'
'57','2'
'58','4'
'64','2'
'65','2'
'66','2'
'67','4'
'69','2'
'70','6'
'72','4'
'74','2'
'76','4'
'77','2'
'78','2'
'8','2'
'80','2'
'82','2'
'83','4'
'84','4'
'85','2'
'86','2'
'87','2'
'9','2'
'90','6'
'92','2'
'95','4'
'96','2'
'97','4'
'98','4'
309 rows selected
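Note: this query has no ds predicate, so both loaded partitions (ds='100' and ds='101') are scanned and each key's count is the sum across partitions; key '0' shows 6 because each partition holds 3 copies. A per-partition breakdown of the same counts, as a sketch:

select ds, key, count(1) from clustergroupby group by ds, key;  -- splits each total above into its two partition-level counts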
>>> 
>>> explain select key, count(1) from clustergroupby group by key, 3;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (TOK_TABLE_OR_COL key) 3)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: key'
' type: string'
' expr: 3'
' type: int'
' mode: hash'
' outputColumnNames: _col0, _col1, _col2'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: int'
' sort order: ++'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: int'
' tag: -1'
' value expressions:'
' expr: _col2'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' expr: KEY._col1'
' type: int'
' mode: mergepartial'
' outputColumnNames: _col0, _col1, _col2'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col2'
' type: bigint'
' outputColumnNames: _col0, _col1'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: -1'
''
''
77 rows selected
>>> 
>>> -- number of buckets cannot be changed, so drop the table
>>> drop table clustergroupby;
No rows affected
>>> create table clustergroupby(key string, value string) partitioned by(ds string);
No rows affected
>>> 
>>> --sort columns--
>>> alter table clustergroupby clustered by (value) sorted by (key, value) into 1 buckets;
No rows affected
>>> describe extended clustergroupby;
'col_name','data_type','comment'
'key','string',''
'value','string',''
'ds','string',''
'','',''
'Detailed Table Information','Table(tableName:clustergroupby, dbName:bucket_groupby, owner:!!{user.name}!!, createTime:!!UNIXTIME!!, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:!!{hive.metastore.warehouse.dir}!!/bucket_groupby.db/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[value], sortCols:[Order(col:key, order:1), Order(col:value, order:1)], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{last_modified_by=!!ELIDED!!, last_modified_time=!!UNIXTIME!!, transient_lastDdlTime=!!UNIXTIME!!}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)',''
5 rows selected
>>> insert overwrite table clustergroupby partition (ds='102') select key, value from src distribute by value sort by key, value;
'key','value'
No rows selected
>>> 
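Note: the table now declares bucketCols [value] and sortCols [key, value], and the load above was written by hand to match that layout (distribute by value sort by key, value); Hive of this vintage does not verify that files honor the declared metadata, so the writer is responsible. If memory serves, releases of this era also expose legacy settings that make INSERT derive the distribution and ordering from the table metadata automatically; a sketch, assuming those settings:

set hive.enforce.bucketing = true;
set hive.enforce.sorting = true;
insert overwrite table clustergroupby partition (ds='102')
select key, value from src;  -- bucketing/sorting planned from the table's declared layout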
>>> explain select key, count(1) from clustergroupby where ds='102' group by key limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '102')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: true'
' keys:'
' expr: key'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
70 rows selected
>>> select key, count(1) from clustergroupby where ds='102' group by key limit 10;
'key','_c1'
'0','3'
'10','1'
'100','2'
'103','2'
'104','2'
'105','1'
'11','1'
'111','1'
'113','2'
'114','1'
10 rows selected
>>> explain select value, count(1) from clustergroupby where ds='102' group by value limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '102')) (TOK_GROUPBY (TOK_TABLE_OR_COL value)) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: value'
' type: string'
' outputColumnNames: value'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: value'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
70 rows selected
>>> select value, count(1) from clustergroupby where ds='102' group by value limit 10;
'value','_c1'
'val_0','3'
'val_10','1'
'val_100','2'
'val_103','2'
'val_104','2'
'val_105','1'
'val_11','1'
'val_111','1'
'val_113','2'
'val_114','1'
10 rows selected
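Note the contrast in the two plans above: grouping by key gives bucketGroup: true in the map-side Group By Operator, while grouping by value gives bucketGroup: false even though value is the bucket column. This suggests the flag tracks the table's declared sort order (sortCols = [key, value]), not bucketing alone. Expectations inferred from this log, as a sketch:

explain select key, count(1) from clustergroupby where ds='102' group by key;      -- bucketGroup: true  (key is the leading sort column)
explain select value, count(1) from clustergroupby where ds='102' group by value;  -- bucketGroup: false (value is bucketed, but not the leading sort column)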
>>> explain select key, count(1) from clustergroupby where ds='102' group by key, value limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '102')) (TOK_GROUPBY (TOK_TABLE_OR_COL key) (TOK_TABLE_OR_COL value)) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' expr: value'
' type: string'
' outputColumnNames: key, value'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: true'
' keys:'
' expr: key'
' type: string'
' expr: value'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1, _col2'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' sort order: ++'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col2'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' expr: KEY._col1'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1, _col2'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col2'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
80 rows selected
>>> select key, count(1) from clustergroupby where ds='102' group by key, value limit 10;
'key','_c1'
'0','3'
'10','1'
'100','2'
'103','2'
'104','2'
'105','1'
'11','1'
'111','1'
'113','2'
'114','1'
10 rows selected
>>> 
>>> -- number of buckets cannot be changed, so drop the table
>>> drop table clustergroupby;
No rows affected
>>> create table clustergroupby(key string, value string) partitioned by(ds string);
No rows affected
>>> 
>>> alter table clustergroupby clustered by (value, key) sorted by (key) into 1 buckets;
No rows affected
>>> describe extended clustergroupby;
'col_name','data_type','comment'
'key','string',''
'value','string',''
'ds','string',''
'','',''
'Detailed Table Information','Table(tableName:clustergroupby, dbName:bucket_groupby, owner:!!{user.name}!!, createTime:!!UNIXTIME!!, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:string, comment:null), FieldSchema(name:value, type:string, comment:null), FieldSchema(name:ds, type:string, comment:null)], location:!!{hive.metastore.warehouse.dir}!!/bucket_groupby.db/clustergroupby, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[value, key], sortCols:[Order(col:key, order:1)], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{})), partitionKeys:[FieldSchema(name:ds, type:string, comment:null)], parameters:{last_modified_by=!!ELIDED!!, last_modified_time=!!UNIXTIME!!, transient_lastDdlTime=!!UNIXTIME!!}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)',''
5 rows selected
>>> insert overwrite table clustergroupby partition (ds='103') select key, value from src distribute by value, key sort by key;
'key','value'
No rows selected
>>> explain select key, count(1) from clustergroupby where ds='103' group by key limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '103')) (TOK_GROUPBY (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' outputColumnNames: key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: true'
' keys:'
' expr: key'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
70 rows selected
>>> select key, count(1) from clustergroupby where ds='103' group by key limit 10;
'key','_c1'
'0','3'
'10','1'
'100','2'
'103','2'
'104','2'
'105','1'
'11','1'
'111','1'
'113','2'
'114','1'
10 rows selected
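Note: with sortCols reduced to [key] alone, grouping by key is again bucketGroup: true even though bucketing is now on (value, key), which is consistent with the sort-prefix reading above. The next plan, grouping by (value, key), should not qualify because the group-by key list does not start with the sort column; a sketch of the expectation:

explain select key, count(1) from clustergroupby where ds='103' group by value, key;  -- bucketGroup: false (sort column key is not the leading group-by key)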
>>> explain select key, count(1) from clustergroupby where ds='103' group by value, key limit 10;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME clustergroupby))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '103')) (TOK_GROUPBY (TOK_TABLE_OR_COL value) (TOK_TABLE_OR_COL key)) (TOK_LIMIT 10)))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' clustergroupby '
' TableScan'
' alias: clustergroupby'
' Select Operator'
' expressions:'
' expr: value'
' type: string'
' expr: key'
' type: string'
' outputColumnNames: value, key'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: value'
' type: string'
' expr: key'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1, _col2'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' sort order: ++'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col2'
' type: bigint'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' expr: KEY._col1'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1, _col2'
' Select Operator'
' expressions:'
' expr: _col1'
' type: string'
' expr: _col2'
' type: bigint'
' outputColumnNames: _col0, _col1'
' Limit'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: 10'
''
''
80 rows selected
>>> select key, count(1) from clustergroupby where ds='103' group by value, key limit 10;
'key','_c1'
'0','3'
'10','1'
'100','2'
'103','2'
'104','2'
'105','1'
'11','1'
'111','1'
'113','2'
'114','1'
10 rows selected
>>> !record
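Summary of the pattern recorded above (inferred from these plans, not stated by the log itself): the map-side bucketGroup flag is true exactly when the group-by key list begins with the table's declared sort columns, in order; bucketing alone is not enough.

-- sortCols []           : group by key        -> bucketGroup: false  (ds='100', ds='101')
-- sortCols [key, value] : group by key        -> bucketGroup: true   (ds='102')
-- sortCols [key, value] : group by value      -> bucketGroup: false  (ds='102')
-- sortCols [key, value] : group by key, value -> bucketGroup: true   (ds='102')
-- sortCols [key]        : group by key        -> bucketGroup: true   (ds='103')
-- sortCols [key]        : group by value, key -> bucketGroup: false  (ds='103')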