Saving all output to "!!{outputDirectory}!!/sample10.q.raw". Enter "record" with no arguments to stop it.
>>> !run !!{qFileDirectory}!!/sample10.q
>>> 
>>> set hive.exec.dynamic.partition=true;
No rows affected
>>> set hive.exec.dynamic.partition.mode=nonstrict;
No rows affected
>>> set hive.enforce.bucketing=true;
No rows affected
>>> set hive.exec.reducers.max=4;
No rows affected
>>> set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
No rows affected
>>> set hive.default.fileformat=RCFILE;
No rows affected
>>> set hive.exec.pre.hooks = org.apache.hadoop.hive.ql.hooks.PreExecutePrinter,org.apache.hadoop.hive.ql.hooks.EnforceReadOnlyTables,org.apache.hadoop.hive.ql.hooks.UpdateInputAccessTimeHook$PreExec;
No rows affected
>>> 
>>> -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
>>> 
>>> create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets;
No rows affected
>>> 
>>> insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10;
'key','value','ds','hr'
No rows selected
>>> 
>>> explain extended select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpartbucket) (TOK_TABLEBUCKETSAMPLE 1 4 (TOK_TABLE_OR_COL key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL ds)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL ds))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL ds)))))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-2 depends on stages: Stage-1'
' Stage-0 is a root stage'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' srcpartbucket '
' TableScan'
' alias: srcpartbucket'
' GatherStats: false'
' Filter Operator'
' isSamplingPred: true'
' predicate:'
' expr: (((hash(key) & 2147483647) % 4) = 0)'
' type: boolean'
' Select Operator'
' expressions:'
' expr: ds'
' type: string'
' outputColumnNames: ds'
' Group By Operator'
' aggregations:'
' expr: count(1)'
' bucketGroup: false'
' keys:'
' expr: ds'
' type: string'
' mode: hash'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col1'
' type: bigint'
' Needs Tagging: false'
' Path -> Alias:'
' !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-08/hr=11/000000_0 [srcpartbucket]'
' !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-08/hr=12/000000_0 [srcpartbucket]'
' !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-09/hr=11/000000_0 [srcpartbucket]'
' !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-09/hr=12/000000_0 [srcpartbucket]'
' Path -> Partition:'
' !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-08/hr=11/000000_0 '
' Partition'
' base file name: 000000_0'
' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' partition values:'
' ds 2008-04-08'
' hr 11'
' properties:'
' bucket_count 4'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-08/hr=11'
' name sample10.srcpartbucket'
' numFiles 4'
' numPartitions 4'
' numRows 10'
' partition_columns ds/hr'
' rawDataSize 60'
' serialization.ddl struct srcpartbucket { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' totalSize 307'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' '
' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' properties:'
' bucket_count 4'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket'
' name sample10.srcpartbucket'
' numFiles 16'
' numPartitions 4'
' numRows 40'
' partition_columns ds/hr'
' rawDataSize 240'
' serialization.ddl struct srcpartbucket { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' totalSize 1228'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' name: sample10.srcpartbucket'
' name: sample10.srcpartbucket'
' !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-08/hr=12/000000_0 '
' Partition'
' base file name: 000000_0'
' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' partition values:'
' ds 2008-04-08'
' hr 12'
' properties:'
' bucket_count 4'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-08/hr=12'
' name sample10.srcpartbucket'
' numFiles 4'
' numPartitions 4'
' numRows 10'
' partition_columns ds/hr'
' rawDataSize 60'
' serialization.ddl struct srcpartbucket { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' totalSize 307'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' '
' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' properties:'
' bucket_count 4'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket'
' name sample10.srcpartbucket'
' numFiles 16'
' numPartitions 4'
' numRows 40'
' partition_columns ds/hr'
' rawDataSize 240'
' serialization.ddl struct srcpartbucket { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' totalSize 1228'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' name: sample10.srcpartbucket'
' name: sample10.srcpartbucket'
' !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-09/hr=11/000000_0 '
' Partition'
' base file name: 000000_0'
' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' partition values:'
' ds 2008-04-09'
' hr 11'
' properties:'
' bucket_count 4'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-09/hr=11'
' name sample10.srcpartbucket'
' numFiles 4'
' numPartitions 4'
' numRows 10'
' partition_columns ds/hr'
' rawDataSize 60'
' serialization.ddl struct srcpartbucket { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' totalSize 307'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' '
' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' properties:'
' bucket_count 4'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket'
' name sample10.srcpartbucket'
' numFiles 16'
' numPartitions 4'
' numRows 40'
' partition_columns ds/hr'
' rawDataSize 240'
' serialization.ddl struct srcpartbucket { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' totalSize 1228'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' name: sample10.srcpartbucket'
' name: sample10.srcpartbucket'
' !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-09/hr=12/000000_0 '
' Partition'
' base file name: 000000_0'
' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' partition values:'
' ds 2008-04-09'
' hr 12'
' properties:'
' bucket_count 4'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket/ds=2008-04-09/hr=12'
' name sample10.srcpartbucket'
' numFiles 4'
' numPartitions 4'
' numRows 10'
' partition_columns ds/hr'
' rawDataSize 60'
' serialization.ddl struct srcpartbucket { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' totalSize 307'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' '
' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' properties:'
' bucket_count 4'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.hive.ql.io.RCFileInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.RCFileOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/sample10.db/srcpartbucket'
' name sample10.srcpartbucket'
' numFiles 16'
' numPartitions 4'
' numRows 40'
' partition_columns ds/hr'
' rawDataSize 240'
' serialization.ddl struct srcpartbucket { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' totalSize 1228'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
' name: sample10.srcpartbucket'
' name: sample10.srcpartbucket'
' Reduce Operator Tree:'
' Group By Operator'
' aggregations:'
' expr: count(VALUE._col0)'
' bucketGroup: false'
' keys:'
' expr: KEY._col0'
' type: string'
' mode: mergepartial'
' outputColumnNames: _col0, _col1'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' outputColumnNames: _col0, _col1'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' directory: file:!!{hive.exec.scratchdir}!!'
' NumFilesPerFileSink: 1'
' table:'
' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
' properties:'
' columns _col0,_col1'
' columns.types string,bigint'
' escape.delim \'
' TotalFiles: 1'
' GatherStats: false'
' MultiFileSpray: false'
''
' Stage: Stage-2'
' Map Reduce'
' Alias -> Map Operator Tree:'
' file:!!{hive.exec.scratchdir}!! '
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' tag: -1'
' value expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: bigint'
' Needs Tagging: false'
' Path -> Alias:'
' file:!!{hive.exec.scratchdir}!! [file:!!{hive.exec.scratchdir}!!]'
' Path -> Partition:'
' file:!!{hive.exec.scratchdir}!! '
' Partition'
' base file name: -mr-10002'
' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
' properties:'
' columns _col0,_col1'
' columns.types string,bigint'
' escape.delim \'
' '
' input format: org.apache.hadoop.mapred.SequenceFileInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'
' properties:'
' columns _col0,_col1'
' columns.types string,bigint'
' escape.delim \'
' Reduce Operator Tree:'
' Extract'
' File Output Operator'
' compressed: false'
' GlobalTableId: 0'
' directory: file:!!{hive.exec.scratchdir}!!'
' NumFilesPerFileSink: 1'
' Stats Publishing Key Prefix: file:!!{hive.exec.scratchdir}!!'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' columns _col0,_col1'
' columns.types string:bigint'
' escape.delim \'
' serialization.format 1'
' TotalFiles: 1'
' GatherStats: false'
' MultiFileSpray: false'
''
' Stage: Stage-0'
' Fetch Operator'
' limit: -1'
''
''
359 rows selected
>>> 
>>> select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC;
'ds','_c1'
'2008-04-08','10'
'2008-04-09','10'
2 rows selected
>>> 
>>> select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds ORDER BY ds ASC;
'ds','_c1'
'2008-04-08','12'
'2008-04-09','12'
2 rows selected
>>> 
>>> select * from srcpartbucket where ds is not null ORDER BY key ASC, value ASC, ds ASC, hr ASC;
'key','value','ds','hr'
'0','val_0','2008-04-08','11'
'0','val_0','2008-04-08','11'
'0','val_0','2008-04-08','11'
'0','val_0','2008-04-08','12'
'0','val_0','2008-04-08','12'
'0','val_0','2008-04-08','12'
'0','val_0','2008-04-09','11'
'0','val_0','2008-04-09','11'
'0','val_0','2008-04-09','11'
'0','val_0','2008-04-09','12'
'0','val_0','2008-04-09','12'
'0','val_0','2008-04-09','12'
'2','val_2','2008-04-08','11'
'2','val_2','2008-04-08','12'
'2','val_2','2008-04-09','11'
'2','val_2','2008-04-09','12'
'4','val_4','2008-04-08','11'
'4','val_4','2008-04-08','12'
'4','val_4','2008-04-09','11'
'4','val_4','2008-04-09','12'
'5','val_5','2008-04-08','11'
'5','val_5','2008-04-08','11'
'5','val_5','2008-04-08','11'
'5','val_5','2008-04-08','12'
'5','val_5','2008-04-08','12'
'5','val_5','2008-04-08','12'
'5','val_5','2008-04-09','11'
'5','val_5','2008-04-09','11'
'5','val_5','2008-04-09','11'
'5','val_5','2008-04-09','12'
'5','val_5','2008-04-09','12'
'5','val_5','2008-04-09','12'
'8','val_8','2008-04-08','11'
'8','val_8','2008-04-08','12'
'8','val_8','2008-04-09','11'
'8','val_8','2008-04-09','12'
'9','val_9','2008-04-08','11'
'9','val_9','2008-04-08','12'
'9','val_9','2008-04-09','11'
'9','val_9','2008-04-09','12'
40 rows selected
>>> 
>>> 
>>> !record