Saving all output to "!!{outputDirectory}!!/reduce_deduplicate.q.raw". Enter "record" with no arguments to stop it.
>>>  !run !!{qFileDirectory}!!/reduce_deduplicate.q
>>>  set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
No rows affected
>>>  set hive.enforce.bucketing = true;
No rows affected
>>>  set hive.exec.reducers.max = 1;
No rows affected
>>>  set hive.exec.script.trust = true;
No rows affected
>>>  
>>>  
>>>  
>>>  CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS;
No rows affected
>>>  explain extended insert overwrite table bucket5_1 select * from src cluster by key;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME bucket5_1))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key))))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 depends on stages: Stage-1'
' Stage-2 depends on stages: Stage-0'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' src '
' TableScan'
' alias: src'
' GatherStats: false'
' Select Operator'
' expressions:'
' expr: key'
' type: string'
' expr: value'
' type: string'
' outputColumnNames: _col0, _col1'
' Reduce Output Operator'
' key expressions:'
' expr: _col0'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col0'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' Needs Tagging: false'
' Path -> Alias:'
' !!{hive.metastore.warehouse.dir}!!/reduce_deduplicate.db/src [src]'
' Path -> Partition:'
' !!{hive.metastore.warehouse.dir}!!/reduce_deduplicate.db/src '
' Partition'
' base file name: src'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/reduce_deduplicate.db/src'
' name reduce_deduplicate.src'
' numFiles 1'
' numPartitions 0'
' numRows 0'
' rawDataSize 0'
' serialization.ddl struct src { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 5812'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' '
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/reduce_deduplicate.db/src'
' name reduce_deduplicate.src'
' numFiles 1'
' numPartitions 0'
' numRows 0'
' rawDataSize 0'
' serialization.ddl struct src { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' totalSize 5812'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: reduce_deduplicate.src'
' name: reduce_deduplicate.src'
' Reduce Operator Tree:'
' Extract'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' outputColumnNames: _col0, _col1'
' File Output Operator'
' compressed: false'
' GlobalTableId: 1'
' directory: pfile:!!{hive.exec.scratchdir}!!'
' NumFilesPerFileSink: 2'
' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count 2'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/reduce_deduplicate.db/bucket5_1'
' name reduce_deduplicate.bucket5_1'
' serialization.ddl struct bucket5_1 { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: reduce_deduplicate.bucket5_1'
' TotalFiles: 2'
' GatherStats: true'
' MultiFileSpray: true'
''
' Stage: Stage-0'
' Move Operator'
' tables:'
' replace: true'
' source: pfile:!!{hive.exec.scratchdir}!!'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count 2'
' bucket_field_name key'
' columns key,value'
' columns.types string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/reduce_deduplicate.db/bucket5_1'
' name reduce_deduplicate.bucket5_1'
' serialization.ddl struct bucket5_1 { string key, string value}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: reduce_deduplicate.bucket5_1'
' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
''
' Stage: Stage-2'
' Stats-Aggr Operator'
' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
''
''
154 rows selected
>>>  
>>>  insert overwrite table bucket5_1 select * from src cluster by key;
'key','value'
No rows selected
>>>  
>>>  select sum(hash(key)),sum(hash(value)) from bucket5_1;
'_c0','_c1'
'21025334','36210398070'
1 row selected
>>>  select sum(hash(key)),sum(hash(value)) from src;
'_c0','_c1'
'21025334','36210398070'
1 row selected
>>>  
>>>  
>>>  create table complex_tbl_1(aid string, bid string, t int, ctime string, etime bigint, l string, et string) partitioned by (ds string);
No rows affected
>>>  
>>>  
>>>  create table complex_tbl_2(aet string, aes string) partitioned by (ds string);
No rows affected
>>>  
>>>  explain extended insert overwrite table complex_tbl_1 partition (ds='2010-03-29') select s2.* from ( select TRANSFORM (aid,bid,t,ctime,etime,l,et) USING 'cat' AS (aid string, bid string, t int, ctime string, etime bigint, l string, et string) from ( select transform(aet,aes) using 'cat' as (aid string, bid string, t int, ctime string, etime bigint, l string, et string) from complex_tbl_2 where ds ='2010-03-29' cluster by bid )s )s2;
'Explain'
'ABSTRACT SYNTAX TREE:'
' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME complex_tbl_2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL aet) (TOK_TABLE_OR_COL aes)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL aid TOK_STRING) (TOK_TABCOL bid TOK_STRING) (TOK_TABCOL t TOK_INT) (TOK_TABCOL ctime TOK_STRING) (TOK_TABCOL etime TOK_BIGINT) (TOK_TABCOL l TOK_STRING) (TOK_TABCOL et TOK_STRING))))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2010-03-29')) (TOK_CLUSTERBY (TOK_TABLE_OR_COL bid)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL aid) (TOK_TABLE_OR_COL bid) (TOK_TABLE_OR_COL t) (TOK_TABLE_OR_COL ctime) (TOK_TABLE_OR_COL etime) (TOK_TABLE_OR_COL l) (TOK_TABLE_OR_COL et)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL aid TOK_STRING) (TOK_TABCOL bid TOK_STRING) (TOK_TABCOL t TOK_INT) (TOK_TABCOL ctime TOK_STRING) (TOK_TABCOL etime TOK_BIGINT) (TOK_TABCOL l TOK_STRING) (TOK_TABCOL et TOK_STRING))))))) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME complex_tbl_1) (TOK_PARTSPEC (TOK_PARTVAL ds '2010-03-29')))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME s2))))))'
''
'STAGE DEPENDENCIES:'
' Stage-1 is a root stage'
' Stage-0 depends on stages: Stage-1'
' Stage-2 depends on stages: Stage-0'
''
'STAGE PLANS:'
' Stage: Stage-1'
' Map Reduce'
' Alias -> Map Operator Tree:'
' s2:s:complex_tbl_2 '
' TableScan'
' alias: complex_tbl_2'
' GatherStats: false'
' Filter Operator'
' isSamplingPred: false'
' predicate:'
' expr: (ds = '2010-03-29')'
' type: boolean'
' Select Operator'
' expressions:'
' expr: aet'
' type: string'
' expr: aes'
' type: string'
' outputColumnNames: _col0, _col1'
' Transform Operator'
' command: cat'
' output info:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' columns _col0,_col1,_col2,_col3,_col4,_col5,_col6'
' columns.types string,string,int,string,bigint,string,string'
' field.delim 9'
' serialization.format 9'
' Reduce Output Operator'
' key expressions:'
' expr: _col1'
' type: string'
' sort order: +'
' Map-reduce partition columns:'
' expr: _col1'
' type: string'
' tag: -1'
' value expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' expr: _col2'
' type: int'
' expr: _col3'
' type: string'
' expr: _col4'
' type: bigint'
' expr: _col5'
' type: string'
' expr: _col6'
' type: string'
' Needs Tagging: false'
' Reduce Operator Tree:'
' Extract'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' expr: _col2'
' type: int'
' expr: _col3'
' type: string'
' expr: _col4'
' type: bigint'
' expr: _col5'
' type: string'
' expr: _col6'
' type: string'
' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6'
' Transform Operator'
' command: cat'
' output info:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' columns _col0,_col1,_col2,_col3,_col4,_col5,_col6'
' columns.types string,string,int,string,bigint,string,string'
' field.delim 9'
' serialization.format 9'
' Select Operator'
' expressions:'
' expr: _col0'
' type: string'
' expr: _col1'
' type: string'
' expr: _col2'
' type: int'
' expr: _col3'
' type: string'
' expr: _col4'
' type: bigint'
' expr: _col5'
' type: string'
' expr: _col6'
' type: string'
' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6'
' File Output Operator'
' compressed: false'
' GlobalTableId: 1'
' directory: pfile:!!{hive.exec.scratchdir}!!'
' NumFilesPerFileSink: 1'
' Static Partition Specification: ds=2010-03-29/'
' Stats Publishing Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns aid,bid,t,ctime,etime,l,et'
' columns.types string:string:int:string:bigint:string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/reduce_deduplicate.db/complex_tbl_1'
' name reduce_deduplicate.complex_tbl_1'
' partition_columns ds'
' serialization.ddl struct complex_tbl_1 { string aid, string bid, i32 t, string ctime, i64 etime, string l, string et}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: reduce_deduplicate.complex_tbl_1'
' TotalFiles: 1'
' GatherStats: true'
' MultiFileSpray: false'
''
' Stage: Stage-0'
' Move Operator'
' tables:'
' partition:'
' ds 2010-03-29'
' replace: true'
' source: pfile:!!{hive.exec.scratchdir}!!'
' table:'
' input format: org.apache.hadoop.mapred.TextInputFormat'
' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' properties:'
' bucket_count -1'
' columns aid,bid,t,ctime,etime,l,et'
' columns.types string:string:int:string:bigint:string:string'
' file.inputformat org.apache.hadoop.mapred.TextInputFormat'
' file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
' location !!{hive.metastore.warehouse.dir}!!/reduce_deduplicate.db/complex_tbl_1'
' name reduce_deduplicate.complex_tbl_1'
' partition_columns ds'
' serialization.ddl struct complex_tbl_1 { string aid, string bid, i32 t, string ctime, i64 etime, string l, string et}'
' serialization.format 1'
' serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' transient_lastDdlTime !!UNIXTIME!!'
' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
' name: reduce_deduplicate.complex_tbl_1'
' tmp directory: pfile:!!{hive.exec.scratchdir}!!'
''
' Stage: Stage-2'
' Stats-Aggr Operator'
' Stats Aggregation Key Prefix: pfile:!!{hive.exec.scratchdir}!!'
''
''
170 rows selected
>>>  
>>>  
>>>  
>>>  
>>>  
>>>  !record
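
For reference, the first test case in the recorded session can be replayed by hand. The following is a minimal HiveQL sketch, not part of the captured output, that restates the recorded statements; it assumes a warehouse that already contains the standard src(key string, value string) sample table and that the statements run inside the reduce_deduplicate database.

-- Session settings as recorded: limit the job to a single reducer and
-- enforce bucketed output for the insert.
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.enforce.bucketing = true;
set hive.exec.reducers.max = 1;
set hive.exec.script.trust = true;

-- Bucketed target table and the CLUSTER BY insert from the transcript.
CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS;
INSERT OVERWRITE TABLE bucket5_1 SELECT * FROM src CLUSTER BY key;

-- Verification used above: both checksums should return the same pair,
-- '21025334','36210398070' in the recorded run.
SELECT sum(hash(key)), sum(hash(value)) FROM bucket5_1;
SELECT sum(hash(key)), sum(hash(value)) FROM src;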