Saving all output to "!!{outputDirectory}!!/rcfile_merge1.q.raw". Enter "record" with no arguments to stop it. >>> !run !!{qFileDirectory}!!/rcfile_merge1.q >>> set hive.merge.rcfile.block.level=false; No rows affected >>> set hive.exec.dynamic.partition=true; No rows affected >>> set mapred.max.split.size=100; No rows affected >>> set mapref.min.split.size=1; No rows affected >>> >>> DROP TABLE rcfile_merge1; No rows affected >>> DROP TABLE rcfile_merge1b; No rows affected >>> >>> CREATE TABLE rcfile_merge1 (key INT, value STRING) PARTITIONED BY (ds STRING, part STRING) STORED AS RCFILE; No rows affected >>> CREATE TABLE rcfile_merge1b (key INT, value STRING) PARTITIONED BY (ds STRING, part STRING) STORED AS RCFILE; No rows affected >>> >>> -- Use non block-level merge >>> EXPLAIN INSERT OVERWRITE TABLE rcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 100) as part FROM src; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME rcfile_merge1) (TOK_PARTSPEC (TOK_PARTVAL ds '1') (TOK_PARTVAL part)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION PMOD (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key)) 100) part))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5' ' Stage-4' ' Stage-0 depends on stages: Stage-4, Stage-3, Stage-6' ' Stage-2 depends on stages: Stage-0' ' Stage-3' ' Stage-5' ' Stage-6 depends on stages: Stage-5' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' src ' ' TableScan' ' alias: src' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: value' ' type: string' ' expr: pmod(hash(key), 100)' ' type: int' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: _col1' ' type: string' ' expr: _col2' ' type: int' ' outputColumnNames: _col0, _col1, _col2' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' table:' ' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat' ' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' ' name: rcfile_merge1.rcfile_merge1' '' ' Stage: Stage-7' ' Conditional Operator' '' ' Stage: Stage-4' ' Move Operator' ' files:' ' hdfs directory: true' ' destination: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' partition:' ' ds 1' ' part ' ' replace: true' ' table:' ' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat' ' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' ' name: rcfile_merge1.rcfile_merge1' '' ' Stage: Stage-2' ' Stats-Aggr Operator' '' ' Stage: Stage-3' ' Map Reduce' ' Alias -> Map Operator Tree:' ' pfile:!!{hive.exec.scratchdir}!! ' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat' ' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' ' name: rcfile_merge1.rcfile_merge1' '' ' Stage: Stage-5' ' Map Reduce' ' Alias -> Map Operator Tree:' ' pfile:!!{hive.exec.scratchdir}!! ' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat' ' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' ' name: rcfile_merge1.rcfile_merge1' '' ' Stage: Stage-6' ' Move Operator' ' files:' ' hdfs directory: true' ' destination: pfile:!!{hive.exec.scratchdir}!!' '' '' 105 rows selected >>> INSERT OVERWRITE TABLE rcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 100) as part FROM src; '_col0','_col1','_col2' No rows selected >>> >>> set hive.merge.rcfile.block.level=true; No rows affected >>> EXPLAIN INSERT OVERWRITE TABLE rcfile_merge1b PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 100) as part FROM src; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME src))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME rcfile_merge1b) (TOK_PARTSPEC (TOK_PARTVAL ds '1') (TOK_PARTVAL part)))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value)) (TOK_SELEXPR (TOK_FUNCTION PMOD (TOK_FUNCTION HASH (TOK_TABLE_OR_COL key)) 100) part))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5' ' Stage-4' ' Stage-0 depends on stages: Stage-4, Stage-3, Stage-6' ' Stage-2 depends on stages: Stage-0' ' Stage-3' ' Stage-5' ' Stage-6 depends on stages: Stage-5' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' src ' ' TableScan' ' alias: src' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' expr: value' ' type: string' ' expr: pmod(hash(key), 100)' ' type: int' ' outputColumnNames: _col0, _col1, _col2' ' Select Operator' ' expressions:' ' expr: UDFToInteger(_col0)' ' type: int' ' expr: _col1' ' type: string' ' expr: _col2' ' type: int' ' outputColumnNames: _col0, _col1, _col2' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' table:' ' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat' ' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' ' name: rcfile_merge1.rcfile_merge1b' '' ' Stage: Stage-7' ' Conditional Operator' '' ' Stage: Stage-4' ' Move Operator' ' files:' ' hdfs directory: true' ' destination: pfile:!!{hive.exec.scratchdir}!!' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' partition:' ' ds 1' ' part ' ' replace: true' ' table:' ' input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat' ' serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe' ' name: rcfile_merge1.rcfile_merge1b' '' ' Stage: Stage-2' ' Stats-Aggr Operator' '' ' Stage: Stage-3' ' Block level merge' '' ' Stage: Stage-5' ' Block level merge' '' ' Stage: Stage-6' ' Move Operator' ' files:' ' hdfs directory: true' ' destination: pfile:!!{hive.exec.scratchdir}!!' '' '' 85 rows selected >>> INSERT OVERWRITE TABLE rcfile_merge1b PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 100) as part FROM src; '_col0','_col1','_col2' No rows selected >>> >>> -- Verify >>> SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM rcfile_merge1 WHERE ds='1' ) t; '_c0' '59521204047' 1 row selected >>> SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(*) USING 'tr \t _' AS (c) FROM rcfile_merge1b WHERE ds='1' ) t; '_c0' '59521204047' 1 row selected >>> >>> DROP TABLE rcfile_merge1; No rows affected >>> DROP TABLE rcfile_merge1b; No rows affected >>> !record