Saving all output to "!!{outputDirectory}!!/bucketizedhiveinputformat.q.raw". Enter "record" with no arguments to stop it. >>> !run !!{qFileDirectory}!!/bucketizedhiveinputformat.q >>> set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; No rows affected >>> set mapred.min.split.size = 64; No rows affected >>> >>> CREATE TABLE T1(name STRING) STORED AS TEXTFILE; No rows affected >>> >>> LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE T1; No rows affected >>> >>> CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE; No rows affected >>> >>> EXPLAIN INSERT OVERWRITE TABLE T2 SELECT * FROM ( SELECT tmp1.name as name FROM ( SELECT name, 'MMM' AS n FROM T1) tmp1 JOIN (SELECT 'MMM' AS n FROM T1) tmp2 JOIN (SELECT 'MMM' AS n FROM T1) tmp3 ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL name)) (TOK_SELEXPR 'MMM' n)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp2)) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp3) (AND (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp2) n)) (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp3) n))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) name) name)))) ttt)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 5000000)))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-2 depends on stages: Stage-1' ' Stage-3 depends on stages: Stage-2' ' Stage-0 depends on stages: Stage-3' ' Stage-4 depends on stages: Stage-0' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' ttt:tmp1:t1 ' ' TableScan' ' alias: t1' ' Select Operator' ' expressions:' ' expr: name' ' type: string' ' expr: 'MMM'' ' type: string' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' sort order: ' ' tag: 0' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' ttt:tmp2:t1 ' ' TableScan' ' alias: t1' ' Select Operator' ' expressions:' ' expr: 'MMM'' ' type: string' ' outputColumnNames: _col0' ' Reduce Output Operator' ' sort order: ' ' tag: 1' ' value expressions:' ' expr: _col0' ' type: string' ' Reduce Operator Tree:' ' Join Operator' ' condition map:' ' Inner Join 0 to 1' ' condition expressions:' ' 0 {VALUE._col0} {VALUE._col1}' ' 1 {VALUE._col0}' ' handleSkewJoin: false' ' outputColumnNames: _col0, _col1, _col2' ' Filter Operator' ' predicate:' ' expr: (_col1 = _col2)' ' type: boolean' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' '' ' Stage: Stage-2' ' Map Reduce' ' Alias -> Map Operator Tree:' ' $INTNAME ' ' Reduce Output Operator' ' key expressions:' ' expr: _col1' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col1' ' type: string' ' tag: 0' ' value expressions:' ' expr: _col0' ' type: string' ' ttt:tmp3:t1 ' ' TableScan' ' alias: t1' ' 
Select Operator' ' expressions:' ' expr: 'MMM'' ' type: string' ' outputColumnNames: _col0' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: 1' ' Reduce Operator Tree:' ' Join Operator' ' condition map:' ' Inner Join 0 to 1' ' condition expressions:' ' 0 {VALUE._col1}' ' 1 ' ' handleSkewJoin: false' ' outputColumnNames: _col1' ' Select Operator' ' expressions:' ' expr: _col1' ' type: string' ' outputColumnNames: _col0' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' outputColumnNames: _col0' ' Limit' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' '' ' Stage: Stage-3' ' Map Reduce' ' Alias -> Map Operator Tree:' ' file:!!{hive.exec.scratchdir}!! ' ' Reduce Output Operator' ' sort order: ' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: string' ' Reduce Operator Tree:' ' Extract' ' Limit' ' File Output Operator' ' compressed: false' ' GlobalTableId: 1' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: bucketizedhiveinputformat.t2' '' ' Stage: Stage-0' ' Move Operator' ' tables:' ' replace: true' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' ' serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' ' name: bucketizedhiveinputformat.t2' '' ' Stage: Stage-4' ' Stats-Aggr Operator' '' '' 162 rows selected >>> >>> >>> INSERT OVERWRITE TABLE T2 SELECT * FROM ( SELECT tmp1.name as name FROM ( SELECT name, 'MMM' AS n FROM T1) tmp1 JOIN (SELECT 'MMM' AS n FROM T1) tmp2 JOIN (SELECT 'MMM' AS n FROM T1) tmp3 ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000; 'name' No rows selected >>> >>> EXPLAIN SELECT COUNT(1) FROM T2; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1)))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 is a root stage' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t2 ' ' TableScan' ' alias: t2' ' Select Operator' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' mode: hash' ' outputColumnNames: _col0' ' Reduce Output Operator' ' sort order: ' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: bigint' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' mode: mergepartial' ' outputColumnNames: _col0' ' Select Operator' ' expressions:' ' expr: _col0' ' type: bigint' ' outputColumnNames: _col0' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' '' ' Stage: Stage-0' ' Fetch Operator' ' limit: -1' '' 50 rows selected >>> SELECT COUNT(1) FROM T2; '_c0' '5000000' 1 row selected >>> >>> CREATE TABLE T3(name STRING) STORED AS TEXTFILE; No rows affected >>> LOAD DATA LOCAL INPATH '../data/files/kv1.txt' INTO TABLE T3; No rows affected >>> LOAD DATA LOCAL INPATH 
'../data/files/kv2.txt' INTO TABLE T3; No rows affected >>> >>> EXPLAIN SELECT COUNT(1) FROM T3; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1)))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 is a root stage' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' t3 ' ' TableScan' ' alias: t3' ' Select Operator' ' Group By Operator' ' aggregations:' ' expr: count(1)' ' bucketGroup: false' ' mode: hash' ' outputColumnNames: _col0' ' Reduce Output Operator' ' sort order: ' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: bigint' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' mode: mergepartial' ' outputColumnNames: _col0' ' Select Operator' ' expressions:' ' expr: _col0' ' type: bigint' ' outputColumnNames: _col0' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' '' ' Stage: Stage-0' ' Fetch Operator' ' limit: -1' '' 50 rows selected >>> SELECT COUNT(1) FROM T3; '_c0' '1000' 1 row selected >>> !record