-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
--     http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.

set hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat
set mapred.min.split.size = 64

CREATE TABLE T1(name STRING) STORED AS TEXTFILE

LOAD DATA LOCAL INPATH 'seed_data_files/kv1.txt' INTO TABLE T1
Copying file: file:/var/lib/hudson/workspace/Nightly-smoke-testing-monster/examples/hive/target/seed_data_files/kv1.txt

CREATE TABLE T2(name STRING) STORED AS SEQUENCEFILE
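-- Note (added sketch, not part of the captured output): before the EXPLAIN below,
-- the two settings above can be echoed back with bare SET statements, which print
-- the current value of a property in the Hive CLI; this is only a suggested
-- sanity check, not something the original run executed.
set hive.input.format
set mapred.min.split.size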
EXPLAIN INSERT OVERWRITE TABLE T2 SELECT * FROM ( SELECT tmp1.name as name FROM ( SELECT name, 'MMM' AS n FROM T1) tmp1 JOIN (SELECT 'MMM' AS n FROM T1) tmp2 JOIN (SELECT 'MMM' AS n FROM T1) tmp3 ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000

ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL name)) (TOK_SELEXPR 'MMM' n)))) tmp1) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp2)) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T1))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR 'MMM' n)))) tmp3) (AND (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp2) n)) (= (. (TOK_TABLE_OR_COL tmp1) n) (. (TOK_TABLE_OR_COL tmp3) n))))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL tmp1) name) name)))) ttt)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME T2))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_LIMIT 5000000)))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-3 depends on stages: Stage-2
  Stage-0 depends on stages: Stage-3
  Stage-4 depends on stages: Stage-0

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        ttt:tmp1:t1
          TableScan
            alias: t1
            Select Operator
              expressions:
                    expr: name
                    type: string
                    expr: 'MMM'
                    type: string
              outputColumnNames: _col0, _col1
              Reduce Output Operator
                sort order:
                tag: 0
                value expressions:
                      expr: _col0
                      type: string
                      expr: _col1
                      type: string
        ttt:tmp2:t1
          TableScan
            alias: t1
            Select Operator
              expressions:
                    expr: 'MMM'
                    type: string
              outputColumnNames: _col0
              Reduce Output Operator
                sort order:
                tag: 1
                value expressions:
                      expr: _col0
                      type: string
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
            0 {VALUE._col0} {VALUE._col1}
            1 {VALUE._col0}
          handleSkewJoin: false
          outputColumnNames: _col0, _col1, _col2
          Filter Operator
            predicate:
                expr: (_col1 = _col2)
                type: boolean
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-2
    Map Reduce
      Alias -> Map Operator Tree:
        $INTNAME
            Reduce Output Operator
              key expressions:
                    expr: _col1
                    type: string
              sort order: +
              Map-reduce partition columns:
                    expr: _col1
                    type: string
              tag: 0
              value expressions:
                    expr: _col0
                    type: string
        ttt:tmp3:t1
          TableScan
            alias: t1
            Select Operator
              expressions:
                    expr: 'MMM'
                    type: string
              outputColumnNames: _col0
              Reduce Output Operator
                key expressions:
                      expr: _col0
                      type: string
                sort order: +
                Map-reduce partition columns:
                      expr: _col0
                      type: string
                tag: 1
      Reduce Operator Tree:
        Join Operator
          condition map:
               Inner Join 0 to 1
          condition expressions:
            0 {VALUE._col1}
            1
          handleSkewJoin: false
          outputColumnNames: _col1
          Select Operator
            expressions:
                  expr: _col1
                  type: string
            outputColumnNames: _col0
            Select Operator
              expressions:
                    expr: _col0
                    type: string
              outputColumnNames: _col0
              Limit
                File Output Operator
                  compressed: false
                  GlobalTableId: 0
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

  Stage: Stage-3
    Map Reduce
      Alias -> Map Operator Tree:
        hdfs://monster01.sf.cloudera.com:17020/tmp/hive-hudson/hive_2011-04-01_12-29-52_242_1273761163922951124/-mr-10003
            Reduce Output Operator
              sort order:
              tag: -1
              value expressions:
                    expr: _col0
                    type: string
      Reduce Operator Tree:
        Extract
          Limit
            File Output Operator
              compressed: false
              GlobalTableId: 1
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.t2

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
          table:
              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.t2

  Stage: Stage-4
    Stats-Aggr Operator

INSERT OVERWRITE TABLE T2 SELECT * FROM ( SELECT tmp1.name as name FROM ( SELECT name, 'MMM' AS n FROM T1) tmp1 JOIN (SELECT 'MMM' AS n FROM T1) tmp2 JOIN (SELECT 'MMM' AS n FROM T1) tmp3 ON tmp1.n = tmp2.n AND tmp1.n = tmp3.n) ttt LIMIT 5000000
Deleted hdfs://monster01.sf.cloudera.com:17020/user/hive/warehouse/t2
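-- Note (added sketch, not part of the captured output): the plan above writes
-- default.t2 through HiveSequenceFileOutputFormat with LazySimpleSerDe. If the
-- resulting table needs to be confirmed interactively, its storage descriptor
-- (input/output formats and SerDe) can be printed with a plain metadata query:
DESCRIBE EXTENDED T2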
EXPLAIN SELECT COUNT(1) FROM T2

ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T2))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1)))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        t2
          TableScan
            alias: t2
            Select Operator
              Group By Operator
                aggregations:
                      expr: count(1)
                bucketGroup: false
                mode: hash
                outputColumnNames: _col0
                Reduce Output Operator
                  sort order:
                  tag: -1
                  value expressions:
                        expr: _col0
                        type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: count(VALUE._col0)
          bucketGroup: false
          mode: mergepartial
          outputColumnNames: _col0
          Select Operator
            expressions:
                  expr: _col0
                  type: bigint
            outputColumnNames: _col0
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1

SELECT COUNT(1) FROM T2
5000000

CREATE TABLE T3(name STRING) STORED AS TEXTFILE

LOAD DATA LOCAL INPATH 'seed_data_files/kv1.txt' INTO TABLE T3
Copying file: file:/var/lib/hudson/workspace/Nightly-smoke-testing-monster/examples/hive/target/seed_data_files/kv1.txt

LOAD DATA LOCAL INPATH 'seed_data_files/kv2.txt' INTO TABLE T3
Copying file: file:/var/lib/hudson/workspace/Nightly-smoke-testing-monster/examples/hive/target/seed_data_files/kv2.txt

EXPLAIN SELECT COUNT(1) FROM T3

ABSTRACT SYNTAX TREE:
  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME T3))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION COUNT 1)))))

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 is a root stage

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Alias -> Map Operator Tree:
        t3
          TableScan
            alias: t3
            Select Operator
              Group By Operator
                aggregations:
                      expr: count(1)
                bucketGroup: false
                mode: hash
                outputColumnNames: _col0
                Reduce Output Operator
                  sort order:
                  tag: -1
                  value expressions:
                        expr: _col0
                        type: bigint
      Reduce Operator Tree:
        Group By Operator
          aggregations:
                expr: count(VALUE._col0)
          bucketGroup: false
          mode: mergepartial
          outputColumnNames: _col0
          Select Operator
            expressions:
                  expr: _col0
                  type: bigint
            outputColumnNames: _col0
            File Output Operator
              compressed: false
              GlobalTableId: 0
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat

  Stage: Stage-0
    Fetch Operator
      limit: -1

SELECT COUNT(1) FROM T3
1000
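-- Note (added sketch, not part of the captured output): the counts above follow
-- from the seed data, assuming kv1.txt and kv2.txt are the standard 500-row Hive
-- seed files. The three-way join on the constant key 'MMM' produces
-- 500 * 500 * 500 = 125,000,000 rows, which the LIMIT caps at 5,000,000 (the
-- COUNT(1) result for T2), and T3 holds 500 + 500 = 1000 rows. A minimal
-- cross-check against the base table, which should return 500 under that
-- assumption:
SELECT COUNT(1) FROM T1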