PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S)
-- SORT_QUERY_RESULTS
CREATE TABLE T1(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20,0.20S)
-- SORT_QUERY_RESULTS
CREATE TABLE T1(key STRING, val STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@T1
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
PREHOOK: type: LOAD
#### A masked pattern was here ####
PREHOOK: Output: default@t1
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1
POSTHOOK: type: LOAD
#### A masked pattern was here ####
POSTHOOK: Output: default@t1
PREHOOK: query: -- perform an insert to make sure there are 2 files
INSERT OVERWRITE TABLE T1 select key, val from T1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Output: default@t1
POSTHOOK: query: -- perform an insert to make sure there are 2 files
INSERT OVERWRITE TABLE T1 select key, val from T1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Output: default@t1
POSTHOOK: Lineage: t1.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: t1.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
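[Editor's note: the golden output does not record the session settings the test script applies before these statements run, and the map-only GROUP BY rewrite exercised below only fires when map-side aggregation over sorted buckets is enabled. A minimal sketch of the kind of setup this test family assumes (the property names are an assumption here, not shown in this output):

    set hive.enforce.bucketing=true;   -- writes honor CLUSTERED BY
    set hive.enforce.sorting=true;     -- writes honor SORTED BY
    set hive.map.groupby.sorted=true;  -- allow map-only GROUP BY on sorted buckets

The self-insert into T1 above matters for the same reason: it rewrites the loaded file through Hive so the data on disk is actually bucketed and sorted by key.]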
PREHOOK: query: CREATE TABLE outputTbl1(key int, cnt int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE outputTbl1(key int, cnt int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@outputTbl1
PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key
-- matches the sorted key
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T1 GROUP BY key
PREHOOK: type: QUERY
POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key
-- matches the sorted key
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T1 GROUP BY key
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
TOK_QUERY
   TOK_FROM
      TOK_TABREF
         TOK_TABNAME
            T1
   TOK_INSERT
      TOK_DESTINATION
         TOK_TAB
            TOK_TABNAME
               outputTbl1
      TOK_SELECT
         TOK_SELEXPR
            TOK_TABLE_OR_COL
               key
         TOK_SELEXPR
            TOK_FUNCTION
               count
               1
      TOK_GROUPBY
         TOK_TABLE_OR_COL
            key

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
  Stage-4
  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
  Stage-2 depends on stages: Stage-0
  Stage-3
  Stage-5
  Stage-6 depends on stages: Stage-5

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Select Operator
              expressions: key (type: string)
              outputColumnNames: key
              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(1)
                keys: key (type: string)
                mode: final
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 1
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        properties:
                          bucket_count -1
                          columns key,cnt
                          columns.comments 
                          columns.types int:int
#### A masked pattern was here ####
                          name default.outputtbl1
                          serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                          serialization.format 1
                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.outputtbl1
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: t1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 2
              bucket_field_name key
              columns key,val
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.t1
              numFiles 1
              numRows 6
              rawDataSize 24
              serialization.ddl struct t1 { string key, string val}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 30
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 2
                bucket_field_name key
                columns key,val
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.t1
                numFiles 1
                numRows 6
                rawDataSize 24
                serialization.ddl struct t1 { string key, string val}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 30
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.t1
            name: default.t1
      Truncated Path -> Alias:
        /t1 [t1]

  Stage: Stage-7
    Conditional Operator

  Stage: Stage-4
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
#### A masked pattern was here ####
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key,cnt
                columns.comments 
                columns.types int:int
#### A masked pattern was here ####
                name default.outputtbl1
                serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl1

  Stage: Stage-2
    Stats-Aggr Operator
#### A masked pattern was here ####

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key,cnt
                    columns.comments 
                    columns.types int:int
#### A masked pattern was here ####
                    name default.outputtbl1
                    serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -ext-10001
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              bucket_count -1
              columns key,cnt
              columns.comments 
              columns.types int:int
#### A masked pattern was here ####
              name default.outputtbl1
              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key,cnt
                columns.comments 
                columns.types int:int
#### A masked pattern was here ####
                name default.outputtbl1
                serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl1
            name: default.outputtbl1
      Truncated Path -> Alias:
#### A masked pattern was here ####

  Stage: Stage-5
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key,cnt
                    columns.comments 
                    columns.types int:int
#### A masked pattern was here ####
                    name default.outputtbl1
                    serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -ext-10001
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              bucket_count -1
              columns key,cnt
              columns.comments 
              columns.types int:int
#### A masked pattern was here ####
              name default.outputtbl1
              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key,cnt
                columns.comments 
                columns.types int:int
#### A masked pattern was here ####
                name default.outputtbl1
                serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl1
            name: default.outputtbl1
      Truncated Path -> Alias:
#### A masked pattern was here ####

  Stage: Stage-6
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T1 GROUP BY key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Output: default@outputtbl1
POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM T1 GROUP BY key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Output: default@outputtbl1
POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
PREHOOK: query: SELECT * FROM outputTbl1
PREHOOK: type: QUERY
PREHOOK: Input: default@outputtbl1
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM outputTbl1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@outputtbl1
#### A masked pattern was here ####
1	1
2	1
3	1
7	1
8	2
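[Editor's note: the signature of the rewrite in the plan above is the map-side Group By Operator running with "mode: final" and the absence of any Reduce Operator Tree in Stage-1. Because every bucket file of T1 is sorted by key, all rows for a given key are adjacent, so each mapper can emit final counts directly. A quick interactive check of the same behavior (a sketch, not part of the test):

    EXPLAIN
    SELECT key, count(1) FROM T1 GROUP BY key;
    -- expect a single map-only stage with Group By "mode: final"
]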
PREHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE outputTbl2(key1 int, key2 string, cnt int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@outputTbl2
PREHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl2
SELECT key, val, count(1) FROM T1 GROUP BY key, val
PREHOOK: type: QUERY
POSTHOOK: query: -- no map-side group by even if the group by key is a superset of sorted key
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl2
SELECT key, val, count(1) FROM T1 GROUP BY key, val
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
TOK_QUERY
   TOK_FROM
      TOK_TABREF
         TOK_TABNAME
            T1
   TOK_INSERT
      TOK_DESTINATION
         TOK_TAB
            TOK_TABNAME
               outputTbl2
      TOK_SELECT
         TOK_SELEXPR
            TOK_TABLE_OR_COL
               key
         TOK_SELEXPR
            TOK_TABLE_OR_COL
               val
         TOK_SELEXPR
            TOK_FUNCTION
               count
               1
      TOK_GROUPBY
         TOK_TABLE_OR_COL
            key
         TOK_TABLE_OR_COL
            val

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Select Operator
              expressions: key (type: string), val (type: string)
              outputColumnNames: key, val
              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(1)
                keys: key (type: string), val (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: string)
                  sort order: ++
                  Map-reduce partition columns: rand() (type: double)
                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  tag: -1
                  value expressions: _col2 (type: bigint)
                  auto parallelism: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: t1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 2
              bucket_field_name key
              columns key,val
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.t1
              numFiles 1
              numRows 6
              rawDataSize 24
              serialization.ddl struct t1 { string key, string val}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 30
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 2
                bucket_field_name key
                columns key,val
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.t1
                numFiles 1
                numRows 6
                rawDataSize 24
                serialization.ddl struct t1 { string key, string val}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 30
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.t1
            name: default.t1
      Truncated Path -> Alias:
        /t1 [t1]
      Needs Tagging: false
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: partials
          outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            GlobalTableId: 0
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                properties:
                  columns _col0,_col1,_col2
                  columns.types string,string,bigint
                  escape.delim \
                  serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
            TotalFiles: 1
            GatherStats: false
            MultiFileSpray: false

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            Reduce Output Operator
              key expressions: _col0 (type: string), _col1 (type: string)
              sort order: ++
              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
              tag: -1
              value expressions: _col2 (type: bigint)
              auto parallelism: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -mr-10001
            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
            properties:
              columns _col0,_col1,_col2
              columns.types string,string,bigint
              escape.delim \
              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
              properties:
                columns _col0,_col1,_col2
                columns.types string,string,bigint
                escape.delim \
                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
      Truncated Path -> Alias:
#### A masked pattern was here ####
      Needs Tagging: false
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: final
          outputColumnNames: _col0, _col1, _col2
          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int)
            outputColumnNames: _col0, _col1, _col2
            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              GlobalTableId: 1
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key1,key2,cnt
                    columns.comments 
                    columns.types int:string:int
#### A masked pattern was here ####
                    name default.outputtbl2
                    serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl2
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
#### A masked pattern was here ####
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key1,key2,cnt
                columns.comments 
                columns.types int:string:int
#### A masked pattern was here ####
                name default.outputtbl2
                serialization.ddl struct outputtbl2 { i32 key1, string key2, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl2

  Stage: Stage-3
    Stats-Aggr Operator
#### A masked pattern was here ####

PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2
SELECT key, val, count(1) FROM T1 GROUP BY key, val
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Output: default@outputtbl2
POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl2
SELECT key, val, count(1) FROM T1 GROUP BY key, val
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Output: default@outputtbl2
POSTHOOK: Lineage: outputtbl2.cnt EXPRESSION [(t1)t1.null, ]
POSTHOOK: Lineage: outputtbl2.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: outputtbl2.key2 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
PREHOOK: query: SELECT * FROM outputTbl2
PREHOOK: type: QUERY
PREHOOK: Input: default@outputtbl2
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM outputTbl2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@outputtbl2
#### A masked pattern was here ####
1	11	1
2	12	1
3	13	1
7	17	1
8	18	1
8	28	1
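[Editor's note: here GROUP BY (key, val) is a superset of the sort key (key), and the plan falls back to two MapReduce jobs: a hash-mode map aggregation partitioned by rand() followed by a final reduce-side aggregation. Rows sharing (key, val) are only guaranteed adjacent on disk if the table is sorted on both columns, so a table declared as below would be a candidate for the map-only rewrite on this query (a hypothetical sketch; T1s is not part of this test):

    CREATE TABLE T1s(key STRING, val STRING)
    CLUSTERED BY (key) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE;
]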
PREHOOK: query: -- It should work for sub-queries
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key
PREHOOK: type: QUERY
POSTHOOK: query: -- It should work for sub-queries
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
TOK_QUERY
   TOK_FROM
      TOK_SUBQUERY
         TOK_QUERY
            TOK_FROM
               TOK_TABREF
                  TOK_TABNAME
                     T1
            TOK_INSERT
               TOK_DESTINATION
                  TOK_DIR
                     TOK_TMP_FILE
               TOK_SELECT
                  TOK_SELEXPR
                     TOK_TABLE_OR_COL
                        key
                  TOK_SELEXPR
                     TOK_TABLE_OR_COL
                        val
         subq1
   TOK_INSERT
      TOK_DESTINATION
         TOK_TAB
            TOK_TABNAME
               outputTbl1
      TOK_SELECT
         TOK_SELEXPR
            TOK_TABLE_OR_COL
               key
         TOK_SELEXPR
            TOK_FUNCTION
               count
               1
      TOK_GROUPBY
         TOK_TABLE_OR_COL
            key

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
  Stage-4
  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
  Stage-2 depends on stages: Stage-0
  Stage-3
  Stage-5
  Stage-6 depends on stages: Stage-5

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Select Operator
              expressions: key (type: string)
              outputColumnNames: _col0
              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(1)
                keys: _col0 (type: string)
                mode: final
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 1
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        properties:
                          COLUMN_STATS_ACCURATE true
                          bucket_count -1
                          columns key,cnt
                          columns.comments 
                          columns.types int:int
#### A masked pattern was here ####
                          name default.outputtbl1
                          numFiles 1
                          numRows 5
                          rawDataSize 15
                          serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                          serialization.format 1
                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          totalSize 20
#### A masked pattern was here ####
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.outputtbl1
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: t1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 2
              bucket_field_name key
              columns key,val
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.t1
              numFiles 1
              numRows 6
              rawDataSize 24
              serialization.ddl struct t1 { string key, string val}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 30
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 2
                bucket_field_name key
                columns key,val
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.t1
                numFiles 1
                numRows 6
                rawDataSize 24
                serialization.ddl struct t1 { string key, string val}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 30
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.t1
            name: default.t1
      Truncated Path -> Alias:
        /t1 [subq1:t1]

  Stage: Stage-7
    Conditional Operator

  Stage: Stage-4
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
#### A masked pattern was here ####
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                bucket_count -1
                columns key,cnt
                columns.comments 
                columns.types int:int
#### A masked pattern was here ####
                name default.outputtbl1
                numFiles 1
                numRows 5
                rawDataSize 15
                serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 20
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl1

  Stage: Stage-2
    Stats-Aggr Operator
#### A masked pattern was here ####

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count -1
                    columns key,cnt
                    columns.comments 
                    columns.types int:int
#### A masked pattern was here ####
                    name default.outputtbl1
                    numFiles 1
                    numRows 5
                    rawDataSize 15
                    serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 20
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -ext-10001
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              bucket_count -1
              columns key,cnt
              columns.comments 
              columns.types int:int
#### A masked pattern was here ####
              name default.outputtbl1
              numFiles 1
              numRows 5
              rawDataSize 15
              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 20
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                bucket_count -1
                columns key,cnt
                columns.comments 
                columns.types int:int
#### A masked pattern was here ####
                name default.outputtbl1
                numFiles 1
                numRows 5
                rawDataSize 15
                serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 20
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl1
            name: default.outputtbl1
      Truncated Path -> Alias:
#### A masked pattern was here ####

  Stage: Stage-5
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count -1
                    columns key,cnt
                    columns.comments 
                    columns.types int:int
#### A masked pattern was here ####
                    name default.outputtbl1
                    numFiles 1
                    numRows 5
                    rawDataSize 15
                    serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 20
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -ext-10001
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              bucket_count -1
              columns key,cnt
              columns.comments 
              columns.types int:int
#### A masked pattern was here ####
              name default.outputtbl1
              numFiles 1
              numRows 5
              rawDataSize 15
              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 20
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                bucket_count -1
                columns key,cnt
                columns.comments 
                columns.types int:int
#### A masked pattern was here ####
                name default.outputtbl1
                numFiles 1
                numRows 5
                rawDataSize 15
                serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 20
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl1
            name: default.outputtbl1
      Truncated Path -> Alias:
#### A masked pattern was here ####

  Stage: Stage-6
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Output: default@outputtbl1
POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Output: default@outputtbl1
POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
PREHOOK: query: SELECT * FROM outputTbl1
PREHOOK: type: QUERY
PREHOOK: Input: default@outputtbl1
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM outputTbl1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@outputtbl1
#### A masked pattern was here ####
1	1
2	1
3	1
7	1
8	2
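[Editor's note: the rewrite survives the pass-through subquery because the optimizer can trace subq1's output column back to T1.key, as the "/t1 [subq1:t1]" entry under Truncated Path -> Alias shows. One would expect deeper pass-through nesting to behave the same way (a sketch, not part of the test):

    EXPLAIN
    SELECT key, count(1)
    FROM (SELECT key, val FROM (SELECT key, val FROM T1) a) b
    GROUP BY key;
]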
PREHOOK: query: -- It should work for sub-queries with column aliases
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k
PREHOOK: type: QUERY
POSTHOOK: query: -- It should work for sub-queries with column aliases
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl1
SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
TOK_QUERY
   TOK_FROM
      TOK_SUBQUERY
         TOK_QUERY
            TOK_FROM
               TOK_TABREF
                  TOK_TABNAME
                     T1
            TOK_INSERT
               TOK_DESTINATION
                  TOK_DIR
                     TOK_TMP_FILE
               TOK_SELECT
                  TOK_SELEXPR
                     TOK_TABLE_OR_COL
                        key
                     k
                  TOK_SELEXPR
                     TOK_TABLE_OR_COL
                        val
                     v
         subq1
   TOK_INSERT
      TOK_DESTINATION
         TOK_TAB
            TOK_TABNAME
               outputTbl1
      TOK_SELECT
         TOK_SELEXPR
            TOK_TABLE_OR_COL
               k
         TOK_SELEXPR
            TOK_FUNCTION
               count
               1
      TOK_GROUPBY
         TOK_TABLE_OR_COL
            k

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
  Stage-4
  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
  Stage-2 depends on stages: Stage-0
  Stage-3
  Stage-5
  Stage-6 depends on stages: Stage-5

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Select Operator
              expressions: key (type: string)
              outputColumnNames: _col0
              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(1)
                keys: _col0 (type: string)
                mode: final
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int)
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 1
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        properties:
                          COLUMN_STATS_ACCURATE true
                          bucket_count -1
                          columns key,cnt
                          columns.comments 
                          columns.types int:int
#### A masked pattern was here ####
                          name default.outputtbl1
                          numFiles 1
                          numRows 5
                          rawDataSize 15
                          serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                          serialization.format 1
                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          totalSize 20
#### A masked pattern was here ####
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.outputtbl1
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: t1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 2
              bucket_field_name key
              columns key,val
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.t1
              numFiles 1
              numRows 6
              rawDataSize 24
              serialization.ddl struct t1 { string key, string val}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 30
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 2
                bucket_field_name key
                columns key,val
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.t1
                numFiles 1
                numRows 6
                rawDataSize 24
                serialization.ddl struct t1 { string key, string val}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 30
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.t1
            name: default.t1
      Truncated Path -> Alias:
        /t1 [subq1:t1]

  Stage: Stage-7
    Conditional Operator

  Stage: Stage-4
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
#### A masked pattern was here ####
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                bucket_count -1
                columns key,cnt
                columns.comments 
                columns.types int:int
#### A masked pattern was here ####
                name default.outputtbl1
                numFiles 1
                numRows 5
                rawDataSize 15
                serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 20
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl1

  Stage: Stage-2
    Stats-Aggr Operator
#### A masked pattern was here ####

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count -1
                    columns key,cnt
                    columns.comments 
                    columns.types int:int
#### A masked pattern was here ####
                    name default.outputtbl1
                    numFiles 1
                    numRows 5
                    rawDataSize 15
                    serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 20
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -ext-10001
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              bucket_count -1
              columns key,cnt
              columns.comments 
              columns.types int:int
#### A masked pattern was here ####
              name default.outputtbl1
              numFiles 1
              numRows 5
              rawDataSize 15
              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 20
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                bucket_count -1
                columns key,cnt
                columns.comments 
                columns.types int:int
#### A masked pattern was here ####
                name default.outputtbl1
                numFiles 1
                numRows 5
                rawDataSize 15
                serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 20
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl1
            name: default.outputtbl1
      Truncated Path -> Alias:
#### A masked pattern was here ####

  Stage: Stage-5
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    COLUMN_STATS_ACCURATE true
                    bucket_count -1
                    columns key,cnt
                    columns.comments 
                    columns.types int:int
#### A masked pattern was here ####
                    name default.outputtbl1
                    numFiles 1
                    numRows 5
                    rawDataSize 15
                    serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                    totalSize 20
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl1
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -ext-10001
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              bucket_count -1
              columns key,cnt
              columns.comments 
              columns.types int:int
#### A masked pattern was here ####
              name default.outputtbl1
              numFiles 1
              numRows 5
              rawDataSize 15
              serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 20
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                bucket_count -1
                columns key,cnt
                columns.comments 
                columns.types int:int
#### A masked pattern was here ####
                name default.outputtbl1
                numFiles 1
                numRows 5
                rawDataSize 15
                serialization.ddl struct outputtbl1 { i32 key, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 20
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl1
            name: default.outputtbl1
      Truncated Path -> Alias:
#### A masked pattern was here ####

  Stage: Stage-6
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Output: default@outputtbl1
POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1
SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Output: default@outputtbl1
POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ]
POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
PREHOOK: query: SELECT * FROM outputTbl1
PREHOOK: type: QUERY
PREHOOK: Input: default@outputtbl1
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM outputTbl1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@outputtbl1
#### A masked pattern was here ####
1	1
2	1
3	1
7	1
8	2
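[Editor's note: renaming the columns in the subquery does not break the rewrite; the plan is the same as the previous one because "k" still resolves to T1.key before the sorted-table check runs (the map-side Select Operator simply projects "_col0" instead of "key"). A sketch of an equivalent formulation:

    EXPLAIN
    SELECT subq1.k, count(1)
    FROM (SELECT key AS k, val AS v FROM T1) subq1
    GROUP BY subq1.k;
]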
PREHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE outputTbl3(key1 int, key2 int, cnt int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@outputTbl3
PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed
-- by a match to the sorted key
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl3
SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
PREHOOK: type: QUERY
POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant followed
-- by a match to the sorted key
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl3
SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
TOK_QUERY
   TOK_FROM
      TOK_TABREF
         TOK_TABNAME
            T1
   TOK_INSERT
      TOK_DESTINATION
         TOK_TAB
            TOK_TABNAME
               outputTbl3
      TOK_SELECT
         TOK_SELEXPR
            1
         TOK_SELEXPR
            TOK_TABLE_OR_COL
               key
         TOK_SELEXPR
            TOK_FUNCTION
               count
               1
      TOK_GROUPBY
         1
         TOK_TABLE_OR_COL
            key

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
  Stage-4
  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
  Stage-2 depends on stages: Stage-0
  Stage-3
  Stage-5
  Stage-6 depends on stages: Stage-5

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Select Operator
              expressions: key (type: string)
              outputColumnNames: key
              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(1)
                keys: 1 (type: int), key (type: string)
                mode: final
                outputColumnNames: _col0, _col1, _col2
                Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                Select Operator
                  expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
                  outputColumnNames: _col0, _col1, _col2
                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
                  File Output Operator
                    compressed: false
                    GlobalTableId: 1
#### A masked pattern was here ####
                    NumFilesPerFileSink: 1
                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        properties:
                          bucket_count -1
                          columns key1,key2,cnt
                          columns.comments 
                          columns.types int:int:int
#### A masked pattern was here ####
                          name default.outputtbl3
                          serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
                          serialization.format 1
                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.outputtbl3
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: t1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 2
              bucket_field_name key
              columns key,val
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.t1
              numFiles 1
              numRows 6
              rawDataSize 24
              serialization.ddl struct t1 { string key, string val}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 30
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 2
                bucket_field_name key
                columns key,val
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.t1
                numFiles 1
                numRows 6
                rawDataSize 24
                serialization.ddl struct t1 { string key, string val}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 30
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.t1
            name: default.t1
      Truncated Path -> Alias:
        /t1 [t1]

  Stage: Stage-7
    Conditional Operator

  Stage: Stage-4
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
#### A masked pattern was here ####
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key1,key2,cnt
                columns.comments 
                columns.types int:int:int
#### A masked pattern was here ####
                name default.outputtbl3
                serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl3

  Stage: Stage-2
    Stats-Aggr Operator
#### A masked pattern was here ####

  Stage: Stage-3
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key1,key2,cnt
                    columns.comments 
                    columns.types int:int:int
#### A masked pattern was here ####
                    name default.outputtbl3
                    serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl3
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -ext-10001
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              bucket_count -1
              columns key1,key2,cnt
              columns.comments 
              columns.types int:int:int
#### A masked pattern was here ####
              name default.outputtbl3
              serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key1,key2,cnt
                columns.comments 
                columns.types int:int:int
#### A masked pattern was here ####
                name default.outputtbl3
                serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl3
            name: default.outputtbl3
      Truncated Path -> Alias:
#### A masked pattern was here ####

  Stage: Stage-5
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key1,key2,cnt
                    columns.comments 
                    columns.types int:int:int
#### A masked pattern was here ####
                    name default.outputtbl3
                    serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl3
              TotalFiles: 1
              GatherStats: false
              MultiFileSpray: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -ext-10001
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              bucket_count -1
              columns key1,key2,cnt
              columns.comments 
              columns.types int:int:int
#### A masked pattern was here ####
              name default.outputtbl3
              serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key1,key2,cnt
                columns.comments 
                columns.types int:int:int
#### A masked pattern was here ####
                name default.outputtbl3
                serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl3
            name: default.outputtbl3
      Truncated Path -> Alias:
#### A masked pattern was here ####

  Stage: Stage-6
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####

PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3
SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Output: default@outputtbl3
POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3
SELECT 1, key, count(1) FROM T1 GROUP BY 1, key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Output: default@outputtbl3
POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ]
POSTHOOK: Lineage: outputtbl3.key1 SIMPLE []
POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
PREHOOK: query: SELECT * FROM outputTbl3
PREHOOK: type: QUERY
PREHOOK: Input: default@outputtbl3
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM outputTbl3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@outputtbl3
#### A masked pattern was here ####
1	1	1
1	2	1
1	3	1
1	7	1
1	8	2
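[Editor's note: the "1" in GROUP BY 1, key is the literal constant here (as the TOK_GROUPBY node in the AST shows), not a positional reference. A constant adds no real grouping dimension, so the effective grouping key is still just the sorted column and the map-only rewrite applies ("keys: 1 (type: int), key (type: string)", "mode: final"). The result should match grouping by key alone (a sketch, assuming constants are allowed alongside GROUP BY here):

    SELECT 1, key, count(1) FROM T1 GROUP BY key;
    -- expected to return the same rows as GROUP BY 1, key above
]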
PREHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
POSTHOOK: query: CREATE TABLE outputTbl4(key1 int, key2 int, key3 string, cnt int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@outputTbl4
PREHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl4
SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
PREHOOK: type: QUERY
POSTHOOK: query: -- no map-side group by if the group by key contains a constant followed by another column
EXPLAIN EXTENDED
INSERT OVERWRITE TABLE outputTbl4
SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:
TOK_QUERY
   TOK_FROM
      TOK_TABREF
         TOK_TABNAME
            T1
   TOK_INSERT
      TOK_DESTINATION
         TOK_TAB
            TOK_TABNAME
               outputTbl4
      TOK_SELECT
         TOK_SELEXPR
            TOK_TABLE_OR_COL
               key
         TOK_SELEXPR
            1
         TOK_SELEXPR
            TOK_TABLE_OR_COL
               val
         TOK_SELEXPR
            TOK_FUNCTION
               count
               1
      TOK_GROUPBY
         TOK_TABLE_OR_COL
            key
         1
         TOK_TABLE_OR_COL
            val

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: t1
            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Select Operator
              expressions: key (type: string), val (type: string)
              outputColumnNames: key, val
              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
              Group By Operator
                aggregations: count(1)
                keys: key (type: string), 1 (type: int), val (type: string)
                mode: hash
                outputColumnNames: _col0, _col1, _col2, _col3
                Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
                  sort order: +++
                  Map-reduce partition columns: rand() (type: double)
                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                  tag: -1
                  value expressions: _col3 (type: bigint)
                  auto parallelism: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: t1
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
              COLUMN_STATS_ACCURATE true
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 2
              bucket_field_name key
              columns key,val
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.t1
              numFiles 1
              numRows 6
              rawDataSize 24
              serialization.ddl struct t1 { string key, string val}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              totalSize 30
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                COLUMN_STATS_ACCURATE true
                SORTBUCKETCOLSPREFIX TRUE
                bucket_count 2
                bucket_field_name key
                columns key,val
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.t1
                numFiles 1
                numRows 6
                rawDataSize 24
                serialization.ddl struct t1 { string key, string val}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                totalSize 30
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.t1
            name: default.t1
      Truncated Path -> Alias:
        /t1 [t1]
      Needs Tagging: false
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: partials
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            GlobalTableId: 0
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                properties:
                  columns _col0,_col1,_col2,_col3
                  columns.types string,int,string,bigint
                  escape.delim \
                  serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
            TotalFiles: 1
            GatherStats: false
            MultiFileSpray: false

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            Reduce Output Operator
              key expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
              sort order: +++
              Map-reduce partition columns: _col0 (type: string), _col1 (type: int), _col2 (type: string)
              Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
              tag: -1
              value expressions: _col3 (type: bigint)
              auto parallelism: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -mr-10001
            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
            properties:
              columns _col0,_col1,_col2,_col3
              columns.types string,int,string,bigint
              escape.delim \
              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
              properties:
                columns _col0,_col1,_col2,_col3
                columns.types string,int,string,bigint
                escape.delim \
                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
      Truncated Path -> Alias:
#### A masked pattern was here ####
      Needs Tagging: false
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string), KEY._col1 (type: int), KEY._col2 (type: string)
          mode: final
          outputColumnNames: _col0, _col1, _col2, _col3
          Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
          Select Operator
            expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int)
            outputColumnNames: _col0, _col1, _col2, _col3
            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
            File Output Operator
              compressed: false
              GlobalTableId: 1
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
              Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  properties:
                    bucket_count -1
                    columns key1,key2,key3,cnt
                    columns.comments 
                    columns.types int:int:string:int
#### A masked pattern was here ####
                    name default.outputtbl4
                    serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.outputtbl4
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false

  Stage: Stage-0
    Move Operator
      tables:
          replace: true
#### A masked pattern was here ####
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
              properties:
                bucket_count -1
                columns key1,key2,key3,cnt
                columns.comments 
                columns.types int:int:string:int
#### A masked pattern was here ####
                name default.outputtbl4
                serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: default.outputtbl4

  Stage: Stage-3
    Stats-Aggr Operator
#### A masked pattern was here ####

PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4
SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Output: default@outputtbl4
POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4
SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Output: default@outputtbl4
POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ]
POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ]
POSTHOOK: Lineage: outputtbl4.key2 SIMPLE []
POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ]
PREHOOK: query: SELECT * FROM outputTbl4
PREHOOK: type: QUERY
PREHOOK: Input: default@outputtbl4
#### A masked pattern was here ####
POSTHOOK: query: SELECT * FROM outputTbl4
POSTHOOK: type: QUERY
POSTHOOK: Input: default@outputtbl4
#### A masked pattern was here ####
1	1	11	1
2	1	12	1
3	1	13	1
7	1	17	1
8	1	18	1
8	1	28	1
COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [t1] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: double) mode: partials outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-2 Map Reduce Map Operator Tree: TableScan GatherStats: false Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: double) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col2 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -mr-10001 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: double) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,cnt columns.comments columns.types int:int:int #### A masked pattern was here #### name default.outputtbl3 numFiles 1 numRows 5 rawDataSize 25 serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 TotalFiles: 1 GatherStats: true MultiFileSpray: false Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,cnt columns.comments columns.types int:int:int #### A masked pattern was here #### name default.outputtbl3 numFiles 1 numRows 5 rawDataSize 25 serialization.ddl struct outputtbl3 { i32 key1, i32 key2, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl3 Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl3 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl3 POSTHOOK: Lineage: outputtbl3.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl3.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl3.key2 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl3 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl3 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl3 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl3 #### A masked pattern was here #### 1 2 1 2 3 1 3 4 1 7 8 1 8 9 2 PREHOOK: query: -- it should not matter what follows the group by -- test various cases -- group by followed by another group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY POSTHOOK: query: -- it should not matter what follows the group by -- test various cases -- group by followed by another group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T1 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_FUNCTION count 1 cnt TOK_GROUPBY TOK_TABLE_OR_COL key subq1 TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME outputTbl1 TOK_SELECT TOK_SELEXPR + TOK_TABLE_OR_COL key TOK_TABLE_OR_COL key TOK_SELEXPR TOK_FUNCTION sum TOK_TABLE_OR_COL cnt TOK_GROUPBY + TOK_TABLE_OR_COL key TOK_TABLE_OR_COL key STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 
depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: sum(_col1) keys: (_col0 + _col0) (type: double) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [subq1:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: double) mode: partials outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-2 Map Reduce 
Map Operator Tree: TableScan GatherStats: false Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -mr-10001 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: double) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key PREHOOK: type: QUERY PREHOOK: 
Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 group by key + key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### 14 1 16 2 2 1 4 1 6 1 PREHOOK: query: -- group by followed by a union EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a union EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_SUBQUERY TOK_UNION TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T1 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T1 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key subq1 TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME outputTbl1 TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked 
pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:subq1-subquery1:t1, null-subquery2:subq1-subquery2:t1] Stage: Stage-7 Conditional Operator Stage: Stage-4 Move Operator files: hdfs directory: true #### A masked pattern was here #### Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 name: default.outputtbl1 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-5 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator 
compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 17 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 22 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 name: default.outputtbl1 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key, count(1) FROM T1 GROUP BY key ) subq1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### 1 1 1 1 2 1 2 1 3 1 3 1 7 1 7 1 8 2 8 2 PREHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY 
key UNION ALL SELECT key + key as key, count(1) FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a union where one of the sub-queries is map-side group by EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key UNION ALL SELECT key + key as key, count(1) FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_SUBQUERY TOK_UNION TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T1 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T1 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR + TOK_TABLE_OR_COL key TOK_TABLE_OR_COL key key TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY + TOK_TABLE_OR_COL key TOK_TABLE_OR_COL key subq1 TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME outputTbl1 TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF STAGE DEPENDENCIES: Stage-9 is a root stage Stage-10 depends on stages: Stage-9 Stage-2 depends on stages: Stage-10 Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-9 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: (key + key) (type: double) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [null-subquery2:subq1-subquery2:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: double) mode: partials outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-10 Map Reduce Map Operator Tree: TableScan GatherStats: false Reduce Output Operator key expressions: _col0 (type: double) sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: double) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: final outputColumnNames: 
_col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToDouble(_col0) (type: double), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Union Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false TableScan GatherStats: false Union Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -mr-10003 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types double,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe 
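In this union plan, only the key + key branch needs standalone map/reduce jobs (Stage-9 and Stage-10, again split in two and rand()-partitioned because of the skew setting); the plain GROUP BY key branch collapses into a Group By Operator running in mode final directly inside the Stage-2 mapper, since key is both the clustering and the sort key of T1. A side-by-side sketch of the two branches, under the same assumed settings as above:

-- left branch: key matches the sort key, so it aggregates entirely in the mapper
-- right branch: key + key is a derived key, so it runs as separate MR jobs first
EXPLAIN
SELECT * FROM (
  SELECT key, count(1) FROM T1 GROUP BY key
  UNION ALL
  SELECT key + key AS key, count(1) FROM T1 GROUP BY key + key
) subq1;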
#### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:subq1-subquery1:t1] #### A masked pattern was here #### Stage: Stage-8 Conditional Operator Stage: Stage-5 Move Operator files: hdfs directory: true #### A masked pattern was here #### Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### Stage: Stage-4 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true 
bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 name: default.outputtbl1 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-6 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 30 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 40 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 name: default.outputtbl1 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-7 Move Operator files: hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE 
TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key UNION ALL SELECT key + key as key, count(1) as cnt FROM T1 GROUP BY key + key ) subq1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), (t1)t1.FieldSchema(name:key, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### 1 1 14 1 16 2 2 1 2 1 3 1 4 1 6 1 7 1 8 2 PREHOOK: query: -- group by followed by a join EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a join EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T1 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_FUNCTION count 1 cnt TOK_GROUPBY TOK_TABLE_OR_COL key subq1 TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T1 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_FUNCTION count 1 cnt TOK_GROUPBY TOK_TABLE_OR_COL key subq2 = . TOK_TABLE_OR_COL subq1 key . TOK_TABLE_OR_COL subq2 key TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME outputTbl1 TOK_SELECT TOK_SELEXPR . TOK_TABLE_OR_COL subq1 key TOK_SELEXPR + . TOK_TABLE_OR_COL subq1 cnt . 
TOK_TABLE_OR_COL subq2 cnt STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: bigint) auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} 
serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [subq1:t1, subq2:t1] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {VALUE._col0} outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger((_col1 + _col3)) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 32 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 42 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 10 rawDataSize 32 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 42 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 JOIN (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq2 ON subq1.key = subq2.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t1)t1.null, (t1)t1.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here 
#### 1 2 2 2 3 2 7 2 8 4 PREHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper EXPLAIN EXTENDED SELECT * FROM (SELECT key, count(1) FROM T1 GROUP BY key) subq1 JOIN (SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 ON subq1.key = subq2.key PREHOOK: type: QUERY POSTHOOK: query: -- group by followed by a join where one of the sub-queries can be performed in the mapper EXPLAIN EXTENDED SELECT * FROM (SELECT key, count(1) FROM T1 GROUP BY key) subq1 JOIN (SELECT key, val, count(1) FROM T1 GROUP BY key, val) subq2 ON subq1.key = subq2.key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T1 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key subq1 TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T1 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_TABLE_OR_COL val TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key TOK_TABLE_OR_COL val subq2 = . TOK_TABLE_OR_COL subq1 key . TOK_TABLE_OR_COL subq2 key TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string), val (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col2 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string 
#### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [subq2:t1] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col2 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 
GatherStats: false MultiFileSpray: false Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE tag: 0 value expressions: _col1 (type: bigint) auto parallelism: false TableScan GatherStats: false Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE tag: 1 value expressions: _col1 (type: string), _col2 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -mr-10003 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1,_col2 columns.types string,string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition base file name: t1 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t1 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t1 { string key, string val} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 name: default.t1 Truncated Path -> Alias: /t1 [subq1:t1] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {KEY.reducesinkkey0} {VALUE._col0} 1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: string), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1,_col2,_col3,_col4 columns.types string:bigint:string:string:bigint escape.delim \ hive.serialization.extend.additional.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE T2(key STRING, val STRING) CLUSTERED BY (key, val) SORTED BY (key, val) INTO 2 BUCKETS STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@T2 PREHOOK: query: -- perform an insert to make sure there are 2 files INSERT OVERWRITE TABLE T2 select key, val from T1 PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Output: default@t2 POSTHOOK: query: -- perform an insert to make sure there are 2 files INSERT OVERWRITE TABLE T2 select key, val from T1 POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Output: default@t2 POSTHOOK: Lineage: t2.key SIMPLE [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: t2.val SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: -- no map-side sort group by if the group by key is a prefix of the sorted key EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY POSTHOOK: query: -- no map-side sort group by if the group by key is a prefix of the sorted key EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T2 TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME outputTbl1 TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key STAGE DEPENDENCIES: Stage-1 is a root stage Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t2
Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) bucketGroup: true keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Truncated Path -> Alias: /t2 [t2] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-2 Map Reduce Map Operator Tree: TableScan GatherStats: false Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -mr-10001 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: columns _col0,_col1 columns.types string,bigint escape.delim \ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key,cnt columns.comments columns.types int:int #### A masked pattern was here #### name default.outputtbl1 numFiles 1 numRows 5 rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 Stage: Stage-3 Stats-Aggr Operator #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl1 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl1 POSTHOOK: Lineage: outputtbl1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl1 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl1 POSTHOOK: type: QUERY POSTHOOK: 
Input: default@outputtbl1 #### A masked pattern was here #### 1 1 2 1 3 1 7 1 8 2 PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T2 TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME outputTbl4 TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR 1 TOK_SELEXPR TOK_TABLE_OR_COL val TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key 1 TOK_TABLE_OR_COL val STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string), 1 (type: int), val (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: true MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, 
string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Truncated Path -> Alias: /t2 [t2] Stage: Stage-7 Conditional Operator Stage: Stage-4 Move Operator files: hdfs directory: true #### A masked pattern was here #### Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 name: default.outputtbl4 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-5 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 name: default.outputtbl4 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl4 POSTHOOK: query: INSERT OVERWRITE 
TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl4 POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl4 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl4 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl4 #### A masked pattern was here #### 1 1 11 1 2 1 12 1 3 1 13 1 7 1 17 1 8 1 18 1 8 1 28 1 PREHOOK: query: CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE outputTbl5(key1 int, key2 int, key3 string, key4 int, cnt int) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@outputTbl5 PREHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys followed by anything EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY POSTHOOK: query: -- The plan should be converted to a map-side group by if the group by key contains a constant in between the -- sorted keys followed by anything EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T2 TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME outputTbl5 TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR 1 TOK_SELEXPR TOK_TABLE_OR_COL val TOK_SELEXPR 2 TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key 1 TOK_TABLE_OR_COL val 2 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string), 1 (type: int), val (type: string), 2 (type: int) mode: final outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.comments columns.types int:int:string:int:int #### A masked pattern was here #### name default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 TotalFiles: 1 GatherStats: true MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Truncated Path -> Alias: /t2 [t2] Stage: Stage-7 Conditional Operator Stage: Stage-4 Move Operator files: hdfs directory: true #### A masked pattern was here #### Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.comments columns.types int:int:string:int:int #### A masked pattern was here #### name default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.comments columns.types int:int:string:int:int #### A masked pattern was here #### name default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} 
serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.comments columns.types int:int:string:int:int #### A masked pattern was here #### name default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.comments columns.types int:int:string:int:int #### A masked pattern was here #### name default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 name: default.outputtbl5 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-5 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.comments columns.types int:int:string:int:int #### A masked pattern was here #### name default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.comments columns.types int:int:string:int:int #### A masked pattern was here #### name default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns key1,key2,key3,key4,cnt columns.comments columns.types int:int:string:int:int #### A masked pattern was here #### name 
default.outputtbl5 serialization.ddl struct outputtbl5 { i32 key1, i32 key2, string key3, i32 key4, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl5 name: default.outputtbl5 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl5 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl5 POSTHOOK: Lineage: outputtbl5.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl5.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl5.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl5.key4 SIMPLE [] PREHOOK: query: SELECT * FROM outputTbl5 ORDER BY key1, key2, key3, key4 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl5 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl5 ORDER BY key1, key2, key3, key4 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl5 #### A masked pattern was here #### 1 1 11 2 1 2 1 12 2 1 3 1 13 2 1 7 1 17 2 1 8 1 18 2 1 8 1 28 2 1 PREHOOK: query: -- constants from sub-queries should work fine EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val PREHOOK: type: QUERY POSTHOOK: query: -- constants from sub-queries should work fine EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T2 TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR 1 constant TOK_SELEXPR TOK_TABLE_OR_COL val subq TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME outputTbl4 TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_TABLE_OR_COL constant TOK_SELEXPR TOK_TABLE_OR_COL val TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key TOK_TABLE_OR_COL constant TOK_TABLE_OR_COL val STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string), 1 (type: int), val (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string), _col1 (type: int), _col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats:
COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: true MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Truncated Path -> Alias: /t2 [subq:t2] Stage: Stage-7 Conditional Operator Stage: Stage-4 Move Operator files: hdfs directory: true #### A masked pattern was here #### Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 name: default.outputtbl4 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-5 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path 
-> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 name: default.outputtbl4 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl4 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq group by key, constant, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl4 POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl4 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl4 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl4 #### A masked pattern was here #### 1 1 11 1 2 1 12 1 3 1 13 1 7 1 17 1 8 1 18 1 8 1 28 1 PREHOOK: query: -- multiple levels of constants from sub-queries should work fine EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, constant3, val PREHOOK: type: QUERY POSTHOOK: query: -- multiple levels of constants from sub-queries should work fine EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, constant3, val POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME T2 TOK_INSERT TOK_DESTINATION TOK_DIR
TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR 1 constant TOK_SELEXPR TOK_TABLE_OR_COL val subq TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_TABLE_OR_COL constant constant2 TOK_SELEXPR TOK_TABLE_OR_COL val TOK_SELEXPR 2 constant3 subq2 TOK_INSERT TOK_DESTINATION TOK_TAB TOK_TABNAME outputTbl4 TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL key TOK_SELEXPR TOK_TABLE_OR_COL constant3 TOK_SELEXPR TOK_TABLE_OR_COL val TOK_SELEXPR TOK_FUNCTION count 1 TOK_GROUPBY TOK_TABLE_OR_COL key TOK_TABLE_OR_COL constant3 TOK_TABLE_OR_COL val STAGE DEPENDENCIES: Stage-1 is a root stage Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Select Operator expressions: key (type: string), 2 (type: int), val (type: string) outputColumnNames: _col0, _col3, _col2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string), _col3 (type: int), _col2 (type: string) mode: final outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: true MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: t2 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key columns key,val columns.comments columns.types string:string #### A masked pattern was here #### name default.t2 numFiles 1 numRows 6 rawDataSize 24 serialization.ddl struct t2 { string key, string val} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 30 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 name: default.t2 Truncated Path -> Alias: /t2 [subq2:subq:t2] Stage: Stage-7 Conditional Operator Stage: Stage-4 Move Operator files: hdfs directory: true #### A masked pattern was here #### Stage: Stage-0 Move Operator tables: replace: true #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 Stage: Stage-2 Stats-Aggr Operator #### A masked pattern was here #### Stage: Stage-3 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### 
A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 name: default.outputtbl4 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-5 Map Reduce Map Operator Tree: TableScan GatherStats: false File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: -ext-10001 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns key1,key2,key3,cnt columns.comments columns.types int:int:string:int #### A masked pattern was here #### name default.outputtbl4 numFiles 1 numRows 6 rawDataSize 48 serialization.ddl struct outputtbl4 { i32 key1, i32 key2, string key3, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 54 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl4 name: default.outputtbl4 Truncated Path -> Alias: #### A masked pattern was here #### Stage: Stage-6 Move Operator files: hdfs directory: true #### A masked pattern was here #### PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, constant3, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@outputtbl4 POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( SELECT key, constant as constant2, val, 2 as constant3 from (SELECT key, 1 as constant, val from T2)subq )subq2 group by key, 
constant3, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@outputtbl4 POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: SELECT * FROM outputTbl4 PREHOOK: type: QUERY PREHOOK: Input: default@outputtbl4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM outputTbl4 POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl4 #### A masked pattern was here #### 1 2 11 1 2 2 12 1 3 2 13 1 7 2 17 1 8 2 18 1 8 2 28 1 PREHOOK: query: CREATE TABLE DEST1(key INT, cnt INT) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE DEST1(key INT, cnt INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@DEST1 PREHOOK: query: CREATE TABLE DEST2(key INT, val STRING, cnt INT) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: CREATE TABLE DEST2(key INT, val STRING, cnt INT) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@DEST2 PREHOOK: query: EXPLAIN FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string) outputColumnNames: key Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) bucketGroup: true keys: key (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Select Operator expressions: key (type: string), val (type: string) outputColumnNames: key, val Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: key (type: string), val (type: string) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Stage: Stage-5 Stats-Aggr Operator PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@dest1 PREHOOK: Output: default@dest2 POSTHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: select * from DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### POSTHOOK: query: select * from DEST1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### 1 1 2 1 3 1 7 1 8 2 PREHOOK: query: select * from DEST2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 #### A masked pattern was here #### 
POSTHOOK: query: select * from DEST2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### 1 11 1 2 12 1 3 13 1 7 17 1 8 18 1 8 28 1 PREHOOK: query: -- multi-table insert with a sub-query EXPLAIN FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY POSTHOOK: query: -- multi-table insert with a sub-query EXPLAIN FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: t2 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (key = 8) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: val (type: string) outputColumnNames: _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: '8' (type: string) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) bucketGroup: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Select Operator expressions: '8' (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Map Reduce Map Operator Tree: TableScan Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce 
partition columns: _col0 (type: string) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Stage: Stage-4 Stats-Aggr Operator Stage: Stage-1 Move Operator tables: replace: true table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Stage: Stage-5 Stats-Aggr Operator PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val PREHOOK: type: QUERY PREHOOK: Input: default@t2 PREHOOK: Output: default@dest1 PREHOOK: Output: default@dest2 POSTHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Output: default@dest1 POSTHOOK: Output: default@dest2 POSTHOOK: Lineage: dest1.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest1.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.cnt EXPRESSION [(t2)t2.null, ] POSTHOOK: Lineage: dest2.key EXPRESSION [(t2)t2.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest2.val SIMPLE [(t2)t2.FieldSchema(name:val, type:string, comment:null), ] PREHOOK: query: select * from DEST1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 #### A masked pattern was here #### POSTHOOK: query: select * from DEST1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### 8 2 PREHOOK: query: select * from DEST2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 #### A masked pattern was here #### POSTHOOK: query: select * from DEST2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### 8 18 1 8 28 1
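Editor's note (not part of the captured run): the "multiple levels of constants" test passes because Hive folds the literals 1 and 2 through both subquery layers before planning the group by. In Stage-1 above the Select Operator already emits the folded literal "2 (type: int)", so the effective grouping key is still T2's sort key plus constants and the aggregation completes map-side in a single job (Group By Operator mode: final). A minimal sketch of that check follows; the two session settings are an assumption inferred from the plan shapes in this file (map-side final group-bys on the sorted bucket key, and rand()-sprayed two-job plans elsewhere), not something the captured output states.

set hive.map.groupby.sorted=true;   -- assumed: allows map-side group-by on the sorted bucket key
set hive.groupby.skewindata=true;   -- assumed: explains the rand()-partitioned two-job plans

EXPLAIN
INSERT OVERWRITE TABLE outputTbl4
SELECT key, constant3, val, count(1)
FROM (SELECT key, constant AS constant2, val, 2 AS constant3
      FROM (SELECT key, 1 AS constant, val FROM T2) subq) subq2
GROUP BY key, constant3, val;
-- Expect a single map-only aggregation stage whose Select Operator lists
-- "2 (type: int)" in place of constant3, as in Stage-1 above.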
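The two multi-table inserts show both group-by strategies side by side in one query. The scan of T2 is shared; the GROUP BY key, val branch (DEST2) matches the table's sort prefix and is finalized on the map side, while the GROUP BY key branch (DEST1) takes the skew-safe path: a hash-mode map aggregation partitioned by rand(), a "partials" merge, then a second MapReduce job (Stage-3) producing mode: final. A hedged way to check that the extra job comes from skew handling rather than from the multi-insert itself, under the same assumed session settings:

set hive.groupby.skewindata=false;  -- assumed toggle; not captured in this file
EXPLAIN
FROM T2
INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key
INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val;
-- Expected (untested here): the DEST1 branch loses the rand() spray and the
-- partials stage, collapsing to a single MapReduce job per destination.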
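The last test demonstrates constant propagation out of a filtered sub-query: because x carries the predicate key = 8, both insert branches replace the key column with the literal "'8' (type: string)" in their Select Operators, so the grouping key is known at compile time even though T2.key is a string compared against an integer literal. With the key fixed to a constant, the DEST1 branch is effectively the global aggregate sketched below (an illustration of the equivalence, not a query from the captured run):

SELECT '8' AS key, count(1) AS cnt
FROM T2
WHERE key = 8;
-- One row, matching DEST1's result above (8 2). The DEST2 branch still
-- groups on val, which is why it keeps two rows (8 18 1 and 8 28 1).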