PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- SORT_QUERY_RESULTS -- List bucketing query logic test case. -- Test condition: -- 1. where clause has only one skewed column -- 2. where clause doesn't have non-skewed column -- Test focus: -- 1. list bucketing query logic works fine for subquery -- Test result: -- 1. pruner only pick up right directory -- 2. query result is right -- create 2 tables: fact_daily and fact_tz -- fact_daily will be used for list bucketing query -- fact_tz is a table used to prepare data and test directories CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@fact_daily POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- SORT_QUERY_RESULTS -- List bucketing query logic test case. -- Test condition: -- 1. where clause has only one skewed column -- 2. where clause doesn't have non-skewed column -- Test focus: -- 1. list bucketing query logic works fine for subquery -- Test result: -- 1. pruner only pick up right directory -- 2. query result is right -- create 2 tables: fact_daily and fact_tz -- fact_daily will be used for list bucketing query -- fact_tz is a table used to prepare data and test directories CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING) POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@fact_daily PREHOOK: query: CREATE TABLE fact_tz(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING) #### A masked pattern was here #### PREHOOK: type: CREATETABLE #### A masked pattern was here #### PREHOOK: Output: database:default PREHOOK: Output: default@fact_tz POSTHOOK: query: CREATE TABLE fact_tz(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING) #### A masked pattern was here #### POSTHOOK: type: CREATETABLE #### A masked pattern was here #### POSTHOOK: Output: database:default POSTHOOK: Output: default@fact_tz PREHOOK: query: -- create /fact_tz/ds=1/hr=1 directory INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='1') SELECT key, value FROM src WHERE key=484 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@fact_tz@ds=1/hr=1 POSTHOOK: query: -- create /fact_tz/ds=1/hr=1 directory INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='1') SELECT key, value FROM src WHERE key=484 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@fact_tz@ds=1/hr=1 POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- create /fact_tz/ds=1/hr=2 directory INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='2') SELECT key+11, value FROM src WHERE key=484 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@fact_tz@ds=1/hr=2 POSTHOOK: query: -- create /fact_tz/ds=1/hr=2 directory INSERT OVERWRITE TABLE fact_tz PARTITION (ds='1', hr='2') SELECT key+11, value FROM src WHERE key=484 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@fact_tz@ds=1/hr=2 POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_tz PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] #### A masked pattern was here #### PREHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_tz/ds=1 alter table fact_daily skewed by (x) on (484) PREHOOK: type: ALTERTABLE_SKEWED PREHOOK: Input: default@fact_daily PREHOOK: Output: default@fact_daily POSTHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_tz/ds=1 alter table fact_daily skewed by (x) on (484) POSTHOOK: type: ALTERTABLE_SKEWED POSTHOOK: Input: default@fact_daily POSTHOOK: Output: default@fact_daily PREHOOK: query: ALTER TABLE fact_daily SET TBLPROPERTIES('EXTERNAL'='TRUE') PREHOOK: type: ALTERTABLE_PROPERTIES PREHOOK: Input: default@fact_daily PREHOOK: Output: default@fact_daily POSTHOOK: query: ALTER TABLE fact_daily SET TBLPROPERTIES('EXTERNAL'='TRUE') POSTHOOK: type: ALTERTABLE_PROPERTIES POSTHOOK: Input: default@fact_daily POSTHOOK: Output: default@fact_daily PREHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1') #### A masked pattern was here #### PREHOOK: type: ALTERTABLE_ADDPARTS #### A masked pattern was here #### PREHOOK: Output: default@fact_daily POSTHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1') #### A masked pattern was here #### POSTHOOK: type: ALTERTABLE_ADDPARTS #### A masked pattern was here #### POSTHOOK: Output: default@fact_daily POSTHOOK: Output: default@fact_daily@ds=1 PREHOOK: query: -- set List Bucketing location map #### A masked pattern was here #### PREHOOK: type: ALTERTBLPART_SKEWED_LOCATION PREHOOK: Input: default@fact_daily PREHOOK: Output: default@fact_daily@ds=1 #### A masked pattern was here #### POSTHOOK: query: -- set List Bucketing location map #### A masked pattern was here #### POSTHOOK: type: ALTERTBLPART_SKEWED_LOCATION POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1 POSTHOOK: Output: default@fact_daily@ds=1 #### A masked pattern was here #### PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1') PREHOOK: type: DESCTABLE PREHOOK: Input: default@fact_daily POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1') POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@fact_daily # col_name data_type comment x int y string # Partition Information # col_name data_type comment ds string # Detailed Partition Information Partition Value: [1] Database: default Table: fact_daily #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: COLUMN_STATS_ACCURATE false #### A masked pattern was here #### numFiles 2 numRows -1 rawDataSize -1 totalSize 24 #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Skewed Columns: [x] Skewed Values: [[484]] #### A masked pattern was here #### Storage Desc Params: serialization.format 1 PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' PREHOOK: type: QUERY PREHOOK: Input: default@fact_daily PREHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='1' POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### 484 val_484 1 495 val_484 1 PREHOOK: query: -- The first subquery -- explain plan shows which directory selected: Truncated Path -> Alias explain extended select x from (select x from fact_daily where ds = '1') subq where x = 484 PREHOOK: type: QUERY POSTHOOK: query: -- The first subquery -- explain plan shows which directory selected: Truncated Path -> Alias explain extended select x from (select x from fact_daily where ds = '1') subq where x = 484 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME fact_daily TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL x TOK_WHERE = TOK_TABLE_OR_COL ds '1' subq TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL x TOK_WHERE = TOK_TABLE_OR_COL x 484 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: fact_daily Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (x = 484) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 484 (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0 columns.types int escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: x=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 properties: COLUMN_STATS_ACCURATE false bucket_count -1 columns x,y columns.comments columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 2 numRows -1 partition_columns ds partition_columns.types string rawDataSize -1 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 24 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: EXTERNAL TRUE bucket_count -1 columns x,y columns.comments columns.types int:string #### A masked pattern was here #### name default.fact_daily partition_columns ds partition_columns.types string serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: /fact_tz/ds=1/x=484 [$hdt$_0:fact_daily] Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: -- List Bucketing Query select x from (select * from fact_daily where ds = '1') subq where x = 484 PREHOOK: type: QUERY PREHOOK: Input: default@fact_daily PREHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### POSTHOOK: query: -- List Bucketing Query select x from (select * from fact_daily where ds = '1') subq where x = 484 POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### 484 PREHOOK: query: -- The second subquery -- explain plan shows which directory selected: Truncated Path -> Alias explain extended select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484 PREHOOK: type: QUERY POSTHOOK: query: -- The second subquery -- explain plan shows which directory selected: Truncated Path -> Alias explain extended select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME fact_daily TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL x x1 TOK_SELEXPR TOK_TABLE_OR_COL y y1 TOK_WHERE = TOK_TABLE_OR_COL ds '1' subq TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL x1 TOK_SELEXPR TOK_TABLE_OR_COL y1 TOK_WHERE = TOK_TABLE_OR_COL x1 484 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: fact_daily Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (x = 484) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: 484 (type: int), y (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1 columns.types int:string escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: x=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 properties: COLUMN_STATS_ACCURATE false bucket_count -1 columns x,y columns.comments columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 2 numRows -1 partition_columns ds partition_columns.types string rawDataSize -1 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 24 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: EXTERNAL TRUE bucket_count -1 columns x,y columns.comments columns.types int:string #### A masked pattern was here #### name default.fact_daily partition_columns ds partition_columns.types string serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: /fact_tz/ds=1/x=484 [$hdt$_0:fact_daily] Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: -- List Bucketing Query select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484 PREHOOK: type: QUERY PREHOOK: Input: default@fact_daily PREHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### POSTHOOK: query: -- List Bucketing Query select x1, y1 from(select x as x1, y as y1 from fact_daily where ds ='1') subq where x1 = 484 POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### 484 val_484 PREHOOK: query: -- The third subquery -- explain plan shows which directory selected: Truncated Path -> Alias explain extended select y, count(1) from fact_daily where ds ='1' and x = 484 group by y PREHOOK: type: QUERY POSTHOOK: query: -- The third subquery -- explain plan shows which directory selected: Truncated Path -> Alias explain extended select y, count(1) from fact_daily where ds ='1' and x = 484 group by y POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME fact_daily TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL y TOK_SELEXPR TOK_FUNCTION count 1 TOK_WHERE and = TOK_TABLE_OR_COL ds '1' = TOK_TABLE_OR_COL x 484 TOK_GROUPBY TOK_TABLE_OR_COL y STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: fact_daily Statistics: Num rows: 0 Data size: 24 Basic stats: PARTIAL Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (x = 484) (type: boolean) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: y (type: string) outputColumnNames: _col0 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: x=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 properties: COLUMN_STATS_ACCURATE false bucket_count -1 columns x,y columns.comments columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 2 numRows -1 partition_columns ds partition_columns.types string rawDataSize -1 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 24 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: EXTERNAL TRUE bucket_count -1 columns x,y columns.comments columns.types int:string #### A masked pattern was here #### name default.fact_daily partition_columns ds partition_columns.types string serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: /fact_tz/ds=1/x=484 [$hdt$_0:$hdt$_0:fact_daily] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1 columns.types string:bigint escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: -- List Bucketing Query select y, count(1) from fact_daily where ds ='1' and x = 484 group by y PREHOOK: type: QUERY PREHOOK: Input: default@fact_daily PREHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### POSTHOOK: query: -- List Bucketing Query select y, count(1) from fact_daily where ds ='1' and x = 484 group by y POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### val_484 1 PREHOOK: query: -- The fourth subquery -- explain plan shows which directory selected: Truncated Path -> Alias explain extended select x, c from (select x, count(1) as c from fact_daily where ds = '1' group by x) subq where x = 484 PREHOOK: type: QUERY POSTHOOK: query: -- The fourth subquery -- explain plan shows which directory selected: Truncated Path -> Alias explain extended select x, c from (select x, count(1) as c from fact_daily where ds = '1' group by x) subq where x = 484 POSTHOOK: type: QUERY ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_SUBQUERY TOK_QUERY TOK_FROM TOK_TABREF TOK_TABNAME fact_daily TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL x TOK_SELEXPR TOK_FUNCTION count 1 c TOK_WHERE = TOK_TABLE_OR_COL ds '1' TOK_GROUPBY TOK_TABLE_OR_COL x subq TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_TABLE_OR_COL x TOK_SELEXPR TOK_TABLE_OR_COL c TOK_WHERE = TOK_TABLE_OR_COL x 484 STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: fact_daily Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false predicate: (x = 484) (type: boolean) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: 484 (type: int) outputColumnNames: _col0 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: x=484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 properties: COLUMN_STATS_ACCURATE false bucket_count -1 columns x,y columns.comments columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 2 numRows -1 partition_columns ds partition_columns.types string rawDataSize -1 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 24 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: EXTERNAL TRUE bucket_count -1 columns x,y columns.comments columns.types int:string #### A masked pattern was here #### name default.fact_daily partition_columns ds partition_columns.types string serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: /fact_tz/ds=1/x=484 [$hdt$_0:$hdt$_0:fact_daily] Needs Tagging: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1 columns.types int:bigint escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: -- List Bucketing Query select x, c from (select x, count(1) as c from fact_daily where ds = '1' group by x) subq where x = 484 PREHOOK: type: QUERY PREHOOK: Input: default@fact_daily PREHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### POSTHOOK: query: -- List Bucketing Query select x, c from (select x, count(1) as c from fact_daily where ds = '1' group by x) subq where x = 484 POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1 #### A masked pattern was here #### 484 1