PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- List bucketing query logic test case. We simulate the directory structure by DML here. -- Test condition: -- 1. where clause has multiple skewed columns and non-skewed columns -- 3. where clause has a few operators -- Test focus: -- 1. basic list bucketing query work -- Test result: -- 1. pruner only pick up right directory -- 2. query result is right -- create 1 table: fact_daily -- 1. create a few partitions -- 2. dfs move partition according to list bucketing structure (simulate DML) -- $/fact_daily/ds=1/hr=4/x=../y=.. -- notes: waste all partitions except ds=1 and hr=4 for list bucketing query test -- 3. alter it to skewed table and set up location map -- 4. list bucketing query -- fact_daily (ds=1 and hr=4) will be used for list bucketing query CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING) #### A masked pattern was here #### PREHOOK: type: CREATETABLE POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.23) -- List bucketing query logic test case. We simulate the directory structure by DML here. -- Test condition: -- 1. where clause has multiple skewed columns and non-skewed columns -- 3. where clause has a few operators -- Test focus: -- 1. basic list bucketing query work -- Test result: -- 1. pruner only pick up right directory -- 2. query result is right -- create 1 table: fact_daily -- 1. create a few partitions -- 2. dfs move partition according to list bucketing structure (simulate DML) -- $/fact_daily/ds=1/hr=4/x=../y=.. -- notes: waste all partitions except ds=1 and hr=4 for list bucketing query test -- 3. alter it to skewed table and set up location map -- 4. list bucketing query -- fact_daily (ds=1 and hr=4) will be used for list bucketing query CREATE TABLE fact_daily(x int, y STRING) PARTITIONED BY (ds STRING, hr STRING) #### A masked pattern was here #### POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@fact_daily PREHOOK: query: -- create /fact_daily/ds=1/hr=1 directory INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1') SELECT key, value FROM src WHERE key=484 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@fact_daily@ds=1/hr=1 POSTHOOK: query: -- create /fact_daily/ds=1/hr=1 directory INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='1') SELECT key, value FROM src WHERE key=484 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@fact_daily@ds=1/hr=1 POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- create /fact_daily/ds=1/hr=2 directory INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2') SELECT key, value FROM src WHERE key=369 or key=406 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@fact_daily@ds=1/hr=2 POSTHOOK: query: -- create /fact_daily/ds=1/hr=2 directory INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='2') SELECT key, value FROM src WHERE key=369 or key=406 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@fact_daily@ds=1/hr=2 POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- create /fact_daily/ds=1/hr=3 directory INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3') SELECT key, value FROM src WHERE key=238 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@fact_daily@ds=1/hr=3 POSTHOOK: query: -- create /fact_daily/ds=1/hr=3 directory INSERT OVERWRITE TABLE fact_daily PARTITION (ds='1', hr='3') SELECT key, value FROM src WHERE key=238 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@fact_daily@ds=1/hr=3 POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] #### A masked pattern was here #### PREHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_daily/ds=1 alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238')) PREHOOK: type: ALTERTABLE_SKEWED PREHOOK: Input: default@fact_daily PREHOOK: Output: default@fact_daily POSTHOOK: query: -- switch fact_daily to skewed table and point its location to /fact_daily/ds=1 alter table fact_daily skewed by (x,y) on ((484,'val_484'),(238,'val_238')) POSTHOOK: type: ALTERTABLE_SKEWED POSTHOOK: Input: default@fact_daily POSTHOOK: Output: default@fact_daily POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='4') PREHOOK: type: ALTERTABLE_ADDPARTS PREHOOK: Input: default@fact_daily POSTHOOK: query: ALTER TABLE fact_daily ADD PARTITION (ds='1', hr='4') POSTHOOK: type: ALTERTABLE_ADDPARTS POSTHOOK: Input: default@fact_daily POSTHOOK: Output: default@fact_daily@ds=1/hr=4 POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: -- set List Bucketing location map #### A masked pattern was here #### PREHOOK: type: ALTERTBLPART_SKEWED_LOCATION PREHOOK: Input: default@fact_daily PREHOOK: Output: default@fact_daily@ds=1/hr=4 POSTHOOK: query: -- set List Bucketing location map #### A masked pattern was here #### POSTHOOK: type: ALTERTBLPART_SKEWED_LOCATION POSTHOOK: Input: default@fact_daily POSTHOOK: Input: default@fact_daily@ds=1/hr=4 POSTHOOK: Output: default@fact_daily@ds=1/hr=4 POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='4') PREHOOK: type: DESCTABLE POSTHOOK: query: describe formatted fact_daily PARTITION (ds = '1', hr='4') POSTHOOK: type: DESCTABLE POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] # col_name data_type comment x int None y string None # Partition Information # col_name data_type comment ds string None hr string None # Detailed Partition Information Partition Value: [1, 4] Database: default Table: fact_daily #### A masked pattern was here #### Protect Mode: None #### A masked pattern was here #### Partition Parameters: #### A masked pattern was here #### # Storage Information SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No Num Buckets: -1 Bucket Columns: [] Sort Columns: [] Skewed Columns: [x, y] Skewed Values: [[484, val_484], [238, val_238]] #### A masked pattern was here #### Storage Desc Params: serialization.format 1 PREHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='4' PREHOOK: type: QUERY PREHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM fact_daily WHERE ds='1' and hr='4' POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 369 val_369 1 4 406 val_406 1 4 369 val_369 1 4 369 val_369 1 4 406 val_406 1 4 406 val_406 1 4 406 val_406 1 4 238 val_238 1 4 238 val_238 1 4 484 val_484 1 4 PREHOOK: query: -- pruner only pick up default directory -- explain plan shows which directory selected: Truncated Path -> Alias explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484' PREHOOK: type: QUERY POSTHOOK: query: -- pruner only pick up default directory -- explain plan shows which directory selected: Truncated Path -> Alias explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484' POSTHOOK: type: QUERY POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (= (TOK_TABLE_OR_COL y) 'val_484'))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: fact_daily TableScan alias: fact_daily GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (y = 'val_484') type: boolean Select Operator expressions: expr: x type: int expr: y type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1 columns.types int:string escape.delim \ serialization.format 1 TotalFiles: 1 GatherStats: false MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 hr 4 properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily #### A masked pattern was here #### Partition base file name: y=val_484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 hr 4 properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily] /fact_daily/ds=1/hr=4/x=484/y=val_484 [fact_daily] Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: -- List Bucketing Query SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484' PREHOOK: type: QUERY PREHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### POSTHOOK: query: -- List Bucketing Query SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and y= 'val_484' POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 484 val_484 PREHOOK: query: -- pruner only pick up default directory -- explain plan shows which directory selected: Truncated Path -> Alias explain extended SELECT x FROM fact_daily WHERE ds='1' and hr='4' and x= 406 PREHOOK: type: QUERY POSTHOOK: query: -- pruner only pick up default directory -- explain plan shows which directory selected: Truncated Path -> Alias explain extended SELECT x FROM fact_daily WHERE ds='1' and hr='4' and x= 406 POSTHOOK: type: QUERY POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (= (TOK_TABLE_OR_COL x) 406))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: fact_daily TableScan alias: fact_daily GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (x = 406) type: boolean Select Operator expressions: expr: x type: int outputColumnNames: _col0 File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0 columns.types int escape.delim \ serialization.format 1 TotalFiles: 1 GatherStats: false MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 hr 4 properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: /fact_daily/ds=1/hr=4/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME/HIVE_DEFAULT_LIST_BUCKETING_DIR_NAME [fact_daily] Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: -- List Bucketing Query SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and x= 406 PREHOOK: type: QUERY PREHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### POSTHOOK: query: -- List Bucketing Query SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and x= 406 POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 406 val_406 406 val_406 406 val_406 406 val_406 PREHOOK: query: -- pruner only pick up skewed-value directory -- explain plan shows which directory selected: Truncated Path -> Alias explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484') or (x=238 and y= 'val_238')) PREHOOK: type: QUERY POSTHOOK: query: -- pruner only pick up skewed-value directory -- explain plan shows which directory selected: Truncated Path -> Alias explain extended SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484') or (x=238 and y= 'val_238')) POSTHOOK: type: QUERY POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME fact_daily))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL x)) (TOK_SELEXPR (TOK_TABLE_OR_COL y))) (TOK_WHERE (and (and (= (TOK_TABLE_OR_COL ds) '1') (= (TOK_TABLE_OR_COL hr) '4')) (or (and (= (TOK_TABLE_OR_COL x) 484) (= (TOK_TABLE_OR_COL y) 'val_484')) (and (= (TOK_TABLE_OR_COL x) 238) (= (TOK_TABLE_OR_COL y) 'val_238'))))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: fact_daily TableScan alias: fact_daily GatherStats: false Filter Operator isSamplingPred: false predicate: expr: (((x = 484) and (y = 'val_484')) or ((x = 238) and (y = 'val_238'))) type: boolean Select Operator expressions: expr: x type: int expr: y type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1 columns.types int:string escape.delim \ serialization.format 1 TotalFiles: 1 GatherStats: false MultiFileSpray: false Needs Tagging: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: y=val_238 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 hr 4 properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily #### A masked pattern was here #### Partition base file name: y=val_484 input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat partition values: ds 1 hr 4 properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: bucket_count -1 columns x,y columns.types int:string #### A masked pattern was here #### name default.fact_daily numFiles 3 numPartitions 3 numRows 10 partition_columns ds/hr rawDataSize 110 serialization.ddl struct fact_daily { i32 x, string y} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 120 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.fact_daily name: default.fact_daily Truncated Path -> Alias: /fact_daily/ds=1/hr=4/x=238/y=val_238 [fact_daily] /fact_daily/ds=1/hr=4/x=484/y=val_484 [fact_daily] Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: -- List Bucketing Query SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484') or (x=238 and y= 'val_238')) PREHOOK: type: QUERY PREHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### POSTHOOK: query: -- List Bucketing Query SELECT x,y FROM fact_daily WHERE ds='1' and hr='4' and ( (x=484 and y ='val_484') or (x=238 and y= 'val_238')) POSTHOOK: type: QUERY POSTHOOK: Input: default@fact_daily@ds=1/hr=4 #### A masked pattern was here #### POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] 238 val_238 238 val_238 484 val_484 PREHOOK: query: -- clean up drop table fact_daily PREHOOK: type: DROPTABLE PREHOOK: Input: default@fact_daily PREHOOK: Output: default@fact_daily POSTHOOK: query: -- clean up drop table fact_daily POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@fact_daily POSTHOOK: Output: default@fact_daily POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=1).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=2).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).x EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: fact_daily PARTITION(ds=1,hr=3).y SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]