PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@srcpartbucket
POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
create table srcpartbucket (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 4 buckets
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@srcpartbucket
PREHOOK: query: insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpart
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
PREHOOK: Output: default@srcpartbucket
POSTHOOK: query: insert overwrite table srcpartbucket partition(ds, hr) select * from srcpart where ds is not null and key < 10
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpart
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: default@srcpartbucket@ds=2008-04-08/hr=11
POSTHOOK: Output: default@srcpartbucket@ds=2008-04-08/hr=12
POSTHOOK: Output: default@srcpartbucket@ds=2008-04-09/hr=11
POSTHOOK: Output: default@srcpartbucket@ds=2008-04-09/hr=12
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: srcpartbucket PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ]
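The .q script driving this output is not captured here; a dynamic-partition insert like the one above generally needs session settings along the following lines (a sketch, not part of the recorded output; hive.enforce.bucketing is only relevant before Hive 2.0, where bucketing became always enforced):

    -- Sketch only: settings typically required for the
    -- insert overwrite ... partition(ds, hr) statement above.
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.enforce.bucketing=true;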
PREHOOK: query: explain extended select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC
PREHOOK: type: QUERY
POSTHOOK: query: explain extended select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC
POSTHOOK: type: QUERY
ABSTRACT SYNTAX TREE:

TOK_QUERY
   TOK_FROM
      TOK_TABREF
         TOK_TABNAME
            srcpartbucket
         TOK_TABLEBUCKETSAMPLE
            1
            4
            TOK_TABLE_OR_COL
               key
   TOK_INSERT
      TOK_DESTINATION
         TOK_DIR
            TOK_TMP_FILE
      TOK_SELECT
         TOK_SELEXPR
            TOK_TABLE_OR_COL
               ds
         TOK_SELEXPR
            TOK_FUNCTION
               count
               1
      TOK_WHERE
         TOK_FUNCTION
            TOK_ISNOTNULL
            TOK_TABLE_OR_COL
               ds
      TOK_GROUPBY
         TOK_TABLE_OR_COL
            ds
      TOK_ORDERBY
         TOK_TABSORTCOLNAMEASC
            TOK_TABLE_OR_COL
               ds

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Map Reduce
      Map Operator Tree:
          TableScan
            alias: srcpartbucket
            Statistics: Num rows: 40 Data size: 240 Basic stats: COMPLETE Column stats: NONE
            GatherStats: false
            Filter Operator
              isSamplingPred: true
              predicate: (((hash(key) & 2147483647) % 4) = 0) (type: boolean)
              sampleDesc: BUCKET 1 OUT OF 4
              Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE
              Select Operator
                expressions: ds (type: string)
                outputColumnNames: ds
                Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                Group By Operator
                  aggregations: count(1)
                  keys: ds (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
                    Statistics: Num rows: 20 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                    tag: -1
                    value expressions: _col1 (type: bigint)
                    auto parallelism: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: 000000_0
            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
            partition values:
              ds 2008-04-08
              hr 11
            properties:
              COLUMN_STATS_ACCURATE true
              bucket_count 4
              bucket_field_name key
              columns key,value
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.srcpartbucket
              numFiles 4
              numRows 10
              partition_columns ds/hr
              partition_columns.types string:string
              rawDataSize 60
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              totalSize 351
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe

              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
              properties:
                bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.srcpartbucket
                partition_columns ds/hr
                partition_columns.types string:string
                serialization.ddl struct srcpartbucket { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              name: default.srcpartbucket
            name: default.srcpartbucket
#### A masked pattern was here ####
          Partition
            base file name: 000000_0
            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
            partition values:
              ds 2008-04-08
              hr 12
            properties:
              COLUMN_STATS_ACCURATE true
              bucket_count 4
              bucket_field_name key
              columns key,value
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.srcpartbucket
              numFiles 4
              numRows 10
              partition_columns ds/hr
              partition_columns.types string:string
              rawDataSize 60
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              totalSize 351
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe

              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
              properties:
                bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.srcpartbucket
                partition_columns ds/hr
                partition_columns.types string:string
                serialization.ddl struct srcpartbucket { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              name: default.srcpartbucket
            name: default.srcpartbucket
#### A masked pattern was here ####
          Partition
            base file name: 000000_0
            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
            partition values:
              ds 2008-04-09
              hr 11
            properties:
              COLUMN_STATS_ACCURATE true
              bucket_count 4
              bucket_field_name key
              columns key,value
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.srcpartbucket
              numFiles 4
              numRows 10
              partition_columns ds/hr
              partition_columns.types string:string
              rawDataSize 60
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              totalSize 351
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe

              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
              properties:
                bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.srcpartbucket
                partition_columns ds/hr
                partition_columns.types string:string
                serialization.ddl struct srcpartbucket { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              name: default.srcpartbucket
            name: default.srcpartbucket
#### A masked pattern was here ####
          Partition
            base file name: 000000_0
            input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
            output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
            partition values:
              ds 2008-04-09
              hr 12
            properties:
              COLUMN_STATS_ACCURATE true
              bucket_count 4
              bucket_field_name key
              columns key,value
              columns.comments 
              columns.types string:string
#### A masked pattern was here ####
              name default.srcpartbucket
              numFiles 4
              numRows 10
              partition_columns ds/hr
              partition_columns.types string:string
              rawDataSize 60
              serialization.ddl struct srcpartbucket { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              totalSize 351
#### A masked pattern was here ####
            serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe

              input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
              properties:
                bucket_count 4
                bucket_field_name key
                columns key,value
                columns.comments 
                columns.types string:string
#### A masked pattern was here ####
                name default.srcpartbucket
                partition_columns ds/hr
                partition_columns.types string:string
                serialization.ddl struct srcpartbucket { string key, string value}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
#### A masked pattern was here ####
              serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
              name: default.srcpartbucket
            name: default.srcpartbucket
      Truncated Path -> Alias:
        /srcpartbucket/ds=2008-04-08/hr=11/000000_0 [srcpartbucket]
        /srcpartbucket/ds=2008-04-08/hr=12/000000_0 [srcpartbucket]
        /srcpartbucket/ds=2008-04-09/hr=11/000000_0 [srcpartbucket]
        /srcpartbucket/ds=2008-04-09/hr=12/000000_0 [srcpartbucket]
      Needs Tagging: false
      Reduce Operator Tree:
        Group By Operator
          aggregations: count(VALUE._col0)
          keys: KEY._col0 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            GlobalTableId: 0
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
            table:
                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                properties:
                  columns _col0,_col1
                  columns.types string,bigint
                  escape.delim \
                  serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
            TotalFiles: 1
            GatherStats: false
            MultiFileSpray: false

  Stage: Stage-2
    Map Reduce
      Map Operator Tree:
          TableScan
            GatherStats: false
            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
              tag: -1
              value expressions: _col1 (type: bigint)
              auto parallelism: false
      Path -> Alias:
#### A masked pattern was here ####
      Path -> Partition:
#### A masked pattern was here ####
          Partition
            base file name: -mr-10003
            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
            properties:
              columns _col0,_col1
              columns.types string,bigint
              escape.delim \
              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
              properties:
                columns _col0,_col1
                columns.types string,bigint
                escape.delim \
                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
      Truncated Path -> Alias:
#### A masked pattern was here ####
      Needs Tagging: false
      Reduce Operator Tree:
        Select Operator
          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
          outputColumnNames: _col0, _col1
          Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
          File Output Operator
            compressed: false
            GlobalTableId: 0
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
            Statistics: Num rows: 10 Data size: 60 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                properties:
                  columns _col0,_col1
                  columns.types string:bigint
                  escape.delim \
                  hive.serialization.extend.additional.nesting.levels true
                  serialization.format 1
                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            TotalFiles: 1
            GatherStats: false
            MultiFileSpray: false

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink
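The heart of the plan is the sampling filter: tablesample (bucket 1 out of 4 on key) compiles to the predicate (((hash(key) & 2147483647) % 4) = 0), so bucket 1 in the 1-based syntax is bucket 0 of the sign-masked hash; and because the denominator matches the table's bucket count, Truncated Path -> Alias prunes the scan to the single 000000_0 file in each partition. As a sketch (assuming hash() at query time matches the hash used to bucket the data at write time), the same rows could be selected without TABLESAMPLE by writing the predicate by hand, though this variant scans every bucket file instead of pruning:

    -- Sketch: hand-written equivalent of the sampled query in the plan above.
    select ds, count(1)
    from srcpartbucket
    where ds is not null
      and (hash(key) & 2147483647) % 4 = 0
    group by ds
    order by ds asc;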
PREHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpartbucket
PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 4 on key) where ds is not null group by ds ORDER BY ds ASC
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpartbucket
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
#### A masked pattern was here ####
2008-04-08	10
2008-04-09	10
PREHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds ORDER BY ds ASC
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpartbucket
PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select ds, count(1) from srcpartbucket tablesample (bucket 1 out of 2 on key) where ds is not null group by ds ORDER BY ds ASC
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpartbucket
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
#### A masked pattern was here ####
2008-04-08	12
2008-04-09	12
PREHOOK: query: select * from srcpartbucket where ds is not null ORDER BY key ASC, value ASC, ds ASC, hr ASC
PREHOOK: type: QUERY
PREHOOK: Input: default@srcpartbucket
PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
PREHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
PREHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
#### A masked pattern was here ####
POSTHOOK: query: select * from srcpartbucket where ds is not null ORDER BY key ASC, value ASC, ds ASC, hr ASC
POSTHOOK: type: QUERY
POSTHOOK: Input: default@srcpartbucket
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-08/hr=12
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=11
POSTHOOK: Input: default@srcpartbucket@ds=2008-04-09/hr=12
#### A masked pattern was here ####
0	val_0	2008-04-08	11
0	val_0	2008-04-08	11
0	val_0	2008-04-08	11
0	val_0	2008-04-08	12
0	val_0	2008-04-08	12
0	val_0	2008-04-08	12
0	val_0	2008-04-09	11
0	val_0	2008-04-09	11
0	val_0	2008-04-09	11
0	val_0	2008-04-09	12
0	val_0	2008-04-09	12
0	val_0	2008-04-09	12
2	val_2	2008-04-08	11
2	val_2	2008-04-08	12
2	val_2	2008-04-09	11
2	val_2	2008-04-09	12
4	val_4	2008-04-08	11
4	val_4	2008-04-08	12
4	val_4	2008-04-09	11
4	val_4	2008-04-09	12
5	val_5	2008-04-08	11
5	val_5	2008-04-08	11
5	val_5	2008-04-08	11
5	val_5	2008-04-08	12
5	val_5	2008-04-08	12
5	val_5	2008-04-08	12
5	val_5	2008-04-09	11
5	val_5	2008-04-09	11
5	val_5	2008-04-09	11
5	val_5	2008-04-09	12
5	val_5	2008-04-09	12
5	val_5	2008-04-09	12
8	val_8	2008-04-08	11
8	val_8	2008-04-08	12
8	val_8	2008-04-09	11
8	val_8	2008-04-09	12
9	val_9	2008-04-08	11
9	val_9	2008-04-08	12
9	val_9	2008-04-09	11
9	val_9	2008-04-09	12
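A quick arithmetic check on the counts above (an inference, consistent with but not stated by this output): each ds value holds 20 rows over keys 0, 2, 4, 5, 8, 9. If hash() on these single-character strings follows Java String.hashCode semantics (48, 50, 52, 53, 56, 57), then bucket 1 out of 4 keeps hash % 4 = 0, i.e. keys 0, 4, 8 (3 + 1 + 1 = 5 rows per hr partition, 10 per ds), and bucket 1 out of 2 keeps hash % 2 = 0, i.e. keys 0, 2, 4, 8 (12 per ds), matching the 10/10 and 12/12 results. The assignment can be inspected directly:

    -- Sketch: per-key bucket assignment implied by the sampling predicate
    -- (the hash values quoted above are an assumption, checked only
    -- against the counts in this output).
    select distinct key,
                    (hash(key) & 2147483647) % 4 as bucket_mod_4,
                    (hash(key) & 2147483647) % 2 as bucket_mod_2
    from srcpartbucket;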