Saving all output to "!!{outputDirectory}!!/rand_partitionpruner3.q.raw". Enter "record" with no arguments to stop it.
>>> !run !!{qFileDirectory}!!/rand_partitionpruner3.q
>>> set hive.optimize.ppd=true;
No rows affected
>>> -- complex predicates in the where clause
>>> 
>>> explain extended select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2';
'Explain'
'ABSTRACT SYNTAX TREE:'
'  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_WHERE (and (and (and (< (TOK_FUNCTION rand 1) 0.1) (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08')) (not (or (> (TOK_TABLE_OR_COL key) 50) (< (TOK_TABLE_OR_COL key) 10)))) (like (. (TOK_TABLE_OR_COL a) hr) '%2')))))'
''
'STAGE DEPENDENCIES:'
'  Stage-1 is a root stage'
'  Stage-0 is a root stage'
''
'STAGE PLANS:'
'  Stage: Stage-1'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        a '
'          TableScan'
'            alias: a'
'            GatherStats: false'
'            Filter Operator'
'              isSamplingPred: false'
'              predicate:'
'                  expr: ((rand(1) < 0.1) and (not ((key > 50.0) or (key < 10.0))))'
'                  type: boolean'
'              Select Operator'
'                expressions:'
'                      expr: key'
'                      type: string'
'                      expr: value'
'                      type: string'
'                      expr: ds'
'                      type: string'
'                      expr: hr'
'                      type: string'
'                outputColumnNames: _col0, _col1, _col2, _col3'
'                File Output Operator'
'                  compressed: false'
'                  GlobalTableId: 0'
'                  directory: file:!!{hive.exec.scratchdir}!!'
'                  NumFilesPerFileSink: 1'
'                  Stats Publishing Key Prefix: file:!!{hive.exec.scratchdir}!!'
'                  table:'
'                      input format: org.apache.hadoop.mapred.TextInputFormat'
'                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                      properties:'
'                        columns _col0,_col1,_col2,_col3'
'                        columns.types string:string:string:string'
'                        escape.delim \'
'                        serialization.format 1'
'                  TotalFiles: 1'
'                  GatherStats: false'
'                  MultiFileSpray: false'
'      Needs Tagging: false'
'      Path -> Alias:'
'        !!{hive.metastore.warehouse.dir}!!/rand_partitionpruner3.db/srcpart/ds=2008-04-08/hr=12 [a]'
'      Path -> Partition:'
'        !!{hive.metastore.warehouse.dir}!!/rand_partitionpruner3.db/srcpart/ds=2008-04-08/hr=12 '
'          Partition'
'            base file name: hr=12'
'            input format: org.apache.hadoop.mapred.TextInputFormat'
'            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'            partition values:'
'              ds 2008-04-08'
'              hr 12'
'            properties:'
'              bucket_count -1'
'              columns key,value'
'              columns.types string:string'
'              file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              location !!{hive.metastore.warehouse.dir}!!/rand_partitionpruner3.db/srcpart/ds=2008-04-08/hr=12'
'              name rand_partitionpruner3.srcpart'
'              numFiles 1'
'              numPartitions 4'
'              numRows 0'
'              partition_columns ds/hr'
'              rawDataSize 0'
'              serialization.ddl struct srcpart { string key, string value}'
'              serialization.format 1'
'              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              totalSize 5812'
'              transient_lastDdlTime !!UNIXTIME!!'
'            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'          '
'              input format: org.apache.hadoop.mapred.TextInputFormat'
'              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              properties:'
'                bucket_count -1'
'                columns key,value'
'                columns.types string:string'
'                file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                location !!{hive.metastore.warehouse.dir}!!/rand_partitionpruner3.db/srcpart'
'                name rand_partitionpruner3.srcpart'
'                numFiles 4'
'                numPartitions 4'
'                numRows 0'
'                partition_columns ds/hr'
'                rawDataSize 0'
'                serialization.ddl struct srcpart { string key, string value}'
'                serialization.format 1'
'                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                totalSize 23248'
'                transient_lastDdlTime !!UNIXTIME!!'
'              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              name: rand_partitionpruner3.srcpart'
'            name: rand_partitionpruner3.srcpart'
''
'  Stage: Stage-0'
'    Fetch Operator'
'      limit: -1'
''
''
109 rows selected
>>> select a.* from srcpart a where rand(1) < 0.1 and a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2';
'key','value','ds','hr'
'42','val_42','2008-04-08','12'
'44','val_44','2008-04-08','12'
'26','val_26','2008-04-08','12'
'18','val_18','2008-04-08','12'
'37','val_37','2008-04-08','12'
5 rows selected
>>> 
>>> -- without rand for comparison
>>> explain extended select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2';
'Explain'
'ABSTRACT SYNTAX TREE:'
'  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME srcpart) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME a)))) (TOK_WHERE (and (and (= (. (TOK_TABLE_OR_COL a) ds) '2008-04-08') (not (or (> (TOK_TABLE_OR_COL key) 50) (< (TOK_TABLE_OR_COL key) 10)))) (like (. (TOK_TABLE_OR_COL a) hr) '%2')))))'
''
'STAGE DEPENDENCIES:'
'  Stage-1 is a root stage'
'  Stage-0 is a root stage'
''
'STAGE PLANS:'
'  Stage: Stage-1'
'    Map Reduce'
'      Alias -> Map Operator Tree:'
'        a '
'          TableScan'
'            alias: a'
'            GatherStats: false'
'            Filter Operator'
'              isSamplingPred: false'
'              predicate:'
'                  expr: (not ((key > 50.0) or (key < 10.0)))'
'                  type: boolean'
'              Select Operator'
'                expressions:'
'                      expr: key'
'                      type: string'
'                      expr: value'
'                      type: string'
'                      expr: ds'
'                      type: string'
'                      expr: hr'
'                      type: string'
'                outputColumnNames: _col0, _col1, _col2, _col3'
'                File Output Operator'
'                  compressed: false'
'                  GlobalTableId: 0'
'                  directory: file:!!{hive.exec.scratchdir}!!'
'                  NumFilesPerFileSink: 1'
'                  Stats Publishing Key Prefix: file:!!{hive.exec.scratchdir}!!'
'                  table:'
'                      input format: org.apache.hadoop.mapred.TextInputFormat'
'                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                      properties:'
'                        columns _col0,_col1,_col2,_col3'
'                        columns.types string:string:string:string'
'                        escape.delim \'
'                        serialization.format 1'
'                  TotalFiles: 1'
'                  GatherStats: false'
'                  MultiFileSpray: false'
'      Needs Tagging: false'
'      Path -> Alias:'
'        !!{hive.metastore.warehouse.dir}!!/rand_partitionpruner3.db/srcpart/ds=2008-04-08/hr=12 [a]'
'      Path -> Partition:'
'        !!{hive.metastore.warehouse.dir}!!/rand_partitionpruner3.db/srcpart/ds=2008-04-08/hr=12 '
'          Partition'
'            base file name: hr=12'
'            input format: org.apache.hadoop.mapred.TextInputFormat'
'            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'            partition values:'
'              ds 2008-04-08'
'              hr 12'
'            properties:'
'              bucket_count -1'
'              columns key,value'
'              columns.types string:string'
'              file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              location !!{hive.metastore.warehouse.dir}!!/rand_partitionpruner3.db/srcpart/ds=2008-04-08/hr=12'
'              name rand_partitionpruner3.srcpart'
'              numFiles 1'
'              numPartitions 4'
'              numRows 0'
'              partition_columns ds/hr'
'              rawDataSize 0'
'              serialization.ddl struct srcpart { string key, string value}'
'              serialization.format 1'
'              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              totalSize 5812'
'              transient_lastDdlTime !!UNIXTIME!!'
'            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'          '
'              input format: org.apache.hadoop.mapred.TextInputFormat'
'              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'              properties:'
'                bucket_count -1'
'                columns key,value'
'                columns.types string:string'
'                file.inputformat org.apache.hadoop.mapred.TextInputFormat'
'                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
'                location !!{hive.metastore.warehouse.dir}!!/rand_partitionpruner3.db/srcpart'
'                name rand_partitionpruner3.srcpart'
'                numFiles 4'
'                numPartitions 4'
'                numRows 0'
'                partition_columns ds/hr'
'                rawDataSize 0'
'                serialization.ddl struct srcpart { string key, string value}'
'                serialization.format 1'
'                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'                totalSize 23248'
'                transient_lastDdlTime !!UNIXTIME!!'
'              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
'              name: rand_partitionpruner3.srcpart'
'            name: rand_partitionpruner3.srcpart'
''
'  Stage: Stage-0'
'    Fetch Operator'
'      limit: -1'
''
''
109 rows selected
>>> select a.* from srcpart a where a.ds = '2008-04-08' and not(key > 50 or key < 10) and a.hr like '%2';
'key','value','ds','hr'
'27','val_27','2008-04-08','12'
'37','val_37','2008-04-08','12'
'15','val_15','2008-04-08','12'
'17','val_17','2008-04-08','12'
'20','val_20','2008-04-08','12'
'47','val_47','2008-04-08','12'
'35','val_35','2008-04-08','12'
'12','val_12','2008-04-08','12'
'24','val_24','2008-04-08','12'
'42','val_42','2008-04-08','12'
'26','val_26','2008-04-08','12'
'43','val_43','2008-04-08','12'
'15','val_15','2008-04-08','12'
'19','val_19','2008-04-08','12'
'10','val_10','2008-04-08','12'
'35','val_35','2008-04-08','12'
'11','val_11','2008-04-08','12'
'34','val_34','2008-04-08','12'
'42','val_42','2008-04-08','12'
'41','val_41','2008-04-08','12'
'30','val_30','2008-04-08','12'
'33','val_33','2008-04-08','12'
'35','val_35','2008-04-08','12'
'44','val_44','2008-04-08','12'
'12','val_12','2008-04-08','12'
'24','val_24','2008-04-08','12'
'26','val_26','2008-04-08','12'
'18','val_18','2008-04-08','12'
'18','val_18','2008-04-08','12'
'28','val_28','2008-04-08','12'
'37','val_37','2008-04-08','12'
31 rows selected
>>> !record