PREHOOK: query: -- SORT_QUERY_RESULTS --HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition create table dest_1 (key STRING, value STRING) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest_1 POSTHOOK: query: -- SORT_QUERY_RESULTS --HIVE-2101 mapjoin sometimes gives wrong results if there is a filter in the on condition create table dest_1 (key STRING, value STRING) stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_1 PREHOOK: query: insert overwrite table dest_1 select * from src1 order by src1.value limit 8 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_1 POSTHOOK: query: insert overwrite table dest_1 select * from src1 order by src1.value limit 8 POSTHOOK: type: QUERY POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_1 POSTHOOK: Lineage: dest_1.key SIMPLE [(src1)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_1.value SIMPLE [(src1)src1.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: insert into table dest_1 select "333444","555666" from src1 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_1 POSTHOOK: query: insert into table dest_1 select "333444","555666" from src1 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src1 POSTHOOK: Output: default@dest_1 POSTHOOK: Lineage: dest_1.key SIMPLE [] POSTHOOK: Lineage: dest_1.value SIMPLE [] PREHOOK: query: create table dest_2 (key STRING, value STRING) stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@dest_2 POSTHOOK: query: create table dest_2 (key STRING, value STRING) stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest_2 PREHOOK: query: insert into table dest_2 select * from dest_1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_1 PREHOOK: Output: default@dest_2 POSTHOOK: query: insert into table dest_2 select * from dest_1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_1 POSTHOOK: Output: default@dest_2 POSTHOOK: Lineage: dest_2.key SIMPLE [(dest_1)dest_1.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: dest_2.value SIMPLE [(dest_1)dest_1.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: SELECT * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@dest_1 PREHOOK: Input: default@dest_2 PREHOOK: Input: default@src1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_1 POSTHOOK: Input: default@dest_2 POSTHOOK: Input: default@src1 #### A masked pattern was here #### val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 128 128 128 146 val_146 146 val_146 146 val_146 224 224 224 369 369 369 NULL NULL 333444 555666 333444 555666 PREHOOK: query: explain SELECT /*+ mapjoin(src1, src2) */ * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY POSTHOOK: query: explain SELECT /*+ mapjoin(src1, src2) */ * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage Stage-1 depends on stages: Stage-4 Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-4 Map Reduce Local Work Alias -> Map Local Tables: src1 Fetch Operator limit: -1 src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: src1 TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) 1 key (type: string) 2 key (type: string) src2 TableScan alias: src2 Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) 1 key (type: string) 2 key (type: string) Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 Inner Join 1 to 2 keys: 0 key (type: string) 1 key (type: string) 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@dest_1 PREHOOK: Input: default@dest_2 PREHOOK: Input: default@src1 #### A masked pattern was here #### POSTHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_1 POSTHOOK: Input: default@dest_2 POSTHOOK: Input: default@src1 #### A masked pattern was here #### val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 128 128 128 146 val_146 146 val_146 146 val_146 224 224 224 369 369 369 NULL NULL 333444 555666 333444 555666 PREHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src1.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@dest_1 PREHOOK: Input: default@dest_2 PREHOOK: Input: default@src1 #### A masked pattern was here #### POSTHOOK: query: SELECT /*+ mapjoin(src1, src2) */ * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src1.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_1 POSTHOOK: Input: default@dest_2 POSTHOOK: Input: default@src1 #### A masked pattern was here #### val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 128 128 128 146 val_146 146 val_146 146 val_146 224 224 224 369 369 369 PREHOOK: query: SELECT * FROM src1 LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src1.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@dest_1 PREHOOK: Input: default@dest_2 PREHOOK: Input: default@src1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM src1 LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src1.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_1 POSTHOOK: Input: default@dest_2 POSTHOOK: Input: default@src1 #### A masked pattern was here #### val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 128 128 128 146 val_146 146 val_146 146 val_146 224 224 224 369 369 369 PREHOOK: query: SELECT * FROM src1 LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@dest_1 PREHOOK: Input: default@dest_2 PREHOOK: Input: default@src1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM src1 LEFT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_1 POSTHOOK: Input: default@dest_2 POSTHOOK: Input: default@src1 #### A masked pattern was here #### val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 128 128 128 146 val_146 146 val_146 146 val_146 224 224 224 369 369 369 PREHOOK: query: explain SELECT * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY POSTHOOK: query: explain SELECT * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-6 is a root stage Stage-2 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: src1 Fetch Operator limit: -1 src2 Fetch Operator limit: -1 Alias -> Map Local Operator Tree: src1 TableScan alias: src1 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) 1 key (type: string) 2 key (type: string) src2 TableScan alias: src2 Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE HashTable Sink Operator keys: 0 key (type: string) 1 key (type: string) 2 key (type: string) Stage: Stage-2 Map Reduce Map Operator Tree: TableScan alias: src3 Statistics: Num rows: 9 Data size: 40 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Right Outer Join0 to 1 Inner Join 1 to 2 keys: 0 key (type: string) 1 key (type: string) 2 key (type: string) outputColumnNames: _col0, _col1, _col5, _col6, _col10, _col11 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col5 (type: string), _col6 (type: string), _col10 (type: string), _col11 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) sort order: ++++++ Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE Local Work: Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false Statistics: Num rows: 55 Data size: 420 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: SELECT * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@dest_1 PREHOOK: Input: default@dest_2 PREHOOK: Input: default@src1 #### A masked pattern was here #### POSTHOOK: query: SELECT * FROM src1 RIGHT OUTER JOIN dest_1 src2 ON (src1.key = src2.key) JOIN dest_2 src3 ON (src2.key = src3.key) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_1 POSTHOOK: Input: default@dest_2 POSTHOOK: Input: default@src1 #### A masked pattern was here #### val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_165 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_193 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_265 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_27 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_409 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 val_484 128 128 128 146 val_146 146 val_146 146 val_146 224 224 224 369 369 369 NULL NULL 333444 555666 333444 555666