PREHOOK: query: create table if not exists emp_staging ( lastname string, deptid int ) row format delimited fields terminated by '|' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: create table if not exists emp_staging ( lastname string, deptid int ) row format delimited fields terminated by '|' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@emp_staging PREHOOK: query: create table if not exists dept_staging ( deptid int, deptname string ) row format delimited fields terminated by '|' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: create table if not exists dept_staging ( deptid int, deptname string ) row format delimited fields terminated by '|' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dept_staging PREHOOK: query: create table if not exists loc_staging ( state string, locid int, zip bigint, year int ) row format delimited fields terminated by '|' stored as textfile PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: create table if not exists loc_staging ( state string, locid int, zip bigint, year int ) row format delimited fields terminated by '|' stored as textfile POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@loc_staging PREHOOK: query: create table if not exists emp_orc like emp_staging PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: create table if not exists emp_orc like emp_staging POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@emp_orc PREHOOK: query: alter table emp_orc set fileformat orc PREHOOK: type: ALTERTABLE_FILEFORMAT PREHOOK: Input: default@emp_orc PREHOOK: Output: default@emp_orc POSTHOOK: query: alter table emp_orc set fileformat orc POSTHOOK: type: ALTERTABLE_FILEFORMAT POSTHOOK: Input: default@emp_orc POSTHOOK: Output: default@emp_orc PREHOOK: query: create table if not exists dept_orc like dept_staging PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: create table if not exists dept_orc like dept_staging POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dept_orc PREHOOK: query: alter table dept_orc set fileformat orc PREHOOK: type: ALTERTABLE_FILEFORMAT PREHOOK: Input: default@dept_orc PREHOOK: Output: default@dept_orc POSTHOOK: query: alter table dept_orc set fileformat orc POSTHOOK: type: ALTERTABLE_FILEFORMAT POSTHOOK: Input: default@dept_orc POSTHOOK: Output: default@dept_orc PREHOOK: query: create table loc_orc like loc_staging PREHOOK: type: CREATETABLE PREHOOK: Output: database:default POSTHOOK: query: create table loc_orc like loc_staging POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@loc_orc PREHOOK: query: alter table loc_orc set fileformat orc PREHOOK: type: ALTERTABLE_FILEFORMAT PREHOOK: Input: default@loc_orc PREHOOK: Output: default@loc_orc POSTHOOK: query: alter table loc_orc set fileformat orc POSTHOOK: type: ALTERTABLE_FILEFORMAT POSTHOOK: Input: default@loc_orc POSTHOOK: Output: default@loc_orc PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@emp_staging POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@emp_staging PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@dept_staging POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@dept_staging PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging PREHOOK: type: LOAD #### A masked pattern was here #### PREHOOK: Output: default@loc_staging POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@loc_staging PREHOOK: query: insert overwrite table emp_orc select * from emp_staging PREHOOK: type: QUERY PREHOOK: Input: default@emp_staging PREHOOK: Output: default@emp_orc POSTHOOK: query: insert overwrite table emp_orc select * from emp_staging POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_staging POSTHOOK: Output: default@emp_orc POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] PREHOOK: query: insert overwrite table dept_orc select * from dept_staging PREHOOK: type: QUERY PREHOOK: Input: default@dept_staging PREHOOK: Output: default@dept_orc POSTHOOK: query: insert overwrite table dept_orc select * from dept_staging POSTHOOK: type: QUERY POSTHOOK: Input: default@dept_staging POSTHOOK: Output: default@dept_orc POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] PREHOOK: query: insert overwrite table loc_orc select * from loc_staging PREHOOK: type: QUERY PREHOOK: Input: default@loc_staging PREHOOK: Output: default@loc_orc POSTHOOK: query: insert overwrite table loc_orc select * from loc_staging POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_staging POSTHOOK: Output: default@loc_orc POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] PREHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@emp_orc #### A masked pattern was here #### POSTHOOK: query: analyze table emp_orc compute statistics for columns lastname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@emp_orc #### A masked pattern was here #### POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] PREHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid PREHOOK: type: QUERY PREHOOK: Input: default@dept_orc #### A masked pattern was here #### POSTHOOK: query: analyze table dept_orc compute statistics for columns deptname,deptid POSTHOOK: type: QUERY POSTHOOK: Input: default@dept_orc #### A masked pattern was here #### POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] PREHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year PREHOOK: type: QUERY PREHOOK: Input: default@loc_orc #### A masked pattern was here #### POSTHOOK: query: analyze table loc_orc compute statistics for columns state,locid,zip,year POSTHOOK: type: QUERY POSTHOOK: Input: default@loc_orc #### A masked pattern was here #### POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] PREHOOK: query: -- number of rows -- emp_orc - 6 -- dept_orc - 4 -- loc_orc - 8 -- count distincts for relevant columns (since count distinct values are approximate in some cases count distint values will be greater than number of rows) -- emp_orc.deptid - 3 -- emp_orc.lastname - 7 -- dept_orc.deptid - 6 -- dept_orc.deptname - 5 -- loc_orc.locid - 6 -- loc_orc.state - 7 -- Expected output rows: 4 -- Reason: #rows = (6*4)/max(3,6) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) PREHOOK: type: QUERY POSTHOOK: query: -- number of rows -- emp_orc - 6 -- dept_orc - 4 -- loc_orc - 8 -- count distincts for relevant columns (since count distinct values are approximate in some cases count distint values will be greater than number of rows) -- emp_orc.deptid - 3 -- emp_orc.lastname - 7 -- dept_orc.deptid - 6 -- dept_orc.deptname - 5 -- loc_orc.locid - 6 -- loc_orc.state - 7 -- Expected output rows: 4 -- Reason: #rows = (6*4)/max(3,6) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) POSTHOOK: type: QUERY POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_TABREF TOK_TABNAME emp_orc e TOK_TABREF TOK_TABNAME dept_orc d = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL d deptid TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: d Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: deptid (type: int), deptname (type: string) TableScan alias: e Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: lastname (type: string), deptid (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: dept_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dept_orc name: default.dept_orc #### A masked pattern was here #### Partition base file name: emp_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.emp_orc name: default.emp_orc Truncated Path -> Alias: /dept_orc [d] /emp_orc [e] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col4, _col5 Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 4 Data size: 760 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1,_col2,_col3 columns.types string:int:int:string escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: -- 3 way join -- Expected output rows: 4 -- Reason: #rows = (6*4*6)/max(3,6)*max(6,3) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid) PREHOOK: type: QUERY POSTHOOK: query: -- 3 way join -- Expected output rows: 4 -- Reason: #rows = (6*4*6)/max(3,6)*max(6,3) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid) POSTHOOK: type: QUERY POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_JOIN TOK_TABREF TOK_TABNAME emp_orc e TOK_TABREF TOK_TABNAME dept_orc d = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL d deptid TOK_TABREF TOK_TABNAME emp_orc e1 = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL e1 deptid TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: d Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: deptid (type: int), deptname (type: string) TableScan alias: e1 Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 2 value expressions: lastname (type: string), deptid (type: int) TableScan alias: e Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: lastname (type: string), deptid (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: dept_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dept_orc name: default.dept_orc #### A masked pattern was here #### Partition base file name: emp_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.emp_orc name: default.emp_orc Truncated Path -> Alias: /dept_orc [d] /emp_orc [e1, e] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9 Statistics: Num rows: 4 Data size: 1136 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 4 Data size: 1136 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 4 Data size: 1136 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1,_col2,_col3,_col4,_col5 columns.types string:int:int:string:string:int escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: -- Expected output rows: 5 -- Reason: #rows = (6*4*8)/max(3,6)*max(6,6) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid) PREHOOK: type: QUERY POSTHOOK: query: -- Expected output rows: 5 -- Reason: #rows = (6*4*8)/max(3,6)*max(6,6) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid) POSTHOOK: type: QUERY POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_JOIN TOK_TABREF TOK_TABNAME emp_orc e TOK_TABREF TOK_TABNAME dept_orc d = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL d deptid TOK_TABREF TOK_TABNAME loc_orc l = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL l locid TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: d Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: deptid (type: int), deptname (type: string) TableScan alias: e Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int) sort order: + Map-reduce partition columns: deptid (type: int) Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: lastname (type: string), deptid (type: int) TableScan alias: l Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: locid (type: int) sort order: + Map-reduce partition columns: locid (type: int) Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE tag: 2 value expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: dept_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dept_orc name: default.dept_orc #### A masked pattern was here #### Partition base file name: emp_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.emp_orc name: default.emp_orc #### A masked pattern was here #### Partition base file name: loc_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns state,locid,zip,year columns.comments columns.types string:int:bigint:int field.delim | #### A masked pattern was here #### name default.loc_orc numFiles 1 numRows 8 rawDataSize 796 serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 489 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns state,locid,zip,year columns.comments columns.types string:int:bigint:int field.delim | #### A masked pattern was here #### name default.loc_orc numFiles 1 numRows 8 rawDataSize 796 serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 489 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.loc_orc name: default.loc_orc Truncated Path -> Alias: /dept_orc [d] /emp_orc [e] /loc_orc [l] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 Statistics: Num rows: 5 Data size: 1449 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: bigint), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 5 Data size: 1449 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 5 Data size: 1449 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 columns.types string:int:int:string:string:int:bigint:int escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: -- join keys of different types -- Expected output rows: 4 -- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state) PREHOOK: type: QUERY POSTHOOK: query: -- join keys of different types -- Expected output rows: 4 -- Reason: #rows = (6*4*8)/max(3,6)*max(6,7) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state) POSTHOOK: type: QUERY POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_JOIN TOK_TABREF TOK_TABNAME emp_orc e TOK_TABREF TOK_TABNAME dept_orc d = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL d deptid TOK_TABREF TOK_TABNAME loc_orc l = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL l state TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: d Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: UDFToDouble(deptid) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(deptid) (type: double) Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: deptid (type: int), deptname (type: string) TableScan alias: e Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: UDFToDouble(deptid) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(deptid) (type: double) Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: lastname (type: string), deptid (type: int) TableScan alias: l Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: UDFToDouble(state) (type: double) sort order: + Map-reduce partition columns: UDFToDouble(state) (type: double) Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE tag: 2 value expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: dept_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dept_orc name: default.dept_orc #### A masked pattern was here #### Partition base file name: emp_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.emp_orc name: default.emp_orc #### A masked pattern was here #### Partition base file name: loc_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns state,locid,zip,year columns.comments columns.types string:int:bigint:int field.delim | #### A masked pattern was here #### name default.loc_orc numFiles 1 numRows 8 rawDataSize 796 serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 489 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns state,locid,zip,year columns.comments columns.types string:int:bigint:int field.delim | #### A masked pattern was here #### name default.loc_orc numFiles 1 numRows 8 rawDataSize 796 serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 489 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.loc_orc name: default.loc_orc Truncated Path -> Alias: /dept_orc [d] /emp_orc [e] /loc_orc [l] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 Statistics: Num rows: 4 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: bigint), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 4 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 4 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 columns.types string:int:int:string:string:int:bigint:int escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: -- multi-attribute join -- Expected output rows: 0 -- Reason: #rows = (6*4)/max(3,6)*max(7,5) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) PREHOOK: type: QUERY POSTHOOK: query: -- multi-attribute join -- Expected output rows: 0 -- Reason: #rows = (6*4)/max(3,6)*max(7,5) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) POSTHOOK: type: QUERY POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_TABREF TOK_TABNAME emp_orc e TOK_TABREF TOK_TABNAME dept_orc d and = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL d deptid = . TOK_TABLE_OR_COL e lastname . TOK_TABLE_OR_COL d deptname TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: d Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int), deptname (type: string) sort order: ++ Map-reduce partition columns: deptid (type: int), deptname (type: string) Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: deptid (type: int), deptname (type: string) TableScan alias: e Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int), lastname (type: string) sort order: ++ Map-reduce partition columns: deptid (type: int), lastname (type: string) Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: lastname (type: string), deptid (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: dept_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dept_orc name: default.dept_orc #### A masked pattern was here #### Partition base file name: emp_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.emp_orc name: default.emp_orc Truncated Path -> Alias: /dept_orc [d] /emp_orc [e] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} outputColumnNames: _col0, _col1, _col4, _col5 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1,_col2,_col3 columns.types string:int:int:string escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-0 Fetch Operator limit: -1 PREHOOK: query: -- 3 way and multi-attribute join -- Expected output rows: 0 -- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state) PREHOOK: type: QUERY POSTHOOK: query: -- 3 way and multi-attribute join -- Expected output rows: 0 -- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7) explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state) POSTHOOK: type: QUERY POSTHOOK: Lineage: dept_orc.deptid SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: dept_orc.deptname SIMPLE [(dept_staging)dept_staging.FieldSchema(name:deptname, type:string, comment:null), ] POSTHOOK: Lineage: emp_orc.deptid SIMPLE [(emp_staging)emp_staging.FieldSchema(name:deptid, type:int, comment:null), ] POSTHOOK: Lineage: emp_orc.lastname SIMPLE [(emp_staging)emp_staging.FieldSchema(name:lastname, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.locid SIMPLE [(loc_staging)loc_staging.FieldSchema(name:locid, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.state SIMPLE [(loc_staging)loc_staging.FieldSchema(name:state, type:string, comment:null), ] POSTHOOK: Lineage: loc_orc.year SIMPLE [(loc_staging)loc_staging.FieldSchema(name:year, type:int, comment:null), ] POSTHOOK: Lineage: loc_orc.zip SIMPLE [(loc_staging)loc_staging.FieldSchema(name:zip, type:bigint, comment:null), ] ABSTRACT SYNTAX TREE: TOK_QUERY TOK_FROM TOK_JOIN TOK_JOIN TOK_TABREF TOK_TABNAME emp_orc e TOK_TABREF TOK_TABNAME dept_orc d and = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL d deptid = . TOK_TABLE_OR_COL e lastname . TOK_TABLE_OR_COL d deptname TOK_TABREF TOK_TABNAME loc_orc l and = . TOK_TABLE_OR_COL e deptid . TOK_TABLE_OR_COL l locid = . TOK_TABLE_OR_COL e lastname . TOK_TABLE_OR_COL l state TOK_INSERT TOK_DESTINATION TOK_DIR TOK_TMP_FILE TOK_SELECT TOK_SELEXPR TOK_ALLCOLREF STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 is a root stage STAGE PLANS: Stage: Stage-1 Map Reduce Map Operator Tree: TableScan alias: d Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int), deptname (type: string) sort order: ++ Map-reduce partition columns: deptid (type: int), deptname (type: string) Statistics: Num rows: 4 Data size: 384 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: deptid (type: int), deptname (type: string) TableScan alias: e Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: deptid (type: int), lastname (type: string) sort order: ++ Map-reduce partition columns: deptid (type: int), lastname (type: string) Statistics: Num rows: 6 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: lastname (type: string), deptid (type: int) TableScan alias: l Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: locid (type: int), state (type: string) sort order: ++ Map-reduce partition columns: locid (type: int), state (type: string) Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: COMPLETE tag: 2 value expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition base file name: dept_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns deptid,deptname columns.comments columns.types int:string field.delim | #### A masked pattern was here #### name default.dept_orc numFiles 1 numRows 4 rawDataSize 384 serialization.ddl struct dept_orc { i32 deptid, string deptname} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 329 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dept_orc name: default.dept_orc #### A masked pattern was here #### Partition base file name: emp_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns lastname,deptid columns.comments columns.types string:int field.delim | #### A masked pattern was here #### name default.emp_orc numFiles 1 numRows 6 rawDataSize 560 serialization.ddl struct emp_orc { string lastname, i32 deptid} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 349 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.emp_orc name: default.emp_orc #### A masked pattern was here #### Partition base file name: loc_orc input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns state,locid,zip,year columns.comments columns.types string:int:bigint:int field.delim | #### A masked pattern was here #### name default.loc_orc numFiles 1 numRows 8 rawDataSize 796 serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 489 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: COLUMN_STATS_ACCURATE true bucket_count -1 columns state,locid,zip,year columns.comments columns.types string:int:bigint:int field.delim | #### A masked pattern was here #### name default.loc_orc numFiles 1 numRows 8 rawDataSize 796 serialization.ddl struct loc_orc { string state, i32 locid, i64 zip, i32 year} serialization.format | serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde totalSize 489 #### A masked pattern was here #### serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.loc_orc name: default.loc_orc Truncated Path -> Alias: /dept_orc [d] /emp_orc [e] /loc_orc [l] Needs Tagging: true Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 Inner Join 0 to 2 condition expressions: 0 {VALUE._col0} {VALUE._col1} 1 {VALUE._col0} {VALUE._col1} 2 {VALUE._col0} {VALUE._col1} {VALUE._col2} {VALUE._col3} outputColumnNames: _col0, _col1, _col4, _col5, _col8, _col9, _col10, _col11 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: int), _col4 (type: int), _col5 (type: string), _col8 (type: string), _col9 (type: int), _col10 (type: bigint), _col11 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 columns.types string:int:int:string:string:int:bigint:int escape.delim \ hive.serialization.extend.nesting.levels true serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe TotalFiles: 1 GatherStats: false MultiFileSpray: false Stage: Stage-0 Fetch Operator limit: -1