PREHOOK: query: DROP TABLE rcfile_merge3a PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE rcfile_merge3a POSTHOOK: type: DROPTABLE PREHOOK: query: DROP TABLE rcfile_merge3b PREHOOK: type: DROPTABLE POSTHOOK: query: DROP TABLE rcfile_merge3b POSTHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE rcfile_merge3a (key int, value string) PARTITIONED BY (ds string) STORED AS TEXTFILE PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE rcfile_merge3a (key int, value string) PARTITIONED BY (ds string) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@rcfile_merge3a PREHOOK: query: CREATE TABLE rcfile_merge3b (key int, value string) STORED AS RCFILE PREHOOK: type: CREATETABLE POSTHOOK: query: CREATE TABLE rcfile_merge3b (key int, value string) STORED AS RCFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@rcfile_merge3b PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3a PARTITION (ds='1') SELECT * FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@rcfile_merge3a@ds=1 POSTHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3a PARTITION (ds='1') SELECT * FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@rcfile_merge3a@ds=1 POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3a PARTITION (ds='2') SELECT * FROM src PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@rcfile_merge3a@ds=2 POSTHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3a PARTITION (ds='2') SELECT * FROM src POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@rcfile_merge3a@ds=2 POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a POSTHOOK: type: QUERY POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] ABSTRACT SYNTAX TREE: (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME rcfile_merge3a))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB (TOK_TABNAME rcfile_merge3b))) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL key)) (TOK_SELEXPR (TOK_TABLE_OR_COL value))))) STAGE DEPENDENCIES: Stage-1 is a root stage Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 Stage-2 depends on stages: Stage-0 Stage-3 STAGE PLANS: Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: rcfile_merge3a TableScan alias: rcfile_merge3a Select Operator expressions: expr: key type: int expr: value type: string outputColumnNames: _col0, _col1 File Output Operator compressed: false GlobalTableId: 1 table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.rcfile_merge3b Stage: Stage-5 Conditional Operator Stage: Stage-4 Move Operator files: hdfs directory: true destination: pfile:/data/users/franklin/hive-block-merge/build/ql/scratchdir/hive_2011-06-10_11-35-56_180_2101176296464995430/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.rcfile_merge3b Stage: Stage-2 Stats-Aggr Operator Stage: Stage-3 Block level merge PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a PREHOOK: type: QUERY PREHOOK: Input: default@rcfile_merge3a@ds=1 PREHOOK: Input: default@rcfile_merge3a@ds=2 PREHOOK: Output: default@rcfile_merge3b POSTHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_merge3a@ds=1 POSTHOOK: Input: default@rcfile_merge3a@ds=2 POSTHOOK: Output: default@rcfile_merge3b POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3b.key SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: rcfile_merge3b.value SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c) FROM rcfile_merge3a ) t PREHOOK: type: QUERY PREHOOK: Input: default@rcfile_merge3a@ds=1 PREHOOK: Input: default@rcfile_merge3a@ds=2 PREHOOK: Output: file:/tmp/franklin/hive_2011-06-10_11-36-00_682_3122909838685909929/-mr-10000 POSTHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c) FROM rcfile_merge3a ) t POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_merge3a@ds=1 POSTHOOK: Input: default@rcfile_merge3a@ds=2 POSTHOOK: Output: file:/tmp/franklin/hive_2011-06-10_11-36-00_682_3122909838685909929/-mr-10000 POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3b.key SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: rcfile_merge3b.value SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:value, type:string, comment:null), ] 14412220296 PREHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c) FROM rcfile_merge3b ) t PREHOOK: type: QUERY PREHOOK: Input: default@rcfile_merge3b PREHOOK: Output: file:/tmp/franklin/hive_2011-06-10_11-36-04_156_4816517057321682165/-mr-10000 POSTHOOK: query: SELECT SUM(HASH(c)) FROM ( SELECT TRANSFORM(key, value) USING 'tr \t _' AS (c) FROM rcfile_merge3b ) t POSTHOOK: type: QUERY POSTHOOK: Input: default@rcfile_merge3b POSTHOOK: Output: file:/tmp/franklin/hive_2011-06-10_11-36-04_156_4816517057321682165/-mr-10000 POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3b.key SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: rcfile_merge3b.value SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:value, type:string, comment:null), ] 14412220296 PREHOOK: query: DROP TABLE rcfile_merge3a PREHOOK: type: DROPTABLE PREHOOK: Input: default@rcfile_merge3a PREHOOK: Output: default@rcfile_merge3a POSTHOOK: query: DROP TABLE rcfile_merge3a POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@rcfile_merge3a POSTHOOK: Output: default@rcfile_merge3a POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3b.key SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: rcfile_merge3b.value SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: DROP TABLE rcfile_merge3b PREHOOK: type: DROPTABLE PREHOOK: Input: default@rcfile_merge3b PREHOOK: Output: default@rcfile_merge3b POSTHOOK: query: DROP TABLE rcfile_merge3b POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@rcfile_merge3b POSTHOOK: Output: default@rcfile_merge3b POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3a PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: rcfile_merge3b.key SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: rcfile_merge3b.value SIMPLE [(rcfile_merge3a)rcfile_merge3a.FieldSchema(name:value, type:string, comment:null), ]