Saving all output to "!!{outputDirectory}!!/udtf_json_tuple.q.raw". Enter "record" with no arguments to stop it. >>> !run !!{qFileDirectory}!!/udtf_json_tuple.q >>> create table json_t (key string, jstring string); No rows affected >>> >>> insert overwrite table json_t select * from ( select '1', '{"f1": "value1", "f2": "value2", "f3": 3, "f5": 5.23}' from src limit 1 union all select '2', '{"f1": "value12", "f3": "value3", "f2": 2, "f4": 4.01}' from src limit 1 union all select '3', '{"f1": "value13", "f4": "value44", "f3": "value33", "f2": 2, "f5": 5.01}' from src limit 1 union all select '4', cast(null as string) from src limit 1 union all select '5', '{"f1": "", "f5": null}' from src limit 1 union all select '6', '[invalid JSON string]' from src limit 1 ) s; '_c0','_c1' No rows selected >>> >>> explain select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 order by a.key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME json_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (TOK_ALLCOLREF (TOK_TABNAME b)))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 is a root stage' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' a ' ' TableScan' ' alias: a' ' Lateral View Forward' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Lateral View Join Operator' ' outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col2' ' type: string' ' expr: _col3' ' type: string' ' expr: _col4' ' type: string' ' expr: _col5' ' type: string' ' expr: _col6' ' type: string' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' expr: _col2' ' type: string' ' expr: _col3' ' type: string' ' expr: _col4' ' type: string' ' expr: _col5' ' type: string' ' Select Operator' ' expressions:' ' expr: jstring' ' type: string' ' expr: 'f1'' ' type: string' ' expr: 'f2'' ' type: string' ' expr: 'f3'' ' type: string' ' expr: 'f4'' ' type: string' ' expr: 'f5'' ' type: string' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5' ' UDTF Operator' ' function name: json_tuple' ' Lateral View Join Operator' ' outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col2' ' type: string' ' expr: _col3' ' type: string' ' expr: _col4' ' type: string' ' expr: _col5' ' type: string' ' expr: _col6' ' type: string' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' expr: _col2' ' type: string' ' expr: _col3' ' type: string' ' expr: _col4' ' type: string' ' expr: _col5' ' type: string' ' Reduce Operator Tree:' ' Extract' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' '' ' Stage: Stage-0' ' Fetch Operator' ' limit: -1' '' '' 123 rows selected >>> >>> select a.key, b.* from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 order by a.key; 'key','f1','f2','f3','f4','f5' '1','value1','value2','3','','5.23' '2','value12','2','value3','4.01','' '3','value13','2','value33','value44','5.01' '4','','','','','' '5','','','','','' '6','','','','','' 6 rows selected >>> >>> explain select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a order by f1, f2, f3; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME json_t) a)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL f1)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL f2)) (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL f3)))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 is a root stage' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' a ' ' TableScan' ' alias: a' ' Select Operator' ' expressions:' ' expr: jstring' ' type: string' ' expr: 'f1'' ' type: string' ' expr: 'f2'' ' type: string' ' expr: 'f3'' ' type: string' ' expr: 'f4'' ' type: string' ' expr: 'f5'' ' type: string' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5' ' UDTF Operator' ' function name: json_tuple' ' Reduce Output Operator' ' key expressions:' ' expr: c0' ' type: string' ' expr: c1' ' type: string' ' expr: c2' ' type: string' ' sort order: +++' ' tag: -1' ' value expressions:' ' expr: c0' ' type: string' ' expr: c1' ' type: string' ' expr: c2' ' type: string' ' expr: c3' ' type: string' ' expr: c4' ' type: string' ' Reduce Operator Tree:' ' Extract' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' '' ' Stage: Stage-0' ' Fetch Operator' ' limit: -1' '' '' 66 rows selected >>> >>> select json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') as (f1, f2, f3, f4, f5) from json_t a order by f1, f2, f3; 'f1','f2','f3','f4','f5' '','','','','' '','','','','' '','','','','' 'value1','value2','3','','5.23' 'value12','2','value3','4.01','' 'value13','2','value33','value44','5.01' 6 rows selected >>> >>> explain select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 order by a.key; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME json_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) f2)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) f5))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-0 is a root stage' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' a ' ' TableScan' ' alias: a' ' Lateral View Forward' ' Select Operator' ' expressions:' ' expr: key' ' type: string' ' outputColumnNames: key' ' Lateral View Join Operator' ' outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col3' ' type: string' ' expr: _col6' ' type: string' ' outputColumnNames: _col0, _col1, _col2' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' expr: _col2' ' type: string' ' Select Operator' ' expressions:' ' expr: jstring' ' type: string' ' expr: 'f1'' ' type: string' ' expr: 'f2'' ' type: string' ' expr: 'f3'' ' type: string' ' expr: 'f4'' ' type: string' ' expr: 'f5'' ' type: string' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5' ' UDTF Operator' ' function name: json_tuple' ' Lateral View Join Operator' ' outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col3' ' type: string' ' expr: _col6' ' type: string' ' outputColumnNames: _col0, _col1, _col2' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: string' ' expr: _col2' ' type: string' ' Reduce Operator Tree:' ' Extract' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' '' ' Stage: Stage-0' ' Fetch Operator' ' limit: -1' '' '' 99 rows selected >>> >>> select a.key, b.f2, b.f5 from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 order by a.key; 'key','f2','f5' '1','value2','5.23' '2','2','' '3','2','5.01' '4','','' '5','','' '6','','' 6 rows selected >>> >>> explain select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 order by f2; 'Explain' 'ABSTRACT SYNTAX TREE:' ' (TOK_QUERY (TOK_FROM (TOK_LATERAL_VIEW (TOK_SELECT (TOK_SELEXPR (TOK_FUNCTION json_tuple (. (TOK_TABLE_OR_COL a) jstring) 'f1' 'f2' 'f3' 'f4' 'f5') f1 f2 f3 f4 f5 (TOK_TABALIAS b))) (TOK_TABREF (TOK_TABNAME json_t) a))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL f2)) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_WHERE (TOK_FUNCTION TOK_ISNOTNULL (TOK_TABLE_OR_COL f1))) (TOK_GROUPBY (TOK_TABLE_OR_COL f2)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (TOK_TABLE_OR_COL f2)))))' '' 'STAGE DEPENDENCIES:' ' Stage-1 is a root stage' ' Stage-2 depends on stages: Stage-1' ' Stage-0 is a root stage' '' 'STAGE PLANS:' ' Stage: Stage-1' ' Map Reduce' ' Alias -> Map Operator Tree:' ' a ' ' TableScan' ' alias: a' ' Lateral View Forward' ' Select Operator' ' Lateral View Join Operator' ' outputColumnNames: _col2, _col3, _col4, _col5, _col6' ' Filter Operator' ' predicate:' ' expr: _col2 is not null' ' type: boolean' ' Select Operator' ' expressions:' ' expr: _col3' ' type: string' ' outputColumnNames: _col3' ' Group By Operator' ' aggregations:' ' expr: count()' ' bucketGroup: false' ' keys:' ' expr: _col3' ' type: string' ' mode: hash' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: -1' ' value expressions:' ' expr: _col1' ' type: bigint' ' Select Operator' ' expressions:' ' expr: jstring' ' type: string' ' expr: 'f1'' ' type: string' ' expr: 'f2'' ' type: string' ' expr: 'f3'' ' type: string' ' expr: 'f4'' ' type: string' ' expr: 'f5'' ' type: string' ' outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5' ' UDTF Operator' ' function name: json_tuple' ' Lateral View Join Operator' ' outputColumnNames: _col2, _col3, _col4, _col5, _col6' ' Filter Operator' ' predicate:' ' expr: _col2 is not null' ' type: boolean' ' Select Operator' ' expressions:' ' expr: _col3' ' type: string' ' outputColumnNames: _col3' ' Group By Operator' ' aggregations:' ' expr: count()' ' bucketGroup: false' ' keys:' ' expr: _col3' ' type: string' ' mode: hash' ' outputColumnNames: _col0, _col1' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' Map-reduce partition columns:' ' expr: _col0' ' type: string' ' tag: -1' ' value expressions:' ' expr: _col1' ' type: bigint' ' Reduce Operator Tree:' ' Group By Operator' ' aggregations:' ' expr: count(VALUE._col0)' ' bucketGroup: false' ' keys:' ' expr: KEY._col0' ' type: string' ' mode: mergepartial' ' outputColumnNames: _col0, _col1' ' Select Operator' ' expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' outputColumnNames: _col0, _col1' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.SequenceFileInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat' '' ' Stage: Stage-2' ' Map Reduce' ' Alias -> Map Operator Tree:' ' file:!!{hive.exec.scratchdir}!! ' ' Reduce Output Operator' ' key expressions:' ' expr: _col0' ' type: string' ' sort order: +' ' tag: -1' ' value expressions:' ' expr: _col0' ' type: string' ' expr: _col1' ' type: bigint' ' Reduce Operator Tree:' ' Extract' ' File Output Operator' ' compressed: false' ' GlobalTableId: 0' ' table:' ' input format: org.apache.hadoop.mapred.TextInputFormat' ' output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat' '' ' Stage: Stage-0' ' Fetch Operator' ' limit: -1' '' 150 rows selected >>> >>> select f2, count(*) from json_t a lateral view json_tuple(a.jstring, 'f1', 'f2', 'f3', 'f4', 'f5') b as f1, f2, f3, f4, f5 where f1 is not null group by f2 order by f2; 'f2','_c1' '','1' '2','2' 'value2','1' 3 rows selected >>> !record