#!/usr/bin/env perl
############################################################################
#  Licensed to the Apache Software Foundation (ASF) under one or more
#  contributor license agreements.  See the NOTICE file distributed with
#  this work for additional information regarding copyright ownership.
#  The ASF licenses this file to You under the Apache License, Version 2.0
#  (the "License"); you may not use this file except in compliance with
#  the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

###############################################################################
# Nightly tests for pig.
#
# This file builds a single hash reference, $cfg, with this shape:
#
#   driver      - name of the test driver ('Pig')
#   nummachines - number of machines (5)
#   groups      - list of test groups; each group has a 'name', optional
#                 group-wide settings ('floatpostprocess', 'delimiter',
#                 'execonly') and a list of 'tests'
#
# Every test has a 'num', a Pig Latin script under 'pig', and exactly one of
# 'expected_out_regex' or 'expected_err_regex'.
#
# NOTE(review): $cfg is assigned without `my`, so it is a package variable;
# presumably the harness that loads this file reads it by name -- confirm
# before adding `use strict` or a lexical declaration.
# NOTE(review): the :INPATH:, :OUTPATH: and :FUNCPATH: tokens inside the Pig
# scripts look like placeholders substituted by the harness -- confirm.
#

#PigSetup::setup();

#my $me = `whoami`;
#chomp $me;

$cfg = {
    'driver'      => 'Pig',
    'nummachines' => 5,

    'groups' => [
        {
            'name'             => 'Describe_cmdline',
            'floatpostprocess' => 0,
            # NOTE(review): the delimiter is a single space as it appears
            # here; confirm it was not originally another whitespace
            # character (e.g. a tab).
            'delimiter'        => ' ',
            'tests'            => [

                #JIRA[PIG-372]
                #JIRA[PIG-374]
                #JIRA[PIG-384]
                {
                    'num' => 1,
                    'pig' => q\
A= load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
describe A;\,
                    'expected_out_regex' => "A: {name: bytearray,age: bytearray,gpa: bytearray}",
                },

                #JIRA[PIG-19], Commented out until fixed.
                #   {
                #       'num' => 2,
                #       'pig' => q\
                #A=load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name, age, gpa);
                #describe A;\,
                #       'expected_out_regex' => "A: {name: bytearray,age: bytearray,gpa: bytearray}",
                #   },

                #JIRA[PIG-373]
                {
                    'num' => 3,
                    'pig' => q\
A= load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray, age:int, gpa:double);
describe A;\,
                    'expected_out_regex' => "A: {name: chararray,age: int,gpa: double}",
                },

                #********************************************************
                #QUESTION: S/B SQL VERIFIER for DUMP statement?
                #********************************************************
                #
                #JIRA[PIG-373]
                #   {
                #       'num' => 4,
                #       'pig' => q\
                #A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray);
                #describe A;
                #A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as (name:chararray);
                #dump A;\,
                #
                #       'sql' => "select name, age, gpa from studenttab10k;",
                #   },

                #JIRA[PIG-373]
                #JIRA[PIG-405]
                {
                    'num' => 5,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
describe A;\,
                    'expected_out_regex' =>"A: {m: map\\[\\],x: bytearray,y: bytearray}",
                },

                #JIRA[PIG-373]
                #JIRA[PIG-405]
                {
                    'num' => 6,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
B= foreach A generate m;
describe A;
describe B;\,
                    # Expect (both describes come after both definitions)
                    # A: {m: map[],x: bytearray,y: bytearray}
                    # B: {m: map[]}
                    'expected_out_regex' => "A: {m: map\\[\\],x: bytearray,y: bytearray}\nB: {m: map\\[\\]}",
                },

                #JIRA[PIG-373]
                #JIRA[PIG-405]
                {
                    'num' => 7,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
describe A;
B= foreach A generate m;
describe B;\,
                    # Expect (each describe directly follows its definition)
                    # A: {m: map[],x: bytearray,y: bytearray}
                    # B: {m: map[]}
                    'expected_out_regex' => "A: {m: map\\[\\],x: bytearray,y: bytearray}\nB: {m: map\\[\\]}",
                },

                #JIRA[PIG-373]
                #JIRA[PIG-405]
                {
                    'num' => 8,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttabcomplex10k' using PigStorage() as (m:map[],x,y);
B= foreach A generate m;
describe B;
describe A;\,
                    # Expect (B is described before A, so B comes first)
                    # B: {m: map[]}
                    # A: {m: map[],x: bytearray,y: bytearray}
                    'expected_out_regex' => "B: {m: map\\[\\]}\nA: {m: map\\[\\],x: bytearray,y: bytearray}",
                },

                {
                    'num' => 14,
                    'pig' => q\A = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as (name:chararray, age:int, gpa:double, instate:boolean);
describe A;\,
                    'expected_out_regex' => "A: {name: chararray,age: int,gpa: double,instate: boolean}",
                },

                #JIRA[PIG-379]
                {
                    'num' => 9,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
describe A;
B= foreach A generate name, age;
describe B;
C= filter B by age > 30;
describe C;
D= group C by name;
describe D;\,
                    # EXPECT
                    # A: {name: chararray,age: int,gpa: float}
                    # B: {name: chararray,age: int}
                    # C: {name: chararray,age: int}
                    # D: {group: chararray,C: {(name: chararray,age: int)}}
                    'expected_out_regex' => "A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {\\(name: chararray,age: int\\)}}",
                },

                {
                    'num' => 10,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
B= foreach A generate name, age;
C= filter B by age > 30;
D= group C by name;
describe A;
describe B;
describe C;
describe D;\,
                    # EXPECT (note: unlike its siblings, this regex also
                    # requires a newline after the final schema)
                    # A: {name: chararray,age: int,gpa: float}
                    # B: {name: chararray,age: int}
                    # C: {name: chararray,age: int}
                    # D: {group: chararray,C: {(name: chararray,age: int)}}
                    'expected_out_regex' => "A: {name: chararray,age: int,gpa: float}\nB: {name: chararray,age: int}\nC: {name: chararray,age: int}\nD: {group: chararray,C: {\\(name: chararray,age: int\\)}}\n",
                },

                {
                    'num' => 11,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
B= foreach A generate name, age;
C= filter B by age > 30;
D= group C by name;
describe D;
describe C;
describe B;
describe A;\,
                    # EXPECT (reverse order, matching the describe order)
                    # D: {group: chararray,C: {(name: chararray,age: int)}}
                    # C: {name: chararray,age: int}
                    # B: {name: chararray,age: int}
                    # A: {name: chararray,age: int,gpa: float}
                    'expected_out_regex' => "D: {group: chararray,C: {\\(name: chararray,age: int\\)}}\nC: {name: chararray,age: int}\nB: {name: chararray,age: int}\nA: {name: chararray,age: int,gpa: float}",
                },

                {
                    'num' => 12,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age:int, gpa:float);
B= foreach A generate name, age;
C= filter B by age > 30;
D= group C by name;
describe X;
describe D;
describe C;
describe B;
describe A;\,
                    # EXPECT
                    # An error rather than schemas: alias X is described
                    # but never defined in the script.
                    'expected_err_regex' => "No plan for X to describe"
                },

                {
                    'num' => 13,
                    'pig' => q\
A = LOAD ':INPATH:/singlefile/studenttab10k' AS (name: chararray, age: int, gpa: float);
B = LOAD 'voter_data' AS (name: chararray, age: int, registration: chararray, contributions: float);
C = COGROUP A BY name, B BY name;
D = FOREACH C GENERATE group, flatten((not IsEmpty(A) ? A : (bag{tuple(chararray, int, float)}){(null, null, null)})), flatten((not IsEmpty(B) ? B : (bag{tuple(chararray, int, chararray, float)}){(null,null,null, null)}));
describe D;\,
                    # EXPECT
                    # D: {group: chararray,A::name: chararray,A::age: int,A::gpa: float,B::name: chararray,B::age: int,B::registration: chararray,B::contributions: float}
                    'expected_out_regex' => "D: {group: chararray,A::name: chararray,A::age: int,A::gpa: float,B::name: chararray,B::age: int,B::registration: chararray,B::contributions: float}"
                }
            ],
        },

        {
            'name'             => 'Unicode_cmdline',
            'floatpostprocess' => 0,
            'delimiter'        => ' ',
            'tests'            => [
                {
                    'num' => 1,
                    'pig' => q\
A = load ':INPATH:/singlefile/unicode100' as (name:chararray);
dump A;\,
                    'expected_out_regex' => "(bobĪ„)"
                },
            ],
        },

        {
            'name'             => 'Warning',
            'floatpostprocess' => 0,
            'execonly'         => 'mapred', # Warnings use counters, which don't work in local mode
            'delimiter'        => ' ',
            'tests'            => [

                {
                    #Checking divide by zero warning
                    'num' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studentnulltab10k' as (name:chararray, age:int, gpa:double);
b = foreach a generate (int)((int)gpa/((int)gpa - 1)) as norm_gpa:int;
c = foreach b generate (norm_gpa is null? 0 :norm_gpa);
store c into ':OUTPATH:';\,
                    'expected_err_regex' => "Encountered Warning DIVIDE_BY_ZERO 2387 time.*",
                },

                {
                    #Checking field discarded warning
                    'num' => 2,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age:int, gpa:double);
b = foreach a generate (int)name;
store b into ':OUTPATH:';\,
                    'expected_err_regex' => "Encountered Warning FIELD_DISCARDED_TYPE_CONVERSION_FAILED 10000 time.*",
                },

                {
                    #Checking type cast warnings
                    # (the regex accepts the two warnings in either order)
                    'num' => 3,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name:chararray, age, gpa);
b = foreach a generate age + 1, gpa + 0.1f;
describe b;\,
                    'expected_err_regex' => "(Encountered Warning IMPLICIT_CAST_TO_INT 1 time.*\n.*Encountered Warning IMPLICIT_CAST_TO_FLOAT 1 time.*)|(Encountered Warning IMPLICIT_CAST_TO_FLOAT 1 time.*\n.*Encountered Warning IMPLICIT_CAST_TO_INT 1 time.*)",
                },

                {
                    #Checking udf warnings
                    # (the regex accepts the two warnings in either order)
                    'num' => 4,
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studentnulltab10k' as (name, age: int, gpa: float);
b = foreach a generate org.apache.pig.test.udf.evalfunc.TestWarningFunc(name, age, gpa);
store b into ':OUTPATH:';\,
                    'expected_err_regex' => "(Encountered Warning UDF_WARNING_4 22 time.*\n.*Encountered Warning UDF_WARNING_3 10989 time.*)|(Encountered Warning UDF_WARNING_3 10989 time.*\n.*Encountered Warning UDF_WARNING_4 22 time.*)",
                },

                {
                    #Checking non existent field warnings
                    'num' => 5,
                    'pig' => q\
register :FUNCPATH:/testudf.jar;
a = load ':INPATH:/singlefile/studentnulltab10k';
b = foreach a generate $3;
store b into ':OUTPATH:';\,
                    'expected_err_regex' => "Encountered Warning ACCESSING_NON_EXISTENT_FIELD 10000 time.*",
                },
            ],
        },

        {
            # Scenarios for describe on nested foreach aliases (each test
            # below repeats the numbers of the scenarios it exercises):
            #
            # 1 Test that a nested foreach gives instant feedback: after user issue the foreach statement in Grunt
            # 2 Test that a nested foreach gives instant feedback: as part of execution: when we run this foreach statement, we will dump the schema for the nested alias
            # 3 Describe single Alias resulting from a nested Foreach
            # 4 Describe multiple Alias resulting from a nested Foreach
            # 5 Describe resulting from a nested Foreach that contains a positional parameter
            # 6 Describe for child Alias resulting from a nested Foreach where the child alias had multiple assignments.
            #   The expected behavior is that the last assignment will determine the result of the describe statement.
            # 7 Describe for an Alias resulting from a nested Foreach where the projection for the nested alias is empty
            # 8 Describe within a foreach statement
            # 9 Describe for alias with complex data types
            # 10 Describe that uses references an alias from an AS clause
            'name'  => 'NestedDescribe',
            'tests' => [
                {
                    # 2 Test that a nested foreach gives instant feedback: as part of execution: when we run this foreach statement, we will dump the schema for the nested alias
                    # 3 Describe single Alias resulting from a nested Foreach
                    'num' => 1,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = group A by name;
C = foreach B { D = distinct A.age; generate COUNT(D), group;}
describe C::D;
\,
                    'expected_out_regex' => "D: {age: bytearray}"
                },
                {
                    # 2 Test that a nested foreach gives instant feedback: as part of execution: when we run this foreach statement, we will dump the schema for the nested alias
                    # 7 Describe for an Alias resulting from a nested Foreach where the projection for the nested alias is empty
                    'num' => 2,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = group A by name;
C = foreach B { D = distinct A.age; E= filter D by age > 1000; generate COUNT(E), group;}
describe C;
\,
                    'expected_out_regex' => "C: {long,group: bytearray}"
                },
                {
                    # 1 Test that a nested foreach gives instant feedback: after user issue the foreach statement in Grunt
                    # 2 Test that a nested foreach gives instant feedback: as part of execution: when we run this foreach statement, we will dump the schema for the nested alias
                    # 3 Describe single Alias resulting from a nested Foreach
                    # 4 Describe multiple Alias resulting from a nested Foreach
                    # 5 Describe resulting from a nested Foreach that contains a positional parameter
                    'num' => 3,
                    'pig' => q\
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = group A by name;
C = foreach B { D = distinct A.$1; generate COUNT(D), group;}
describe C::D;
\,
                    'expected_out_regex' => "D: {age: bytearray}"
                },
            ]
        }
    ]
}
;