#!/usr/bin/env perl
############################################################################
#  Licensed to the Apache Software Foundation (ASF) under one or more
#  contributor license agreements.  See the NOTICE file distributed with
#  this work for additional information regarding copyright ownership.
#  The ASF licenses this file to You under the Apache License, Version 2.0
#  (the "License"); you may not use this file except in compliance with
#  the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

###############################################################################
# Nightly tests for pig.
#
# This file only builds a data structure: a hash reference assigned to $cfg.
# Layout:
#
#   $cfg->{driver}, $cfg->{nummachines}   top-level settings
#   $cfg->{groups}                        list of test groups, each with
#       name                              group name
#       tests                             list of test cases, each with
#           num                           test number within the group
#           pig                           the Pig Latin script under test
#           expected_err_regex            pattern for the expected error
#           expected_err_regex23          (some tests) alternative pattern
#           ignore                        (some tests) set to 1
#           execonly                      (some tests) 'mapred' or 'local'
#
# NOTE(review): the exact meaning of each key, and the substitution of the
# :INPATH:, :OUTPATH:, :FUNCPATH: and :SCRIPTHOMEPATH: placeholders, is
# presumably defined by the test harness that loads this file -- confirm
# there before changing a key name.
#
# NOTE(review): $cfg is intentionally left as an undeclared package variable
# (no 'my', no 'use strict'); the loader presumably reads it after evaluating
# this file -- confirm before tightening.
#
# Quoting: Pig scripts use q\...\ so that quotes, backticks, '$' and '#' need
# no escaping. Scripts that themselves contain a backslash use q#...# instead.
#

#PigSetup::setup();

$cfg = {
    'driver'      => 'Pig',
    'nummachines' => 5,

    'groups' => [
        {
            'name'  => 'HadoopError',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\ a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = group a by name; c = foreach b generate group, COUNT($5); dump c;\,
                    'expected_err_regex' => "Out of bound access. Trying to access non-existent column: 5.",
                },
            ]
        },
        {
            'name'  => 'NoSuchFile',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\a = load '/user/gates/nosuchfile'; dump a;\,
                    'expected_err_regex' => "ERROR 2118: Input path does not exist",
                },
                {
                    'num' => 2,
                    'pig' => q\register bla.jar\,
                    'expected_err_regex' => "ERROR 101: file 'bla.jar' does not exist.",
                }
            ]
        },
        {
            'name'  => 'BadFunc',
            'tests' => [
                {
                    # PIG-431
                    'num' => 1,
                    'pig' => "a = load ':INPATH:/singlefile/studenttab10k' using NoSuchFunction(':');",
                    'expected_err_regex' => "Could not resolve NoSuchFunction using imports",
                },
            ]
        },
        {
            'name'  => 'FileExists',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k'; store a into ':INPATH:/singlefile/fileexists';\,
                    'expected_err_regex' => ".* already exists",
                },
            ]
        },
        {
            'name'  => 'NegForeach',
            'tests' => [
                {
                    'num' => 1,
                    'ignore' => 1, # it is valid now
                    # testing that nested foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = group a by name; c = foreach b { ba = filter a by age < '25'; bb = foreach ba generate gpa; generate group, flatten(bb);}\,
                    'expected_err_regex' => " Encountered \" \"foreach\" \"foreach \"\" at line.*\nWas expecting one of:",
                },
                {
                    'num' => 2,
                    # testing that group within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = group a by name; c = foreach b { ba = filter a by age < '25'; g = group ba by gpa; x = foreach g generate group, flatten(g); generate x;}\,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near 'ba'",
                },
                {
                    'num' => 3,
                    # testing that cogroup within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); c = cogroup a by (name, age), b by (name, age) ; d = foreach c { e = cogroup a by (name), b by (name); x= foreach a generate flatten(a), flatten(b); generate x;}\,
                    'expected_err_regex' => "mismatched input 'a' expecting LEFT_PAREN",
                },
                {
                    'num' => 4,
                    # testing that join within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); c = cogroup a by (name), b by (name) ; d = foreach c { e = join a by (age), b by (age); generate e;}\,
                    'expected_err_regex' => "mismatched input 'a' expecting LEFT_PAREN",
                },
                {
                    'num' => 5,
                    'ignore' => 1, # it is valid now
                    # testing that cross within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); c = cogroup a by (name), b by (name) ; d = foreach c { e = cross a,b; generate e;}\,
                    'expected_err_regex' => "Encountered \" \"cross\" \"cross \"\" at .*\nWas expecting one of",
                },
                {
                    'num' => 6,
                    # testing that union within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); c = cogroup a by (name), b by (name) ; d = foreach c { e = union a,b; generate e;}\,
                    'expected_err_regex' => "mismatched input 'a' expecting LEFT_PAREN",
                },
                {
                    'num' => 7,
                    # testing that split within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = group a by name; c = foreach b { split a into ba if age < '25', bb if age > '40'; generate group, COUNT(ba), COUNT(bb);}\,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near 'split'",
                },
                {
                    'num' => 8,
                    # testing that load within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = group a by name; c = foreach b { d = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); generate *;}\,
                    'expected_err_regex' => "mismatched input ''.*/singlefile/votertab10k'' expecting LEFT_PAREN",
                },
                {
                    'num' => 9,
                    # testing that store within foreach is not allowed
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = group a by name; c = foreach b { store a into ':OUTPATH:'; generate *;}\,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near 'store'",
                },
            ]
        },
        {
            # test not allowed operations
            'name'  => 'NotAllowed',
            'tests' => [
                {
                    # currently (as of 09/18/2008), the following are not allowed
                    #a = b;
                    'num' => 1,
                    'ignore' => 1, # different error message for different version of hadoop
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = a;\,
                    'expected_err_regex' => "mismatched input ';' expecting LEFT_PAREN",
                },
                {
                    # currently (as of 09/18/2008), the following are not allowed
                    #a = b as (x,y,z);
                    'num' => 2,
                    'ignore' => 1, # different error message for different version of hadoop
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = a as (x,y,z);\,
                    'expected_err_regex' => "mismatched input 'as' expecting LEFT_PAREN",
                },
            ]
        },
        {
            # test with udf which throws exception
            'name'  => 'UdfException',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\ register :FUNCPATH:/testudf.jar; a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); c = cogroup a by name, b by name; d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf(a,b)); store d into ':OUTPATH:';\,
                    'expected_err_regex' => "Out of bounds access",
                    'expected_err_regex23' => "Unable to recreate exception",
                },
                {
                    'num' => 2,
                    'pig' => q\ register :FUNCPATH:/testudf.jar; a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); c = cogroup a by name, b by name; d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf2(a,b)); store d into ':OUTPATH:';\,
                    'expected_err_regex' => "Out of bounds access",
                    'expected_err_regex23' => "Unable to recreate exception",
                },
                {
                    'num' => 3,
                    'pig' => q\ register :FUNCPATH:/testudf.jar; a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); c = cogroup a by name, b by name; d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf3(a,b)); store d into ':OUTPATH:';\,
                    'expected_err_regex' => "Out of bounds access",
                    'expected_err_regex23' => "Unable to recreate exception",
                },
                {
                    'num' => 4,
                    'pig' => q\ register :FUNCPATH:/testudf.jar; a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); c = cogroup a by name, b by name; d = foreach c generate flatten(org.apache.pig.test.udf.evalfunc.BadUdf4(a,b)); store d into ':OUTPATH:';\,
                    'expected_err_regex' => "ERROR 2078: .*",
                    'expected_err_regex23' => "Unable to recreate exception",
                },
            ]
        },
        {
            # test for syntax errors
            'name'  => 'SyntaxErrors',
            'tests' => [
                {
                    # missing quotes around command
                    'num' => 1,
                    # q#...# is used in this group; this script contains backslashes ("\t").
                    'pig' => q# A = load ':INPATH:/singlefile/studenttab10k'; B = foreach A generate $2, $1, $0; C = stream B through awk 'BEGIN {FS = "\t"; OFS = "\t"} {print $3, $2, $1}'; dump C;#,
                    #'expected_err_regex' => "mismatched input ''BEGIN {FS = \"\\t\"; OFS = \"\\t\"} {print \$"."3, \$"."2, \$"."1}'' expecting SEMI_COLON",
                    'expected_err_regex' => "mismatched input ''BEGIN {.*} {.*}'' expecting SEMI_COLON",
                },
                {
                    # input spec missing parenthesis
                    'num' => 2,
                    'pig' => q# define CMD `perl PigStreaming.pl foo -` input 'foo' using PigStorage() ship(':SCRIPTHOMEPATH:/PigStreaming.pl'); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;#,
                    'expected_err_regex' => "mismatched input ''foo'' expecting LEFT_PAREN",
                },
                {
                    # no serializer name after using
                    'num' => 3,
                    'pig' => q# define CMD `perl PigStreaming.pl foo -` output ('foo' using ); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;#,
                    'expected_err_regex' => "Syntax error, unexpected symbol at or near '\\)'",
                },
                {
                    # alias name missing from define
                    'num' => 4,
                    'pig' => q# define `perl PigStreaming.pl foo -`; A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;#,
                    'expected_err_regex' => "mismatched input '`perl PigStreaming.pl foo -`' expecting IDENTIFIER_L",
                },
                {
                    # quotes missing from name of the file in ship script
                    'num' => 5,
                    'pig' => q# define CMD `perl PigStreaming.pl foo -` ship(:SCRIPTHOMEPATH:/PigStreaming.pl); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;#,
                    'expected_err_regex' => "mismatched input '.' expecting RIGHT_PAREN",
                },
            ]
        },
        {
            # relevant information missing/incorrect in streaming command
            'name'  => 'CmdErrors',
            'tests' => [
                {
                    # Define uses using non-existent command (autoship)
                    'num' => 1,
                    'execonly' => 'mapred',
                    'pig' => q\ define CMD `perl PigStreamingNotThere.pl`; A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
                {
                    # Define uses non-existent command with ship clause
                    'num' => 2,
                    'pig' => q\ define CMD `perl PigStreamingNotThere.pl foo -` ship(':SCRIPTHOMEPATH:/PigStreamingNotThere.pl'); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;\,
                    'expected_err_regex' => "Invalid ship specification: .*/PigStreamingNotThere.pl.* does not exist!",
                },
                {
                    # Define uses non-existent command with cache clause
                    'num' => 3,
                    'pig' => q\ define CMD `perl PigStreaming.pl - - nameMap` ship(':SCRIPTHOMEPATH:/PigStreaming.pl') cache(':SCRIPTHOMEPATH:/PigStreamingNotThere.pl#NotThere'); A = load ':INPATH:/singlefile/studenttab10k'; B = foreach A generate $0; C = stream B through CMD as (name); D = group C by name; E = foreach D generate group, COUNT(C); dump E;\,
                    'expected_err_regex' => "Invalid cache specification: .*/PigStreamingNotThere.pl.*",
                },
                {
                    # Define uses non-existent serializer
                    'num' => 4,
                    'pig' => q\ define CMD `perl PigStreaming.pl foo -` input('foo' using SerializerNotThere()) ship(':SCRIPTHOMEPATH:/PigStreaming.pl'); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;\,
                    'expected_err_regex' => "Could not resolve SerializerNotThere using imports",
                },
                {
                    # Define uses non-existent deserializer
                    'num' => 5,
                    'pig' => q\ define CMD `perl PigStreaming.pl` output(stdout using DeserializerNotThere()) ship(':SCRIPTHOMEPATH:/PigStreaming.pl'); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;\,
                    'expected_err_regex' => "Could not resolve DeserializerNotThere using imports",
                },
                {
                    # Invalid skip path
                    'num' => 6,
                    'pig' => q\ set stream.skippath 'foo'; define CMD `perl PigStreaming.pl`; A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;\,
                    'expected_err_regex' => "Invalid value for stream.skippath.*",
                },
                {
                    # Invalid command alias in stream operator
                    'num' => 7,
                    'pig' => q\ define CMD `perl PigStreaming.pl`; A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD1; dump B;\,
                    'expected_err_regex' => "pig script failed to validate: Undefined command-alias \\[CMD1\\]",
                },
                {
                    # Invalid operator alias in stream operator
                    'num' => 8,
                    'pig' => q\ define CMD `perl PigStreaming.pl`; A = load ':INPATH:/singlefile/studenttab10k'; B = stream C through CMD; dump B;\,
                    'expected_err_regex' => "Undefined alias: C",
                },
            ]
        },
        {
            # streaming application failures
            'name'  => 'StreamingErrors',
            'tests' => [
                {
                    # Streaming application fails in the beginning of processing
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 1,
                    'execonly' => 'mapred',
                    'pig' => q\ define CMD `perl PigStreamingBad.pl start` ship(':SCRIPTHOMEPATH:/PigStreamingBad.pl') stderr('CMD' limit 1); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 1",
                },
                {
                    # Streaming application fails in the middle of processing
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 2,
                    'execonly' => 'mapred',
                    'pig' => q\ define CMD `perl PigStreamingBad.pl middle` ship(':SCRIPTHOMEPATH:/PigStreamingBad.pl') stderr('CMD' limit 1); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
                {
                    # Streaming application fails in the end of processing
                    # bring logs to dfs
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 3,
                    'execonly' => 'mapred',
                    'pig' => q\ define CMD `perl PigStreamingBad.pl end` ship(':SCRIPTHOMEPATH:/PigStreamingBad.pl') stderr('CMD' limit 1); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 3",
                },
                {
                    # Streaming application fails randomly during processing
                    # bring logs to dfs
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 4,
                    'execonly' => 'mapred',
                    'pig' => q\ define CMD `perl DieRandomly.pl 10000 2` ship(':SCRIPTHOMEPATH:/DieRandomly.pl') stderr('CMD' limit 1); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
            ]
        },
        {
            # errors (de)serializing data for streaming
            'name'  => 'IOErrors',
            'tests' => [
                {
                    # Invalid deserializer - throws exception
                    'num' => 1,
                    'execonly' => 'mapred',
                    'pig' => q\ register :FUNCPATH:/testudf.jar; define CMD `perl PigStreaming.pl` input(stdin) output(stdout using org.apache.pig.test.udf.streaming.DumpStreamerBad) ship(':SCRIPTHOMEPATH:/PigStreaming.pl'); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; store B into ':OUTPATH:';\,
                    'expected_err_regex' => "Error reading output from Streaming binary",
                },
                {
                    # Invalid serializer - throws exception
                    'num' => 2,
                    'execonly' => 'mapred',
                    'pig' => q\ define CMD `perl PigStreamingDepend.pl` input(stdin using StringStoreBad) ship(':SCRIPTHOMEPATH:/PigStreamingDepend.pl', ':SCRIPTHOMEPATH:/PigStreamingModule.pm'); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through `perl PigStreaming.pl`; C = stream B through CMD as (name, age, gpa); D = foreach C generate name, age; store D into ':OUTPATH:';\,
                    'expected_err_regex' => "Could not resolve StringStoreBad using imports",
                },
            ]
        },
        {
            'name'  => 'StreamingLocalErrors',
            'tests' => [
                {
                    # Define uses using non-existent command
                    'num' => 1,
                    'execonly' => 'local',
                    'pig' => q\ define CMD `perl PigStreamingNotThere.pl`; A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; dump B;\,
                    'expected_err_regex' => "Can't open perl script .*PigStreamingNotThere.pl.*: No such file or directory",
                },
                {
                    # Streaming application fails in the beginning of processing
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 2,
                    'execonly' => 'local',
                    'pig' => q\ define CMD `perl :SCRIPTHOMEPATH:/PigStreamingBad.pl start`; A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 1",
                },
                {
                    # Streaming application fails in the middle of processing
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 3,
                    'execonly' => 'local',
                    'pig' => q\ define CMD `perl :SCRIPTHOMEPATH:/PigStreamingBad.pl middle`; A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 2",
                },
                {
                    # Streaming application fails at the end of processing
                    # (the command is invoked with 'end' and exit status 3 is expected)
                    # bring logs to dfs
                    # NEED TO CHECK STDERR MANUALLY FOR NOW
                    'num' => 4,
                    'execonly' => 'local',
                    'pig' => q\ define CMD `perl :SCRIPTHOMEPATH:/PigStreamingBad.pl end`; A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; store B into ':OUTPATH:';\,
                    'expected_err_regex' => "failed with exit status: 3",
                },
                {
                    # Invalid deserializer - throws exception
                    'num' => 5,
                    'execonly' => 'local',
                    'pig' => q\ register :FUNCPATH:/testudf.jar; define CMD `perl :SCRIPTHOMEPATH:/PigStreaming.pl` input(stdin using PigDump) output(stdout using org.apache.pig.test.udf.storefunc.DumpLoaderBad); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through CMD; store B into ':OUTPATH:';\,
                    'expected_err_regex' => "Could not resolve PigDump using imports",
                },
                {
                    # Invalid serializer - throws exception
                    'num' => 6,
                    'execonly' => 'local',
                    'pig' => q\ define CMD `perl :SCRIPTHOMEPATH:/PigStreamingDepend.pl` input(stdin using StringStoreBad); A = load ':INPATH:/singlefile/studenttab10k'; B = stream A through `perl PigStreaming.pl`; C = stream B through CMD as (name, age, gpa); D = foreach C generate name, age; store D into ':OUTPATH:';\,
                    'expected_err_regex' => "Could not resolve StringStoreBad using imports",
                },
            ]
        },
        {
            'name'  => 'LineageErrors',
            'tests' => [
                {
                    # UDF returns a bytearray that is cast to an integer
                    'num' => 1,
                    'pig' => q\register :FUNCPATH:/testudf.jar; a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa); b = filter a by name lt 'b'; c = foreach b generate org.apache.pig.test.udf.evalfunc.CreateMap((chararray)name, age); d = foreach c generate $0#'alice young'; split d into e if $0 < 42, f if $0 >= 42; store e into ':OUTPATH:';\,
                    'expected_err_regex' => "Received a bytearray from the UDF. Cannot determine how to convert the bytearray",
                },
            ]
        }
    ]
}
;