#!/usr/bin/env perl
############################################################################
#  Licensed to the Apache Software Foundation (ASF) under one or more
#  contributor license agreements.  See the NOTICE file distributed with
#  this work for additional information regarding copyright ownership.
#  The ASF licenses this file to You under the Apache License, Version 2.0
#  (the "License"); you may not use this file except in compliance with
#  the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

###############################################################################
# Tests for Jython embedding
#
# This file assigns one data structure, $cfg:
#
#   driver  - name of the test driver ('Pig').
#   groups  - list of test groups; each group has a 'name' and a list of
#             'tests'.
#
# Keys used by the tests in this file:
#
#   num                  - test number within its group.
#   pig                  - the Jython script under test (embeds Pig Latin).
#   verify_pig_script    - plain Pig Latin script paired with 'pig'.
#   rc                   - expected return code.
#   expected_out_regex,
#   expected_err_regex   - patterns expected in the output / error stream.
#   pig_params, java_params, additional_cmd_args
#                        - extra arguments for the run.
#   sortArgs, floatpostprocess, delimiter, sql, ignore
#                        - further per-test options.
#
# The :INPATH:, :OUTPATH:, :TMP: and :PARAMPATH: tokens inside the scripts are
# placeholders; presumably the harness substitutes them before running a test.
#
# The embedded scripts are whitespace-sensitive Python and therefore start at
# column 0 inside their q\...\ / q@...@ literals. Do not re-indent them.
#
# NOTE(review): $cfg is assigned without my/our. Presumably the harness that
# loads this file declares the variable and reads it back afterwards, so the
# bare assignment is kept -- confirm against the loader before changing it.
#
# NOTE(review): 'delimiter' and the second element of 'sortArgs' are a single
# space here; confirm they are not meant to be a tab character.
#

# Messages printed / raised by the scripts below on success / failure.
our $PASSED="Pig job PASSED";
our $FAILED="Pig job FAILED";

$cfg = {
    'driver' => 'Pig',

    'groups' => [
        {
            'name'  => 'Jython_Checkin',
            'tests' => [
                {
                    'num' => 2,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1=':INPATH:/singlefile/studenttab10k'
input2=':INPATH:/singlefile/votertab10k'
output=':OUTPATH:'
P = Pig.compile("""
a = load '$in1' as (name, age, gpa);
b = load '$in2' as (name, age, registration, contributions);
c = filter a by age < 50;
d = filter b by age < 50;
e = cogroup c by (name, age), d by (name, age) ;
f = foreach e generate flatten(c), flatten(d);
g = group f by registration;
h = foreach g generate group, SUM(f.d::contributions);
i = order h by $1;
store i into '$out';
""").bind({'in1':input1,'in2':input2, 'out':output}).runSingle()
if P.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\a = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
c = filter a by age < 50;
d = filter b by age < 50;
e = cogroup c by (name, age), d by (name, age) ;
f = foreach e generate flatten(c), flatten(d);
g = group f by registration;
h = foreach g generate group, SUM(f.d::contributions);
i = order h by $1;
store i into ':OUTPATH:';
\,
                    'sortArgs' => ['-t', ' ', '+1', '-2'],
                },
                {
                    'num' => 3,
                    # jython uses 'python.home'/cachedir when python.cachedir is not specified.
                    # To test python.cachedir is set correctly by the framework,
                    # setting python.home to a random path
                    'java_params' => ['-Dpython.home=/dev/null/fake'],
                    'pig' => q\#!/usr/bin/python
import sys
from org.apache.hadoop.conf import *
from org.apache.hadoop.fs import *
config = Configuration()
hdfs = FileSystem.get(config)
\,
                    'rc' => 0,
                },
            ],
        },
        {
            'name'  => 'Jython_Embedded',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';\,
                },
            ],
        },
        {
            'name'  => 'Jython_CompileBindRun',
            'tests' => [
                {
                    # bind() with no parameters, runSingle
                    'num' => 1,
                    'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';\,
                    'floatpostprocess' => 1,
                    'delimiter' => ' ',
                },
                {
                    # 9.2 1 bind single input parameter and no output parameters
                    'num' => 2,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
output = ":OUTPATH:"
P = Pig.compile("""A = load '$in' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind({'in':input})
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';\,
                    'floatpostprocess' => 1,
                    'delimiter' => ' ',
                    # ,'expected_out_regex' => "Pig job PASSED"
                },
                {
                    # bind parallel execution with a multiple entries
                    'num' => 3,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
max1="50"; output1= ":OUTPATH:.1"
max2="40.0"; output2= ":OUTPATH:.2"
max3="30.0f"; output3= ":OUTPATH:.3"
P = Pig.compile("""
A = load '$in' as (name, age, gpa);
B= filter A by age < $max;
C = foreach B generate name;
store C into '$out';
-- store C into put here as comment to fake multi-query
""")
Q = P.bind([
 {'in':input ,'max':max1 ,'out':output1 }
,{'in':input ,'max':max2 ,'out':output2 }
,{'in':input ,'max':max3 ,'out':output3 }
])
results = Q.run()
for i in [0, 1, 2]:
    result = results[i]
    if result.isSuccessful():
        print "Pig job PASSED"
    else:
        raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 50;
C = foreach B generate name;
store C into ':OUTPATH:.1';
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 40.0;
C = foreach B generate name;
store C into ':OUTPATH:.2';
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 30.0f;
C = foreach B generate name;
store C into ':OUTPATH:.3';
\,
                },
                {
                    # 8.6 compile pig script file with no input and no output parameters
                    # 12.2 import python modules
                    #
                    'num' => 4,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
pig_script = ":TMP:/script.pig"
pigfile = open( pig_script, 'w+')
pigfile.write("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
""")
pigfile.close()
#execute pig script
result = Pig.compileFromFile( pig_script ).bind().runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\,
                    'floatpostprocess' => 1,
                    'delimiter' => ' ',
                },
                {
                    # 8.7 compile pig script file with no input and with output parameters
                    'num' => 5,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
from org.apache.pig.scripting import Pig
#create pig script
pig_script = ":TMP:/script.pig"
pigfile = open( pig_script, 'w+')
pigfile.write("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into '$out';
""")
pigfile.close()
#execute pig script
output= ":OUTPATH:"
result = Pig.compileFromFile(pig_script).bind({'out':output}).runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\,
                    'floatpostprocess' => 1,
                    'delimiter' => ' ',
                },
                {
                    # 11.15 1 results.getResults(alias) for null results
                    'num' => 6,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
EMPTY= filter A by age > 9999;
store EMPTY into ':OUTPATH:';
""")
result = P.bind().runSingle()
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
EMPTY= filter A by age > 9999;
store EMPTY into ':OUTPATH:';\,
                },
                {
                    # bind reading from python context
                    'num' => 7,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load '$inpath' as (name, age, gpa);
B= filter A by age < $max;
store B into '$out';
""")
#execute pig script
inpath= ":INPATH:/singlefile/studenttab10k"
out= ":OUTPATH:"
max = 40
result = P.bind().runSingle()
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 40;
store B into ':OUTPATH:';\,
                },
                {
                    # bind multiple times
                    'num' => 8,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load '$inpath' as (name, age, gpa);
B= foreach A generate age + $i;
store B into '$out';
-- extra store B into to force multiquery
""")
#execute pig script
inpath= ":INPATH:/singlefile/studenttab10k"
for i in [1,2,3]:
    out= ":OUTPATH:." + str(i)
    result = P.bind().runSingle()
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= foreach A generate age + 1;
store B into ':OUTPATH:.1';
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= foreach A generate age + 2;
store B into ':OUTPATH:.2';
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= foreach A generate age + 3;
store B into ':OUTPATH:.3';\,
                },
                {
                    # invoke .run() on a non-parallel pig script
                    'num' => 9,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
""")
result = P.bind().run()
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';\,
                },
                {
                    # 8.6 compile pig script file with no input and no output parameters
                    # 12.2 import python modules
                    #
                    # The written Pig script contains a '--' comment line; it must
                    # stay on its own line so it does not swallow the store.
                    'num' => 10,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
pig_script = ":TMP:/script.pig"
pigfile = open( pig_script, 'w+')
pigfile.write("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
-- a comment
store A into ':OUTPATH:';
""")
pigfile.close()
#execute pig script
result = Pig.compileFromFile( pig_script ).bind().runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\,
                    'floatpostprocess' => 1,
                    'delimiter' => ' ',
                },
                {
                    # $loadfile is supplied on the command line via -p
                    'num' => 11,
                    'pig_params' => ['-p', qq(loadfile='studenttab10k')],
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/$loadfile' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\,
                    'floatpostprocess' => 1,
                    'delimiter' => ' ',
                },
                {
                    # $fname is expected to come from the parameter file passed via -m
                    'num' => 12,
                    'pig_params' => ['-m', ":PARAMPATH:/params_3"],
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/$fname' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\,
                    'floatpostprocess' => 1,
                    'delimiter' => ' ',
                },
                {
                    # the script reads the file name from sys.argv[1]
                    'num' => 13,
                    'additional_cmd_args' => ['studenttab10k'],
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
import sys
P = Pig.compile("A = load ':INPATH:/singlefile/" + sys.argv[1] + "' as (name, age, gpa);" + "store A into ':OUTPATH:';");
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';
\,
                    'floatpostprocess' => 1,
                    'delimiter' => ' ',
                },
            ],
        },
        {
            'name'  => 'Jython_Diagnostics',
            'tests' => [
                {
                    # 11.23 1 explain() on a complex query
                    'num' => 1,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).explain()
\,
                    'rc' => 0,
                },
                {
                    # 11.22 1 illustrate() on a complex query
                    'num' => 2,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).illustrate();
\,
                    'rc' => 0,
                    'expected_out_regex' => "A.*name:bytearray.*age:bytearray.*gpa:bytearray",
                },
                {
                    # 11.24 1 describe() on an alias
                    'num' => 3,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).describe('A');
\,
                    'rc' => 0,
                    'expected_out_regex' => "A:.*{name:.*bytearray,age:.*bytearray,gpa:.*bytearray}",
                },
                {
                    # 11.29 1 describe() on an undefined alias
                    'num' => 4,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).describe('INVALID_ALIAS');
\,
                    'rc' => 6,
                    'expected_err_regex' => "ERROR 1005: No plan for INVALID_ALIAS to describe",
                },
                {
                    # 11.27 1 illustrate(alias)
                    'num' => 5,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).illustrate('D');
\,
                    'rc' => 6,
                    'expected_err_regex' => "ERROR 1121",
                },
                {
                    # 11.28 1 explain(alias)
                    'num' => 6,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1 = ":OUTPATH:.1"
output2 = ":OUTPATH:.2"
maximum="20";
#No Schema specified
P = Pig.compile("""
A = load '$in1' as (name, age, gpa);
B= filter A by age < $max;
C= foreach B generate name,age;
store C into '$out1';
D = load '$in2' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
E= filter D by age < $max;
F= foreach E generate name,age;
store F into '$out2';
""")
result = P.bind({'in1':input1, 'in2':input2, 'max':maximum, 'out1':output1, 'out2':output2 }).explain('C')
\,
                    'rc' => 6,
                    'expected_err_regex' => "ERROR 1121",
                    # ,'expected_err_regex' => "TypeError: explain(): expected 0 args; got 1"
                },
            ],
        },
        {
            'name'  => 'Iterator',
            'tests' => [
                {
                    # Uses q@...@ because the script contains backslashes, which
                    # cannot appear inside a q\...\ literal. Each "\\" below
                    # reaches the script as a single backslash.
                    'num' => 1,
                    'pig' => q@#!/usr/bin/python
from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k';
store A into ':OUTPATH:.tmp';
""")
#execute pig script
result = P.bind().runSingle()
if not result.isSuccessful():
    raise "Pig job FAILED"
pigfile = open(":TMP:/iterator_output.txt", 'w+')
iter = result.result("A").iterator()
while iter.hasNext():
    t = iter.next()
    name = t.get(0)
    age = t.get(1)
    gpa = t.get(2)
    pigfile.write(str(name) + "\\t" + str(age) + "\\t" + str(gpa) + "\\n")
pigfile.close()
Pig.fs("-mkdir :OUTPATH:")
Pig.fs("-copyFromLocal :TMP:/iterator_output.txt :OUTPATH:/part-m-00000")
@,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k';
store A into ':OUTPATH:';\,
                },
            ],
        },
        {
            # 12.2 import python modules
            # 12.1 python comments
            # 12.6 fs lists a file
            'name'  => 'Jython_Misc',
            'tests' => [
                {
                    'num' => 1,
                    'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
from org.apache.pig.scripting import Pig
Pig.fs( "-ls :INPATH:/singlefile/studenttab10k");
\,
                    'expected_out_regex' => "studenttab10k",
                },
            ],
        },
        {
            'name'  => 'Jython_Macro',
            'tests' => [
                {
                    # the script writes a macro to module.pig and imports it
                    'num' => 1,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
# mymodule.pig
pig_script = ":TMP:/module.pig"
pigfile = open( pig_script, 'w+')
pigfile.write("""
define group_and_count (A, group_key) returns B {
D = group $A by $group_key;
$B = foreach D generate group, COUNT($A);
};
""")
pigfile.close()
#main.pig
P = Pig.compile("""import ':TMP:/module.pig';
alpha = load ':INPATH:/singlefile/studenttab10k' as (user, age, gpa);
gamma = group_and_count (alpha, user);
store gamma into ':OUTPATH:';
""")
P.bind().runSingle()
\,
                    'verify_pig_script' => q\A = load ':INPATH:/singlefile/studenttab10k' as (user, age, gpa);
B = group A by user;
C = foreach B generate group, COUNT(A);
store C into ':OUTPATH:';\,
                },
            ],
        },
        {
            # SET debug 'on'
            # SET job.name 'my job'
            # SET default_parallel 100
            'name'  => 'Jython_Properties',
            'tests' => [
                {
                    'num' => 1,
                    'ignore' => 1, # This is a good test except that we can't verify it.
                    'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
from org.apache.pig.scripting import Pig
from java.util import Properties;
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
prop = Properties()
prop.put("mapred.job.name", "friendship")
result = Q.runSingle(prop)
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'sql' => "select name, age, gpa+0.00 from studenttab10k;",
                    'floatpostprocess' => 1,
                    'delimiter' => ' ',
                },
            ],
        },
        {
            'name'  => 'Jython_Error',
            'tests' => [
                {
                    # run a script that returns single negative result
                    # (binds bad_input, a Python name the script never defines)
                    'num' => 1,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
output = ":OUTPATH:"
P = Pig.compile("""A = load '$in' as (name, age, gpa);
store A into '$out';""")
Q = P.bind({'in':bad_input, 'out':output})
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'rc' => 6,
                    'expected_err_regex' => "ERROR 1121",
                },
                {
                    # bind an undefined Python name (bad_output) as the output
                    # parameter; the error text must name it
                    'num' => 2,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
output = ":OUTPATH:"
P = Pig.compile("""A = load '$in' as (name, age, gpa);
store A into '$out';""")
Q = P.bind({'in':input, 'out':bad_output})
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'rc' => 6,
                    'expected_err_regex' => "name 'bad_output' is not defined",
                },
                {
                    # bind an undefined input parameter
                    'num' => 3,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
input= ":INPATH:/singlefile/studenttab10k"
output = ":OUTPATH:"
P = Pig.compile("""A = load '$in' as (name, age, gpa);
store A into '$out';""")
Q = P.bind({'in':invalid_parameter, 'out':output})
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'expected_err_regex' => "ERROR 1121",
                    'rc' => 6,
                },
                {
                    # compileFromFile for pig script file that does not exist throws IOException
                    # NOTE(review): the script builds the path from tmp_dir, which it
                    # never defines -- confirm which failure this test is meant to hit.
                    'num' => 4,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
# intentionally don't create pig script
pig_script = tmp_dir + "/script.pig"
#execute pig script
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1= ":OUTPATH:.1"
output2= ":OUTPATH:.2"
result = Pig.compileFromFile(pig_script).bind({'in1':input1,'in2':input2, 'out1':output1, 'out2':output2 }).run()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'expected_err_regex' => "ERROR 1121",
                    'rc' => 6,
                },
                {
                    # compileFromFile for pig script file that does not have read permissions throws IOException
                    # NOTE(review): the script closes the file before calling
                    # compileFromFile and closes it again afterwards -- confirm this
                    # still exercises the intended failure.
                    'num' => 5,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
pig_script = ":TMP:/script.pig"
pigfile = open( pig_script, 'w')
#no read permissions and file is left open until afer compile statement
pigfile.write("""
A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions);
store A into '$out1';
store B into '$out2';
""")
pigfile.close()
#execute pig script
input1= ":INPATH:/singlefile/studenttab10k"
input2= ":INPATH:/singlefile/votertab10k"
output1= ":OUTPATH:.1"
output2= ":OUTPATH:.2"
result = Pig.compileFromFile(pig_script).bind({'in1':input1,'in2':input2, 'out1':output1, 'out2':output2 }).run()
pigfile.close()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'expected_err_regex' => "ERROR 1121",
                    'rc' => 6,
                },
                {
                    # Pig.compile() with the 'import Pig' line commented out
                    'num' => 6,
                    'pig' => q\#!/usr/bin/python
# JYTHON COMMENT
#from org.apache.pig.scripting import Pig
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
store A into ':OUTPATH:';""")
Q = P.bind()
result = Q.runSingle()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'rc' => 6,
                    'expected_err_regex' => "ERROR 1121",
                },
                {
                    # 11.10 iter.next for an alias that is undefined
                    # NOTE(review): the script never iterates; it calls bind() without
                    # defining $out1 / $out2 -- confirm the title matches the intent.
                    'num' => 7,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
#create pig script
P = Pig.compile("""A = load ':INPATH:/singlefile/studenttab10k' as (name, age, gpa);
B= filter A by age < 50;
store B into '$out1';
C = load ':INPATH:/singlefile/votertab10k' using PigStorage() as (name:chararray, age:int, registration, contributions:double);
D = filter C by name matches '^fred*';
store D into '$out2';
""")
result = P.bind().run()
if result.isSuccessful():
    print "Pig job PASSED"
else:
    raise "Pig job FAILED"
\,
                    'rc' => 6,
                    'expected_err_regex' => "ERROR 1121",
                },
            ],
        },
        {
            'name'  => 'Jython_Command',
            'tests' => [
                {
                    # sh command that fails (cat of a file that does not exist)
                    'num' => 3,
                    'pig' => q\#!/usr/bin/python
from org.apache.pig.scripting import Pig
Q = Pig.compile("sh cat nonexistfile")
result = Q.bind().runSingle()
if result.isSuccessful() :
    print 'Pig job succeeded'
else :
    raise 'Cant run sh command'
\,
                    'rc' => 6,
                },
            ],
        },
    ],
};