#!/usr/bin/env perl
############################################################################
#  Licensed to the Apache Software Foundation (ASF) under one or more
#  contributor license agreements.  See the NOTICE file distributed with
#  this work for additional information regarding copyright ownership.
#  The ASF licenses this file to You under the Apache License, Version 2.0
#  (the "License"); you may not use this file except in compliance with
#  the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

###############################################################################
# Nightly tests for pig.
#
# This file assigns a single hash reference to $cfg with the keys:
#   driver   - 'Pig'
#   execonly - 'mapred'
#   groups   - array of groups; each group has a 'name' and a 'tests' array.
#
# Each test is a hash with:
#   num              - test number within its group
#   pig              - Pig Latin script text, written with q\...\ so that
#                      $0/$1 are NOT interpolated by Perl; it contains the
#                      :INPATH:, :OUTPATH: and :SCRIPTHOMEPATH: placeholders
#   sortArgs         - (optional) list of sort arguments
#   floatpostprocess - (optional, test 1 only) flag set to 1
#   delimiter        - (optional, test 1 only) field delimiter string
#
# NOTE(review): $cfg is assigned without `my`; presumably the harness that
# loads this file reads it as a package variable -- confirm before adding
# `use strict` or lexical scoping here.
#
# NOTE(review): 'delimiter' and the argument after '-t' in sortArgs are a
# single space as written; the input files may actually be tab-separated --
# confirm against the original data/harness.
#

#PigSetup::setup();

$cfg = {
    'driver'   => 'Pig',
    'execonly' => 'mapred',

    'groups' => [
        {
            'name'  => 'BigData',
            'tests' => [
                {
                    'num'              => 1,
                    'floatpostprocess' => 1,
                    'delimiter'        => ' ',
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa); b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); c = filter a by age < '50'; d = filter b by age < '50'; e = cogroup c by (name, age), d by (name, age) parallel 20; f = foreach e generate flatten(c), flatten(d); g = group f by registration parallel 20; h = foreach g generate group, SUM(f.d::contributions); i = order h by $1, $0 parallel 20; store i into ':OUTPATH:';\,
                },
                {
                    'num' => 2,
                    # The line break inside this script is part of the string.
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa); a1 = filter a by age < '50'; b = group a1 by (name, age); c = foreach b generate group as g, AVG(a1.gpa); d = filter c by $1 > 3.0; d1 = foreach d generate g.$0 as name, g.$1 as age, $1 as gpa; e = group d1 by name; f = foreach e 
generate group, AVG(d1.age); store f into ':OUTPATH:';\,
                },
                {
                    'num' => 3,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa); b = load ':INPATH:/singlefile/votertab10k' as (name, age, registration, contributions); c = foreach a generate name; d = foreach b generate name; e = filter c by name matches '.*allen$'; f = union d, e; g = distinct f parallel 20; store g into ':OUTPATH:';\,
                },
                {
                    'num' => 4,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa); b = group a all parallel 20; c = foreach b generate COUNT(a.$0); store c into ':OUTPATH:';\,
                },
                {
                    'num' => 5,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa); b = group a by name parallel 20; c = foreach b generate group, COUNT($1); store c into ':OUTPATH:';\,
                },
                {
                    'num' => 6,
                    # Streams the input through PigStreaming.pl, shipped from :SCRIPTHOMEPATH:.
                    'pig' => q\ define cmd `perl PigStreaming.pl` ship(':SCRIPTHOMEPATH:/PigStreaming.pl') stderr('CMD' limit 3); a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa); b = stream a through cmd as (n, a, g); c = foreach b generate n, a; store c into ':OUTPATH:';\,
                },
                {
                    'num' => 7,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa); b = order a by name parallel 20; store b into ':OUTPATH:';\,
                    'sortArgs' => ['-t', ' ', '+0', '-1'],
                },
                {
                    'num' => 8,
                    'pig' => q\a = load ':INPATH:/singlefile/studenttab20m' as (name:chararray, age:int, gpa:double); b = order a by name, age desc parallel 20; store b into ':OUTPATH:';\,
                    'sortArgs' => ['-t', ' ', '+0', '-1', '+1rn', '-2'],
                },
                {
                    'num' => 9,
                    'pig' => q\A = load ':INPATH:/singlefile/studenttab20m' as (name, age, gpa); B = filter A by age > 20; C = group B by name; D = foreach C generate group, COUNT(B) PARALLEL 16; E = order D by $0 PARALLEL 16; F = limit E 10; store F into ':OUTPATH:';\,
                    'sortArgs' => ['-t', ' ', '+0', '-1'],
                },
            ],
        },
    ],
};