--This script tests a join too large for fragment and replicate. It also
--contains a join followed by a group by on the same key, something that we
--could potentially optimize by not regrouping.
register $PIGMIX_JAR
A = load '$HDFS_ROOT/page_views' using org.apache.pig.test.pigmix.udf.PigPerformanceLoader()
    as (user, action, timespent, query_term, ip_addr, timestamp,
        estimated_revenue, page_info, page_links);
B = foreach A generate user, (double)estimated_revenue;
alpha = load '$HDFS_ROOT/users' using PigStorage('\u0001') as (name, phone, address, city, state, zip);
beta = foreach alpha generate name;
C = join beta by name, B by user parallel $PARALLEL;
D = group C by $0 parallel $PARALLEL;
E = foreach D generate group, SUM(C.estimated_revenue);
store E into '$PIGMIX_OUTPUT/L3out';
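
-- Sketch only, not part of the original PigMix query: one way to avoid the
-- regroup mentioned in the header comment is to express the join plus the
-- group by as a single cogroup on the same key. This assumes names in the
-- 'users' input are unique, so summing B's revenue per group matches the
-- join-then-group result. Aliases D2, D3, and E2 are illustrative and are
-- never stored, so the benchmark output above is unchanged.
D2 = cogroup beta by name, B by user parallel $PARALLEL;
-- Keep only keys present in both inputs, mirroring the inner join in C.
D3 = filter D2 by not IsEmpty(beta) and not IsEmpty(B);
E2 = foreach D3 generate group, SUM(B.estimated_revenue);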