# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Pig configuration file. All values can be overwritten by command line
# arguments; see bin/pig -help.

# log4jconf log4j configuration file
# log4jconf=./conf/log4j.properties

# brief logging (no timestamps)
brief=false

# clustername, name of the hadoop jobtracker. If no port is defined port 50020 will be used.
#cluster

# debug level, INFO is default
debug=INFO

# a file that contains pig script
#file=

# load jarfile, colon separated
#jar=

# verbose print all log messages to screen (default to print only INFO and above to screen)
verbose=false

# exectype local|mapreduce, mapreduce is default
#exectype=mapreduce

# hod related properties
#ssh.gateway
#hod.expect.root
#hod.expect.uselatest
#hod.command
#hod.config.dir
#hod.param

# Do not spill temp files smaller than this size (bytes)
pig.spill.size.threshold=5000000

# EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes).
# This should help reduce the number of files being spilled.
pig.spill.gc.activation.size=40000000

######################
# Everything below this line is Yahoo specific. Note that I've made
# (almost) no changes to the lines above to make merging in from Apache
# easier.
# Any values I don't want from above I override below.
#
# This file is configured for use with HOD on the production clusters. If you
# want to run pig with a static cluster you will need to remove everything
# below this line and set the cluster value (above) to the
# hostname and port of your job tracker.

exectype=mapreduce
log.file=