#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#
# The Pig command script
#
# Environment Variables
#
#     JAVA_HOME                  The java implementation to use. Required;
#                                the script exits with an error if it is
#                                still empty after pig-env.sh is sourced.
#
#     PIG_CLASSPATH              Extra Java CLASSPATH entries.
#
#     HADOOP_HOME/HADOOP_PREFIX  Environment HADOOP_HOME/HADOOP_PREFIX(0.20.205)
#
#     PIG_HEAPSIZE               The maximum amount of heap to use, in MB.
#                                Default is 1000.
#
#     PIG_OPTS                   Extra Java runtime options.
#
#     PIG_CONF_DIR               Alternate conf dir. Default is ${PIG_HOME}/conf.
#
#     PIG_LOG_DIR                Log directory. Default is ${PIG_HOME}/logs.
#
#     PIG_LOGFILE                Log file name. Default is pig.log.
#
#     HBASE_CONF_DIR             Optionally, the HBase configuration to run
#                                against when using HBaseStorage.
#                                Default is /etc/hbase.
#
# Passing -secretDebugCmd prints the command that would be run (a dry run)
# instead of executing it; the flag itself is not forwarded to Pig.

cygwin=false
case "$(uname)" in
  CYGWIN*) cygwin=true ;;
esac

debug=false

# Filter the command line: strip our private debug flag and keep every other
# argument, unmodified, in an array so that whitespace inside it survives.
remaining=()
for f in "$@"; do
  if [[ $f = "-secretDebugCmd" ]]; then
    debug=true
  else
    remaining+=("$f")
  fi
done

# resolve links - $0 may be a softlink
this="${BASH_SOURCE-$0}"

# convert relative path to absolute path
bin=$(cd -P -- "$(dirname -- "$this")" && pwd -P)
script="$(basename -- "$this")"
this="$bin/$script"

# the root of the Pig installation
export PIG_HOME="$(dirname "$this")/.."

# Pick a configuration directory: the in-tree conf dir if it holds
# pig.properties, otherwise the packaged location.
if [ -z "$PIG_CONF_DIR" ]; then
  if [ -f "${PIG_HOME}/conf/pig.properties" ]; then
    PIG_CONF_DIR=${PIG_HOME}/conf
  fi
fi

if [ -z "$PIG_CONF_DIR" ]; then
  if [ -d /etc/pig ]; then
    # if installed with rpm/deb package
    PIG_CONF_DIR="/etc/pig"
  fi
fi

# pig-env.sh may set any of the variables documented above (JAVA_HOME included).
if [ -f "${PIG_CONF_DIR}/pig-env.sh" ]; then
  . "${PIG_CONF_DIR}/pig-env.sh"
fi

# some Java parameters
if [ "$JAVA_HOME" = "" ]; then
  echo "Error: JAVA_HOME is not set."
  exit 1
fi

JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m

# check envvars which might override default args
if [ "$PIG_HEAPSIZE" != "" ]; then
  JAVA_HEAP_MAX="-Xmx${PIG_HEAPSIZE}m"
fi

# CLASSPATH initially contains $PIG_CONF_DIR
CLASSPATH="${PIG_CONF_DIR}"
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar

# add user-specified CLASSPATH
if [ "$PIG_CLASSPATH" != "" ]; then
  CLASSPATH=${CLASSPATH}:${PIG_CLASSPATH}
fi

# so that filenames w/ spaces are handled correctly in loops below
IFS=

# extglob is needed for the !(...) pattern further down and must be enabled
# before that line is parsed; nullglob makes unmatched patterns expand to
# nothing so the "not found" checks below can test for an empty string.
shopt -s extglob
shopt -s nullglob

for f in "$PIG_HOME"/lib/*.jar; do
  CLASSPATH=${CLASSPATH}:$f
done

# A jython jar under lib/ is already covered by the loop above; only a
# development build's ivy copy has to be added explicitly.
JYTHON_JAR=$(echo "$PIG_HOME"/lib/jython*.jar)

if [ -z "$JYTHON_JAR" ]; then
  JYTHON_JAR=$(echo "$PIG_HOME"/build/ivy/lib/Pig/jython*.jar)
  if [ -n "$JYTHON_JAR" ]; then
    CLASSPATH=${CLASSPATH}:$JYTHON_JAR
  fi
fi

for f in "$PIG_HOME"/share/pig/lib/*.jar; do
  CLASSPATH=${CLASSPATH}:$f
done

# For Hadoop 0.23.0+
#
#if [ -d "${PIG_HOME}/share/hadoop/common" ]; then
#    for f in ${PIG_HOME}/share/hadoop/common/hadoop*.jar; do
#        CLASSPATH=${CLASSPATH}:$f;
#    done
#fi
#
#if [ -d "${PIG_HOME}/share/hadoop/hdfs" ]; then
#    for f in ${PIG_HOME}/share/hadoop/hdfs/hadoop*.jar; do
#        CLASSPATH=${CLASSPATH}:$f;
#    done
#fi
#
#if [ -d "${PIG_HOME}/share/hadoop/mapreduce" ]; then
#    for f in ${PIG_HOME}/share/hadoop/mapreduce/hadoop*.jar; do
#        CLASSPATH=${CLASSPATH}:$f;
#    done
#fi

# Locate the hadoop launcher. Precedence: HADOOP_PREFIX, then HADOOP_HOME,
# then the system-wide /usr/bin/hadoop as a last resort.
if [ -n "$HADOOP_PREFIX" ]; then
  if [ -f "$HADOOP_PREFIX/bin/hadoop" ]; then
    HADOOP_BIN=$HADOOP_PREFIX/bin/hadoop
  fi
fi

if [[ -z "$HADOOP_BIN" && -n "$HADOOP_HOME" && -d "$HADOOP_HOME" ]]; then
  if [ -f "$HADOOP_HOME/bin/hadoop" ]; then
    HADOOP_BIN=$HADOOP_HOME/bin/hadoop
  fi
fi

# Only fall back to the packaged hadoop when nothing was found above; an
# explicitly configured installation must not be overridden.
if [ -z "$HADOOP_BIN" ]; then
  # if installed with rpm/deb package
  if [ -f /usr/bin/hadoop ]; then
    HADOOP_BIN=/usr/bin/hadoop
  fi
fi

# if using HBase, likely want to include HBase config
HBASE_CONF_DIR=${HBASE_CONF_DIR:-/etc/hbase}
if [ -n "$HBASE_CONF_DIR" ] && [ -d "$HBASE_CONF_DIR" ]; then
  CLASSPATH=$HBASE_CONF_DIR:$CLASSPATH
fi

if [ -d "${PIG_HOME}/etc/hadoop" ]; then
  CLASSPATH=${CLASSPATH}:${PIG_HOME}/etc/hadoop
fi

# locate ZooKeeper
if [ -d "${PIG_HOME}/share/zookeeper" ]; then
  for f in "${PIG_HOME}"/share/zookeeper/zookeeper-*.jar; do
    CLASSPATH=${CLASSPATH}:$f
  done
fi

# locate HBase
if [ -d "${PIG_HOME}/share/hbase" ]; then
  for f in "${PIG_HOME}"/share/hbase/hbase-*.jar; do
    CLASSPATH=${CLASSPATH}:$f
  done
fi

# default log directory & file
if [ "$PIG_LOG_DIR" = "" ]; then
  PIG_LOG_DIR="$PIG_HOME/logs"
fi
if [ "$PIG_LOGFILE" = "" ]; then
  PIG_LOGFILE='pig.log'
fi

# cygwin path translation
if $cygwin; then
  CLASSPATH=$(cygpath -p -w "$CLASSPATH")
  PIG_HOME=$(cygpath -d "$PIG_HOME")
  PIG_LOG_DIR=$(cygpath -d "$PIG_LOG_DIR")
fi

# restore ordinary behaviour
unset IFS

PIG_OPTS="$PIG_OPTS -Dpig.log.dir=$PIG_LOG_DIR"
PIG_OPTS="$PIG_OPTS -Dpig.log.file=$PIG_LOGFILE"
PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_HOME"

# run it
if [ -n "$HADOOP_BIN" ]; then
  if [ "$debug" == "true" ]; then
    echo "Find hadoop at $HADOOP_BIN"
  fi

  PIG_JAR=$(echo "$PIG_HOME"/pig-*withouthadoop.jar)

  # for deb/rpm package, add pig jar in /usr/share/pig
  if [ -z "$PIG_JAR" ]; then
    PIG_JAR=$(echo "$PIG_HOME"/share/pig/pig-*withouthadoop.jar)
  fi

  if [ -n "$PIG_JAR" ]; then
    CLASSPATH=${CLASSPATH}:$PIG_JAR
  else
    echo "Cannot locate pig-withouthadoop.jar. do 'ant jar-withouthadoop', and try again"
    exit 1
  fi

  export HADOOP_CLASSPATH=$CLASSPATH:$HADOOP_CLASSPATH
  export HADOOP_OPTS="$JAVA_HEAP_MAX $PIG_OPTS $HADOOP_OPTS"

  # Keep the command as an array: flattening it into a string and re-splitting
  # would break any user argument that contains whitespace or glob characters.
  cmd=("$HADOOP_BIN" jar "$PIG_JAR" "${remaining[@]}")

  if [ "$debug" == "true" ]; then
    echo "dry run:"
    echo "HADOOP_CLASSPATH: $HADOOP_CLASSPATH"
    echo "HADOOP_OPTS: $HADOOP_OPTS"
    echo "${cmd[@]}"
    echo
  else
    exec "${cmd[@]}"
  fi
else
  # fall back to use fat pig.jar
  if [ "$debug" == "true" ]; then
    echo "Cannot find local hadoop installation, using bundled hadoop 20.2"
  fi

  PIG_JAR=$(echo "$PIG_HOME"/pig!(*withouthadoop).jar)

  if [ -n "$PIG_JAR" ]; then
    CLASSPATH="${CLASSPATH}:$PIG_JAR"
  else
    echo "Cannot locate pig.jar. do 'ant jar', and try again"
    exit 1
  fi

  CLASS=org.apache.pig.Main

  # JAVA_HEAP_MAX and PIG_OPTS are option *strings* and are deliberately left
  # unquoted so they split into separate JVM arguments. Globbing is switched
  # off while they are expanded so an option containing * or ? is passed
  # through literally instead of being matched against the file system.
  set -f
  cmd=("$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" "$CLASS" "${remaining[@]}")
  set +f

  if [ "$debug" == "true" ]; then
    echo "dry run:"
    echo "${cmd[@]}"
    echo
  else
    exec "${cmd[@]}"
  fi
fi

# Only reached on a dry run; exec never returns.
shopt -u nullglob
shopt -u extglob