#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# The Pig command script
#
# Environment Variables
#
#     JAVA_HOME                  The java implementation to use.
#
#     PIG_CLASSPATH              Extra Java CLASSPATH entries.
#
#     PIG_USER_CLASSPATH_FIRST   If set, prepend user-provided classpath entries
#                                instead of appending them. Default is unset,
#                                i.e. the entries are placed at the end of the
#                                pre-defined classpath.
#
#     HADOOP_HOME/HADOOP_PREFIX  Hadoop installation directory
#                                (HADOOP_PREFIX as of Hadoop 0.20.205).
#
#     HADOOP_CONF_DIR            Hadoop conf dir.
#
#     PIG_HEAPSIZE               The maximum amount of heap to use, in MB.
#                                Default is 1000.
#
#     PIG_OPTS                   Extra Java runtime options.
#
#     PIG_CONF_DIR               Alternate conf dir. Default is ${PIG_HOME}/conf.
#
#     HBASE_HOME                 Optionally, the HBase installation directory.
#                                Defaults to ${PIG_HOME}/share/hbase.
#
#     HBASE_CONF_DIR             Optionally, the HBase configuration to run against
#                                when using HBaseStorage. Defaults to ${HBASE_HOME}/conf.

cygwin=false
case "`uname`" in
CYGWIN*) cygwin=true;;
esac

debug=false
remaining=()
includeHCatalog=""
addJarString=-Dpig.additional.jars.uris\=
additionalJars=""
prevArgExecType=false
isSparkMode=false
isSparkLocalMode=false
sparkversion=2

# verify whether the execType is SPARK or SPARK_LOCAL
function processExecType(){
    execType=$1
    execTypeUpperCase=$(echo $execType | tr '[a-z]' '[A-Z]')
    if [[ "$execTypeUpperCase" == "SPARK" ]]; then
        isSparkMode=true
    elif [[ "$execTypeUpperCase" == "SPARK_LOCAL" ]]; then
        isSparkLocalMode=true
    fi
}

# filter command line parameters
for f in "$@"; do
    if [[ $f == "-secretDebugCmd" || $f == "-printCmdDebug" ]]; then
        debug=true
    elif [[ $f == "-useHCatalog" ]]; then
        # to use hcatalog, we need to add the hcatalog and hive jars
        # to the classpath and also include the hive configuration xml file
        # for pig to work correctly with hcatalog;
        # because of PIG-2532, including the jars in the classpath is
        # sufficient to ensure that they are registered as well
        includeHCatalog=true
    elif [[ "$includeHCatalog" == "true" && $f == $addJarString* ]]; then
        additionalJars=`echo $f | sed s/$addJarString//`
    elif [[ "$f" == "-x" || "$f" == "-exectype" ]]; then
        prevArgExecType=true
        remaining[${#remaining[@]}]="$f"
    elif [[ "$prevArgExecType" == "true" ]]; then
        prevArgExecType=false
        processExecType $f
        remaining[${#remaining[@]}]="$f"
    else
        remaining[${#remaining[@]}]="$f"
    fi
done

# resolve links - $0 may be a softlink
this="${BASH_SOURCE-$0}"

# convert relative path to absolute path
bin=$(cd -P -- "$(dirname -- "$this")" >/dev/null && pwd -P)
script="$(basename -- "$this")"
this="$bin/$script"

# the root of the Pig installation
if [ -z "$PIG_HOME" ]; then
    export PIG_HOME=`dirname "$this"`/..
fi
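
# For example (illustrative layout): if this script is installed at
# /opt/pig/bin/pig, PIG_HOME resolves to /opt/pig/bin/.. unless it was
# already set in the environment.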
if [ -z "$PIG_CONF_DIR" ]; then
    if [ -f ${PIG_HOME}/conf/pig.properties ]; then
        PIG_CONF_DIR=${PIG_HOME}/conf
    fi
fi

if [ -z "$PIG_CONF_DIR" ]; then
    if [ -d /etc/pig ]; then
        # if installed with rpm/deb package
        PIG_CONF_DIR="/etc/pig"
    fi
fi

if [ -f "${PIG_CONF_DIR}/pig-env.sh" ]; then
    . "${PIG_CONF_DIR}/pig-env.sh"
fi

# some Java parameters
if [ "$JAVA_HOME" = "" ]; then
    echo "Error: JAVA_HOME is not set."
    exit 1
fi

JAVA=$JAVA_HOME/bin/java
JAVA_HEAP_MAX=-Xmx1000m

# check envvars which might override default args
if [ "$PIG_HEAPSIZE" != "" ]; then
    JAVA_HEAP_MAX="-Xmx${PIG_HEAPSIZE}m"
fi

# CLASSPATH initially contains $PIG_CONF_DIR
CLASSPATH="${PIG_CONF_DIR}"
CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar

if [ "$includeHCatalog" == "true" ]; then
    # need to provide the hcatalog jar file path as well as
    # the location of the hive jars on which hcatalog depends
    hiveMetaStoreJar=hive-metastore-*.jar
    thriftJar=libthrift-*.jar
    hiveExecJar=hive-exec-*.jar
    fbJar=libfb303-*.jar
    jdoECJar=jdo*-api-*.jar
    slfJar=slf4j-api-*.jar
    hbaseHiveJar=hive-hbase-handler-*.jar
    if [ "$HIVE_HOME" == "" ]; then
        if [ -d "/usr/lib/hive" ]; then
            HIVE_HOME=/usr/lib/hive
        else
            echo "Please initialize HIVE_HOME"
            exit 1
        fi
    fi
    hiveMetaStoreVersion=`ls $HIVE_HOME/lib/$hiveMetaStoreJar`
    thriftVersion=`ls $HIVE_HOME/lib/$thriftJar`
    hiveExecVersion=`ls $HIVE_HOME/lib/$hiveExecJar`
    fbJarVersion=`ls $HIVE_HOME/lib/$fbJar`
    jdoECJarVersion=`ls $HIVE_HOME/lib/$jdoECJar`
    slfJarVersion=`ls $HIVE_HOME/lib/$slfJar`
    hbaseHiveVersion=`ls $HIVE_HOME/lib/$hbaseHiveJar`

    # hcatalog jar name for 0.4 and earlier
    hcatJarOld=hcatalog-*.jar
    # hcatalog jar name for 0.5 and newer
    hcatJar=*hcatalog-core-*.jar
    hbaseHCatJar=*hbase-storage-handler-*.jar
    pigHCatJar=*hcatalog-pig-adapter-*.jar
    if [ "$HCAT_HOME" == "" ]; then
        if [ -d "/usr/lib/hcatalog" ]; then
            HCAT_HOME=/usr/lib/hcatalog
        elif [ -d "/usr/lib/hive-hcatalog" ]; then
            HCAT_HOME=/usr/lib/hive-hcatalog
        else
            echo "Please initialize HCAT_HOME"
            exit 1
        fi
    fi
    hcatJarPath=`ls $HCAT_HOME/share/hcatalog/$hcatJar`
    # if the hcat jar is not found, we may be on hcatalog 0.4 or older
    if [ 'xx' == "x${hcatJarPath}x" ]; then
        hcatJarPath=`ls $HCAT_HOME/share/hcatalog/$hcatJarOld | grep -v server`
    fi
    # if we are using an older hcatalog version, the jar is on a different path
    if [ -d "$HCAT_HOME/share/hcatalog/storage-handlers/hbase/lib" ]; then
        # in 0.5 and newer we need to add multiple jars to the classpath
        hbaseHCatJarPath="$HCAT_HOME/share/hcatalog/storage-handlers/hbase/lib/*"
    else
        hbaseHCatJarPath=`ls $HCAT_HOME/lib/$hbaseHCatJar`
    fi
    # get the pig storage handler jar
    pigHCatJarPath=`ls $HCAT_HOME/share/hcatalog/${pigHCatJar}`
    HCAT_CLASSPATHS=$hiveMetaStoreVersion:$thriftVersion:$hiveExecVersion:$fbJarVersion:$jdoECJarVersion:$slfJarVersion:$hbaseHiveVersion:$hcatJarPath:$hbaseHCatJarPath:$pigHCatJarPath
    ADDITIONAL_CLASSPATHS=file://$hiveMetaStoreVersion,file://$thriftVersion,file://$hiveExecVersion,file://$fbJarVersion,file://$jdoECJarVersion,file://$slfJarVersion,file://$hbaseHiveVersion,file://$hcatJarPath,file://$hbaseHCatJarPath,file://$pigHCatJarPath
    if [ "$additionalJars" != "" ]; then
        ADDITIONAL_CLASSPATHS=$ADDITIONAL_CLASSPATHS,$additionalJars
    fi
    CLASSPATH=${CLASSPATH}:$HCAT_CLASSPATHS:$HIVE_HOME/conf
fi
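
# Example invocation (illustrative; assumes local Hive/HCatalog installs at
# the paths probed above):
#   HIVE_HOME=/usr/lib/hive HCAT_HOME=/usr/lib/hive-hcatalog pig -useHCatalog script.pig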
# Add user-specified CLASSPATH entries via PIG_CLASSPATH.
# If PIG_USER_CLASSPATH_FIRST is set, prepend the entries.
if [ "$PIG_CLASSPATH" != "" ]; then
    if [ "$PIG_USER_CLASSPATH_FIRST" == "" ]; then
        CLASSPATH=${CLASSPATH}:${PIG_CLASSPATH}
    else
        CLASSPATH=${PIG_CLASSPATH}:${CLASSPATH}
    fi
fi

# add HADOOP_CONF_DIR
if [ "$HADOOP_CONF_DIR" != "" ]; then
    CLASSPATH=${CLASSPATH}:${HADOOP_CONF_DIR}
fi

# so that filenames w/ spaces are handled correctly in loops below
IFS=
shopt -s extglob
shopt -s nullglob

for f in $PIG_HOME/lib/*.jar; do
    CLASSPATH=${CLASSPATH}:$f
done

JYTHON_JAR=`echo ${PIG_HOME}/lib/jython*.jar`
if [ -z "$JYTHON_JAR" ]; then
    JYTHON_JAR=`echo $PIG_HOME/build/ivy/lib/Pig/jython*.jar`
    if [ -n "$JYTHON_JAR" ]; then
        CLASSPATH=${CLASSPATH}:$JYTHON_JAR
    fi
fi

JRUBY_JAR=`echo ${PIG_HOME}/lib/jruby-complete-*.jar`
if [ -z "$JRUBY_JAR" ]; then
    JRUBY_JAR=`echo $PIG_HOME/build/ivy/lib/Pig/jruby-complete-*.jar`
    if [ -n "$JRUBY_JAR" ]; then
        CLASSPATH=${CLASSPATH}:$JRUBY_JAR
    fi
fi

for f in $PIG_HOME/share/pig/lib/*.jar; do
    CLASSPATH=${CLASSPATH}:$f
done

# For Hadoop 0.23.0+
#
#if [ -d "${PIG_HOME}/share/hadoop/common" ]; then
#    for f in ${PIG_HOME}/share/hadoop/common/hadoop*.jar; do
#        CLASSPATH=${CLASSPATH}:$f
#    done
#fi
#
#if [ -d "${PIG_HOME}/share/hadoop/hdfs" ]; then
#    for f in ${PIG_HOME}/share/hadoop/hdfs/hadoop*.jar; do
#        CLASSPATH=${CLASSPATH}:$f
#    done
#fi
#
#if [ -d "${PIG_HOME}/share/hadoop/mapreduce" ]; then
#    for f in ${PIG_HOME}/share/hadoop/mapreduce/hadoop*.jar; do
#        CLASSPATH=${CLASSPATH}:$f
#    done
#fi

if which hadoop >/dev/null; then
    HADOOP_BIN=`which hadoop`
fi

if [[ -z "$HADOOP_BIN" && -n "$HADOOP_PREFIX" ]]; then
    if [ -f $HADOOP_PREFIX/bin/hadoop ]; then
        HADOOP_BIN=$HADOOP_PREFIX/bin/hadoop
    fi
fi

if [[ -z "$HADOOP_BIN" && -n "$HADOOP_HOME" && -d "$HADOOP_HOME" ]]; then
    if [ -f $HADOOP_HOME/bin/hadoop ]; then
        HADOOP_BIN=$HADOOP_HOME/bin/hadoop
    fi
fi

if [ -z "$HADOOP_BIN" ]; then
    # if installed with rpm/deb package
    if [ -f /usr/bin/hadoop ]; then
        HADOOP_BIN=/usr/bin/hadoop
    fi
fi

# find out HADOOP_HOME in order to find the hadoop jar;
# we use the name of the hadoop jar to decide whether the user
# is running hadoop 1 or hadoop 2
if [[ -z "$HADOOP_HOME" && -n "$HADOOP_PREFIX" ]]; then
    HADOOP_HOME=$HADOOP_PREFIX
fi

if [[ -z "$HADOOP_HOME" && -n "$HADOOP_BIN" ]]; then
    HADOOP_HOME=`dirname $HADOOP_BIN`/..
fi
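
# For example (illustrative): with hadoop on the PATH at /opt/hadoop/bin/hadoop
# and HADOOP_HOME/HADOOP_PREFIX unset, HADOOP_BIN becomes /opt/hadoop/bin/hadoop
# and HADOOP_HOME resolves to /opt/hadoop/bin/.. .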
# if using HBase, likely want to include HBase jars and config
HBH=${HBASE_HOME:-"${PIG_HOME}/share/hbase"}
if [ -d "${HBH}" ]; then
    for f in ${HBH}/hbase-*.jar; do
        CLASSPATH=${CLASSPATH}:$f
    done
    for f in ${HBH}/lib/*.jar; do
        CLASSPATH=${CLASSPATH}:$f
    done
    HBASE_CONF_DIR=${HBASE_CONF_DIR:-"${HBH}/conf"}
fi

if [ -n "$HBASE_CONF_DIR" ] && [ -d "$HBASE_CONF_DIR" ]; then
    CLASSPATH=$HBASE_CONF_DIR:$CLASSPATH
fi

if [ -d "${PIG_HOME}/etc/hadoop" ]; then
    CLASSPATH=${CLASSPATH}:${PIG_HOME}/etc/hadoop
fi

# locate ZooKeeper
ZKH=${ZOOKEEPER_HOME:-"${PIG_HOME}/share/zookeeper"}
if [ -d "$ZKH" ]; then
    for f in ${ZKH}/zookeeper-*.jar; do
        CLASSPATH=${CLASSPATH}:$f
    done
fi

# default log directory & file
if [ "$PIG_LOG_DIR" = "" ]; then
    PIG_LOG_DIR="$PIG_HOME/logs"
fi
if [ "$PIG_LOGFILE" = "" ]; then
    PIG_LOGFILE='pig.log'
fi

# cygwin path translation
if $cygwin; then
    CLASSPATH=`cygpath -p -w "$CLASSPATH"`
    PIG_HOME=`cygpath -d "$PIG_HOME"`
    PIG_LOG_DIR=`cygpath -d "$PIG_LOG_DIR"`
fi

# restore ordinary behaviour
unset IFS

PIG_OPTS="$PIG_OPTS -Dpig.log.dir=$PIG_LOG_DIR"
PIG_OPTS="$PIG_OPTS -Dpig.log.file=$PIG_LOGFILE"
PIG_OPTS="$PIG_OPTS -Dpig.home.dir=$PIG_HOME"
if [ "$includeHCatalog" == "true" ]; then
    addJars=`echo $PIG_OPTS | awk '{ for (i=1; i<=NF; i++) print $i; }' | grep "\-Dpig.additional.jars.uris=" | sed s/-Dpig.additional.jars.uris=//`
    if [ "$addJars" != "" ]; then
        ADDITIONAL_CLASSPATHS=$addJars,$ADDITIONAL_CLASSPATHS
        PIG_OPTS=`echo $PIG_OPTS | sed 's/-Dpig.additional.jars.uris=[^ ]*//'`
    fi
    PIG_OPTS="$PIG_OPTS -Dpig.additional.jars.uris=$ADDITIONAL_CLASSPATHS"
fi

################# ADDING SPARK DEPENDENCIES ##################
# For spark_local mode:
if [ "$isSparkLocalMode" == "true" ]; then
    # SPARK_MASTER is forced to be "local" in spark_local mode
    SPARK_MASTER="local"
    for f in $PIG_HOME/lib/spark/*.jar; do
        CLASSPATH=${CLASSPATH}:$f
    done
fi

# For spark mode:
# Please specify SPARK_HOME first so that we can locate
# $SPARK_HOME/lib/spark-assembly*.jar and add it to the classpath.
if [ "$isSparkMode" == "true" ]; then
    if [ -z "$SPARK_HOME" ]; then
        echo "Error: SPARK_HOME is not set!"
        exit 1
    fi

    # spark-tags*.jar appears only in Spark 2 (Spark 1 does not include it),
    # so we use this jar to decide whether the current Spark is 1 or 2
    SPARK_TAG_JAR=`find $SPARK_HOME -name 'spark-tags*.jar' | wc -l`
    if [ "$SPARK_TAG_JAR" -eq 0 ]; then
        sparkversion="1"
    fi

    if [ "$sparkversion" == "1" ]; then
        # Please specify SPARK_JAR, the hdfs path of spark-assembly*.jar. This
        # allows YARN to cache spark-assembly*.jar on nodes so that it doesn't
        # need to be distributed each time an application runs.
        if [ -z "$SPARK_JAR" ]; then
            echo "Error: SPARK_JAR is not set. SPARK_JAR is the hdfs location of spark-assembly*.jar, which allows YARN to cache it on nodes so that it doesn't need to be distributed each time an application runs."
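            # For example (illustrative path and version):
            #   export SPARK_JAR=hdfs:///user/spark/spark-assembly-1.6.3-hadoop2.6.0.jar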
            exit 1
        fi
        if [ -n "$SPARK_HOME" ]; then
            echo "Using Spark Home: ${SPARK_HOME}"
            SPARK_ASSEMBLY_JAR=`ls ${SPARK_HOME}/lib/spark-assembly*`
            CLASSPATH=${CLASSPATH}:$SPARK_ASSEMBLY_JAR
        fi
    fi

    if [ "$sparkversion" == "2" ]; then
        if [ -n "$SPARK_HOME" ]; then
            echo "Using Spark Home: ${SPARK_HOME}"
            for f in $SPARK_HOME/jars/*.jar; do
                CLASSPATH=${CLASSPATH}:$f
            done
        fi
    fi
fi

# spark-assembly.jar contains jcl-over-slf4j, which would create an
# incompatible LogFactory implementation, so force the standard one
if [ "$isSparkMode" == "true" ]; then
    PIG_OPTS="$PIG_OPTS -Dorg.apache.commons.logging.LogFactory=org.apache.commons.logging.impl.LogFactoryImpl"
fi
################# ADDING SPARK DEPENDENCIES ##################

# run it
if [ -n "$HADOOP_BIN" ]; then
    if [ "$debug" == "true" ]; then
        echo "Found hadoop at $HADOOP_BIN"
    fi

    HADOOP_VERSION_LONG=`hadoop version 2>/dev/null | head -1 | sed -e 's/Hadoop //g'`
    HADOOP_VERSION=`echo "$HADOOP_VERSION_LONG" | cut -c 1`

    PIG_JAR=`echo $PIG_HOME/pig*-core-h${HADOOP_VERSION}.jar`

    # for deb/rpm package, add pig jar in /usr/share/pig
    if [ -z "$PIG_JAR" ]; then
        PIG_JAR=`echo $PIG_HOME/share/pig/pig*-core-h${HADOOP_VERSION}.jar`
    fi

    if [ -n "$PIG_JAR" ]; then
        CLASSPATH=${CLASSPATH}:$PIG_JAR
    else
        if [ "$HADOOP_VERSION" == "2" ]; then
            echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar (found Hadoop $HADOOP_VERSION_LONG). Do 'ant clean jar', and try again"
        else
            echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar (found Hadoop $HADOOP_VERSION_LONG). Do 'ant -Dhadoopversion=3 clean jar', and try again"
        fi
        exit 1
    fi

    for f in $PIG_HOME/lib/h${HADOOP_VERSION}/*.jar; do
        CLASSPATH=${CLASSPATH}:$f
    done

    export HADOOP_CLASSPATH=$CLASSPATH:$HADOOP_CLASSPATH
    export HADOOP_CLIENT_OPTS="$JAVA_HEAP_MAX $PIG_OPTS $HADOOP_CLIENT_OPTS"
    if [ "$debug" == "true" ]; then
        echo "dry run:"
        echo "HADOOP_CLASSPATH: $HADOOP_CLASSPATH"
        echo "HADOOP_OPTS: $HADOOP_OPTS"
        echo "HADOOP_CLIENT_OPTS: $HADOOP_CLIENT_OPTS"
        echo "$HADOOP_BIN" jar "$PIG_JAR" "${remaining[@]}"
        echo
    else
        exec "$HADOOP_BIN" jar "$PIG_JAR" "${remaining[@]}"
    fi
else
    # use bundled hadoop to run local mode
    PIG_JAR=`echo $PIG_HOME/pig*-core-h*.jar`
    # extract the hadoop major version from the jar name (the digit before ".jar")
    HADOOP_VERSION=`echo "$PIG_JAR" | rev | cut -c -5 | rev | cut -c 1`
    if [ -n "$PIG_JAR" ]; then
        CLASSPATH="${CLASSPATH}:$PIG_JAR"
    else
        if [ "$HADOOP_VERSION" == "2" ]; then
            echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar (found no Hadoop installation). Do 'ant clean jar', and try again"
        else
            echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar (found no Hadoop installation). Do 'ant -Dhadoopversion=3 clean jar', and try again"
        fi
        exit 1
    fi

    for f in $PIG_HOME/lib/h${HADOOP_VERSION}/*.jar; do
        CLASSPATH=${CLASSPATH}:$f
    done

    # Add bundled hadoop jars
    for f in $PIG_HOME/lib/hadoop${HADOOP_VERSION}-runtime/*.jar; do
        CLASSPATH=${CLASSPATH}:$f
    done

    if [ "$debug" == "true" ]; then
        echo "Cannot find local hadoop installation, using bundled `java -cp $CLASSPATH org.apache.hadoop.util.VersionInfo | head -1`"
    fi

    CLASS=org.apache.pig.Main
    if [ "$debug" == "true" ]; then
        echo "dry run:"
        echo "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS "${remaining[@]}"
        echo
    else
        exec "$JAVA" $JAVA_HEAP_MAX $PIG_OPTS -classpath "$CLASSPATH" $CLASS "${remaining[@]}"
    fi
fi

shopt -u nullglob
shopt -u extglob
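
# Example invocations (illustrative):
#   pig script.pig                  # run with the hadoop found on PATH / HADOOP_HOME
#   pig -x local script.pig         # local mode; uses the bundled hadoop when none is installed
#   pig -x spark script.pig         # Spark mode; requires SPARK_HOME (and SPARK_JAR on Spark 1)
#   pig -printCmdDebug script.pig   # dry run: print the launch command instead of executing it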