#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This script tests the Hadoop cloud scripts by running through a minimal
# sequence of steps to start a persistent (EBS) cluster, run a job, then
# shutdown the cluster.
#
# Example usage:
# HADOOP_HOME=~/dev/hadoop-0.20.1/ ./persistent-cluster.sh
#
# Every upper-case setting assigned below may be overridden from the
# environment. CLIENT_CIDRS and ENVS have no default; when set they are
# passed (word-split) to launch-cluster as additional arguments.
#

# Polls `list-storage` for the cluster until the sixth column of its output
# no longer contains "attached" on any line.
# Globals: HADOOP_CLOUD_SCRIPT, CONFIG_DIR, CLUSTER (read)
# NOTE(review): a failing list-storage call also produces no match and is
# therefore indistinguishable from "all volumes detached" -- confirm this
# is acceptable.
wait_for_volume_detachment() {
  local attached
  # grep exits non-zero once nothing matches, which must not abort the
  # script; tracing is switched off so the poll loop does not flood the log.
  set +e
  set +x
  while true; do
    attached=$("$HADOOP_CLOUD_SCRIPT" list-storage --config-dir="$CONFIG_DIR" \
      "$CLUSTER" | awk '{print $6}' | grep 'attached')
    sleep 5
    if [ -z "$attached" ]; then
      break
    fi
  done
  set -e
  set -x
}

# Launches a one-slave cluster with the configured image, key and zone.
launch_cluster() {
  # CLIENT_CIDRS and ENVS are intentionally unquoted: each may be empty or
  # hold several whitespace-separated arguments.
  # shellcheck disable=SC2086
  "$HADOOP_CLOUD_SCRIPT" launch-cluster --config-dir="$CONFIG_DIR" \
    --image-id="$IMAGE_ID" --key-name="$KEY_NAME" \
    --auto-shutdown="$AUTO_SHUTDOWN" \
    --availability-zone="$AVAILABILITY_ZONE" $CLIENT_CIDRS $ENVS "$CLUSTER" 1
}

# Runs a proxy and saves its pid in HADOOP_CLOUD_PROXY_PID (set by the shell
# code the proxy command prints) and in proxy_pid for the exit handler.
start_proxy() {
  local proxy_env
  # Capture first so that a failing proxy command aborts under `set -e`
  # instead of being masked by a successful `eval` of empty output.
  proxy_env=$("$HADOOP_CLOUD_SCRIPT" proxy --config-dir="$CONFIG_DIR" \
    --ssh-options="$SSH_OPTIONS" "$CLUSTER")
  eval "$proxy_env"
  proxy_pid=${HADOOP_CLOUD_PROXY_PID:-}
}

# Stops the proxy started by start_proxy.
stop_proxy() {
  kill "$proxy_pid"
  proxy_pid=
}

# Stops the proxy and terminates the cluster's instances.
terminate_cluster() {
  stop_proxy
  "$HADOOP_CLOUD_SCRIPT" terminate-cluster --config-dir="$CONFIG_DIR" --force \
    "$CLUSTER"
  sleep 5 # wait for termination to take effect
}

# Checks that the job output exists and mentions "Apache".
check_output() {
  # following returns a non-zero exit code if no match
  "$HADOOP_HOME/bin/hadoop" fs -cat 'output/part-00000' | grep Apache
}

# EXIT handler: make sure a proxy that is still running is not left behind
# when a step fails part-way through. Best effort, so errors are ignored.
cleanup_proxy() {
  if [ -n "$proxy_pid" ]; then
    kill "$proxy_pid" 2>/dev/null || true
  fi
}

set -e
set -x

proxy_pid=
trap cleanup_proxy EXIT

bin=$(cd "$(dirname "$0")" && pwd)

WORKSPACE=${WORKSPACE:-$(pwd)}
CONFIG_DIR=${CONFIG_DIR:-$WORKSPACE/.hadoop-cloud}
CLUSTER=${CLUSTER:-hadoop-cloud-ebs-$USER-test-cluster}
IMAGE_ID=${IMAGE_ID:-ami-6159bf08} # default to Fedora 32-bit AMI
AVAILABILITY_ZONE=${AVAILABILITY_ZONE:-us-east-1c}
KEY_NAME=${KEY_NAME:-$USER}
AUTO_SHUTDOWN=${AUTO_SHUTDOWN:-15}
LOCAL_HADOOP_VERSION=${LOCAL_HADOOP_VERSION:-0.20.1}
HADOOP_HOME=${HADOOP_HOME:-$WORKSPACE/hadoop-$LOCAL_HADOOP_VERSION}
HADOOP_CLOUD_HOME=${HADOOP_CLOUD_HOME:-$bin/../py}
HADOOP_CLOUD_PROVIDER=${HADOOP_CLOUD_PROVIDER:-ec2}
# The tilde sits inside double quotes, so this shell passes it on literally;
# presumably it is expanded when the ssh command is eventually run -- confirm.
SSH_OPTIONS=${SSH_OPTIONS:-"-i ~/.$HADOOP_CLOUD_PROVIDER/id_rsa-$KEY_NAME \
-o StrictHostKeyChecking=no"}

HADOOP_CLOUD_SCRIPT=$HADOOP_CLOUD_HOME/hadoop-$HADOOP_CLOUD_PROVIDER
export HADOOP_CONF_DIR=$CONFIG_DIR/$CLUSTER

# Install Hadoop locally
if [ ! -d "$HADOOP_HOME" ]; then
  tarball=hadoop-$LOCAL_HADOOP_VERSION.tar.gz
  wget "http://archive.apache.org/dist/hadoop/core/hadoop-$LOCAL_HADOOP_VERSION/$tarball"
  tar zxf "$tarball" -C "$WORKSPACE"
  rm "$tarball"
fi

# Create storage
"$HADOOP_CLOUD_SCRIPT" create-storage --config-dir="$CONFIG_DIR" \
  --availability-zone="$AVAILABILITY_ZONE" "$CLUSTER" nn 1 \
  "$bin/ebs-storage-spec.json"
"$HADOOP_CLOUD_SCRIPT" create-storage --config-dir="$CONFIG_DIR" \
  --availability-zone="$AVAILABILITY_ZONE" "$CLUSTER" dn 1 \
  "$bin/ebs-storage-spec.json"

# Launch a cluster
launch_cluster

# Run a proxy and save its pid in HADOOP_CLOUD_PROXY_PID
start_proxy

# Run a job and check it works
"$HADOOP_HOME/bin/hadoop" fs -mkdir input
"$HADOOP_HOME/bin/hadoop" fs -put "$HADOOP_HOME/LICENSE.txt" input
# The examples jar name contains the Hadoop version, hence the unquoted glob.
"$HADOOP_HOME/bin/hadoop" jar "$HADOOP_HOME"/hadoop-*-examples.jar grep \
  input output Apache
check_output

# Shutdown the cluster
terminate_cluster

# Relaunch the cluster
launch_cluster

# Run a proxy and save its pid in HADOOP_CLOUD_PROXY_PID
start_proxy

# Check output is still there
check_output

# Shutdown the cluster
terminate_cluster

# Cleanup
"$HADOOP_CLOUD_SCRIPT" delete-cluster --config-dir="$CONFIG_DIR" "$CLUSTER"
wait_for_volume_detachment
"$HADOOP_CLOUD_SCRIPT" delete-storage --config-dir="$CONFIG_DIR" --force \
  "$CLUSTER"