#!/bin/sh
# Create a Hadoop AMI. Runs on the EC2 instance.

# Import variables (JAVA_BINARY_URL, JAVA_VERSION, HADOOP_VERSION,
# AWS_ACCOUNT_ID, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET)
bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/hadoop-ec2-env.sh

# Remove the environment script since it contains sensitive information
rm -f "$bin"/hadoop-ec2-env.sh

# Install Java
cd /usr/local
wget -nv -O java.bin $JAVA_BINARY_URL
sh java.bin
rm -f java.bin

# Install tools (-y answers yes to prompts so the install is unattended)
yum -y install rsync

# Install Hadoop
cd /usr/local
wget -nv http://www.apache.org/dist/lucene/hadoop/hadoop-$HADOOP_VERSION.tar.gz
tar xzf hadoop-$HADOOP_VERSION.tar.gz
rm -f hadoop-$HADOOP_VERSION.tar.gz

# Configure Hadoop: set JAVA_HOME, put logs on instance storage (/mnt), and
# sleep one second between slave commands so a large cluster doesn't
# overwhelm the master
sed -i \
  -e "s|# export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}|" \
  -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/mnt/hadoop/logs|' \
  -e 's|# export HADOOP_SLAVE_SLEEP=.*|export HADOOP_SLAVE_SLEEP=1|' \
  /usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh
mkdir -p /mnt/hadoop/logs

# Do configuration on instance startup
echo "/root/hadoop-init" >> /etc/rc.d/rc.local

# Configure SSH.
# Delete authorized_keys since it includes the key the instance was launched
# with. (Note that it is re-populated when an instance starts.)
rm -f /root/.ssh/authorized_keys
# Ensure logging in to new hosts is seamless.
echo '    StrictHostKeyChecking no' >> /etc/ssh/ssh_config

# Bundle and upload image (-s 1536 sets the image size in MB)
cd ~root
# Don't need to delete .bash_history since it isn't written until exit.
ec2-bundle-vol -d /mnt -k /mnt/pk-*.pem -c /mnt/cert-*.pem -u $AWS_ACCOUNT_ID -s 1536 -p hadoop-$HADOOP_VERSION
rm -f /mnt/pk-*.pem /mnt/cert-*.pem
ec2-upload-bundle -b $S3_BUCKET -m /mnt/hadoop-$HADOOP_VERSION.manifest.xml -a $AWS_ACCESS_KEY_ID -s $AWS_SECRET_ACCESS_KEY

# End
echo Done
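
# A minimal sketch (kept as comments so this file stays runnable) of how a
# local driver script might push this file to a freshly launched instance and
# run it. The host name, key path, and file names below are illustrative
# assumptions, not part of this script; note the driver must also copy the
# pk-*.pem and cert-*.pem credentials to /mnt for ec2-bundle-vol above.
#
#   INSTANCE_HOST=ec2-xx-xx-xx-xx.compute-1.amazonaws.com   # hypothetical
#   PRIVATE_KEY_PATH=~/.ec2/id_rsa-gsg-keypair              # hypothetical
#   scp -o StrictHostKeyChecking=no -i "$PRIVATE_KEY_PATH" \
#     hadoop-ec2-env.sh hadoop-init create-hadoop-image-remote \
#     "root@$INSTANCE_HOST:"
#   ssh -o StrictHostKeyChecking=no -i "$PRIVATE_KEY_PATH" \
#     "root@$INSTANCE_HOST" 'sh create-hadoop-image-remote'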