#!/bin/sh

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Create a Hadoop AMI. Runs on the EC2 instance.
#
# Steps, in order: source hadoop-ec2-env.sh (then delete it), install Java,
# install supporting rpms, download and configure Hadoop, hook the user-data
# script into boot, set up root's shell environment, scrub SSH keys, and
# finally bundle the volume and upload it to S3.
#
# Variables expected from hadoop-ec2-env.sh:
#   JAVA_BINARY_URL, JAVA_VERSION, HADOOP_VERSION, ARCH,
#   AWS_ACCOUNT_ID, AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, S3_BUCKET
#
# Any failure in a step that the image depends on aborts the script with a
# non-zero status, so a half-built image is never bundled or uploaded.

# Print an error message to stderr and abort with status 1.
die() {
  printf '%s: %s\n' "$0" "$*" >&2
  exit 1
}

# Import variables
bin=$(dirname "$0")
bin=$(cd "$bin" && pwd) || die "cannot resolve script directory"
[ -f "$bin/hadoop-ec2-env.sh" ] || die "missing $bin/hadoop-ec2-env.sh"
. "$bin"/hadoop-ec2-env.sh

# Remove environment script since it contains sensitive information
rm -f "$bin"/hadoop-ec2-env.sh

# Fail early (before touching the system) if the environment is incomplete.
: "${JAVA_BINARY_URL:?must be set by hadoop-ec2-env.sh}"
: "${JAVA_VERSION:?must be set by hadoop-ec2-env.sh}"
: "${HADOOP_VERSION:?must be set by hadoop-ec2-env.sh}"
: "${ARCH:?must be set by hadoop-ec2-env.sh}"
: "${AWS_ACCOUNT_ID:?must be set by hadoop-ec2-env.sh}"
: "${AWS_ACCESS_KEY_ID:?must be set by hadoop-ec2-env.sh}"
: "${AWS_SECRET_ACCESS_KEY:?must be set by hadoop-ec2-env.sh}"
: "${S3_BUCKET:?must be set by hadoop-ec2-env.sh}"

# Install Java
echo "Downloading and installing java binary."
cd /usr/local || die "cannot cd to /usr/local"
# With -O, a failed download still leaves an (empty) java.bin behind, so the
# wget status must be checked before running the installer.
wget -nv -O java.bin "$JAVA_BINARY_URL" \
  || die "failed to download Java from $JAVA_BINARY_URL"
sh java.bin || die "Java installer failed"
rm -f java.bin

# Install tools
echo "Installing rpms."
yum -y install rsync lynx screen ganglia-gmetad ganglia-gmond ganglia-web httpd php \
  || die "yum install failed"
# Cache cleanup only reduces image size; a failure here is not fatal.
yum -y clean all

# Install Hadoop
echo "Installing Hadoop $HADOOP_VERSION."
cd /usr/local || die "cannot cd to /usr/local"
hadoop_tarball="hadoop-$HADOOP_VERSION.tar.gz"
# Try the archive site first, then fall back to the main distribution site.
wget -nv "http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/$hadoop_tarball"
if [ ! -f "$hadoop_tarball" ]; then
  wget -nv "http://www.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/$hadoop_tarball"
fi
[ -f "$hadoop_tarball" ] || die "failed to download $hadoop_tarball"
tar xzf "$hadoop_tarball" || die "failed to extract $hadoop_tarball"
rm -f "$hadoop_tarball"

# Configure Hadoop
sed -i -e "s|# export JAVA_HOME=.*|export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}|" \
  -e 's|# export HADOOP_LOG_DIR=.*|export HADOOP_LOG_DIR=/mnt/hadoop/logs|' \
  -e 's|# export HADOOP_SLAVE_SLEEP=.*|export HADOOP_SLAVE_SLEEP=1|' \
  -e 's|# export HADOOP_OPTS=.*|export HADOOP_OPTS=-server|' \
  "/usr/local/hadoop-$HADOOP_VERSION/conf/hadoop-env.sh" \
  || die "failed to configure hadoop-env.sh"

# Run user data as script on instance startup
chmod +x /etc/init.d/ec2-run-user-data
echo "/etc/init.d/ec2-run-user-data" >> /etc/rc.d/rc.local

# Setup root user bash environment
echo "export JAVA_HOME=/usr/local/jdk${JAVA_VERSION}" >> /root/.bash_profile
echo "export HADOOP_HOME=/usr/local/hadoop-${HADOOP_VERSION}" >> /root/.bash_profile
# Single quotes on purpose: the variables must expand at login, not now.
echo 'export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$PATH' >> /root/.bash_profile

# Configure networking.
# Delete SSH authorized_keys since it includes the key it was launched with.
# (Note that it is re-populated when an instance starts.)
rm -f /root/.ssh/authorized_keys
# Ensure logging in to new hosts is seamless.
echo ' StrictHostKeyChecking no' >> /etc/ssh/ssh_config

# Bundle and upload image
cd ~root || die "cannot cd to root's home directory"
# Don't need to delete .bash_history since it isn't written until exit.
df -h
# The pk*.pem / cert*.pem globs are intentionally unquoted so the shell
# expands them to the key and certificate files under /mnt.
ec2-bundle-vol -d /mnt -k /mnt/pk*.pem -c /mnt/cert*.pem \
  -u "$AWS_ACCOUNT_ID" -s 3072 -p "hadoop-$HADOOP_VERSION-$ARCH" -r "$ARCH" \
  || die "ec2-bundle-vol failed"

# NOTE(review): the secret key is passed as a command-line argument and is
# therefore visible in the process list while the upload runs.
ec2-upload-bundle -b "$S3_BUCKET" \
  -m "/mnt/hadoop-$HADOOP_VERSION-$ARCH.manifest.xml" \
  -a "$AWS_ACCESS_KEY_ID" -s "$AWS_SECRET_ACCESS_KEY" \
  || die "ec2-upload-bundle failed"

# End
echo Done