# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Hadoop RPM spec file
#

# FIXME: we need to disable some of the stricter checks on native files for now,
# since the Hadoop build system makes it difficult to pass the kind of flags
# that would make newer RPM debuginfo generation scripts happy.
%undefine _missing_build_ids_terminate_build

%define hadoop_name hadoop
%define etc_hadoop /etc/%{name}
%define config_hadoop %{etc_hadoop}/conf
%define lib_hadoop_dirname /usr/lib
%define lib_hadoop %{lib_hadoop_dirname}/%{name}
%define log_hadoop_dirname /var/log
%define log_hadoop %{log_hadoop_dirname}/%{name}
%define bin_hadoop %{_bindir}
%define man_hadoop %{_mandir}
%define src_hadoop /usr/src/%{name}
%define hadoop_username mapred
%define hadoop_services namenode secondarynamenode datanode jobtracker tasktracker
# Hadoop outputs built binaries into %{hadoop_build_path}
%define hadoop_build_path build
%define static_images_dir src/webapps/static/images

%ifarch i386
%global hadoop_arch Linux-i386-32
%endif
%ifarch amd64 x86_64
%global hadoop_arch Linux-amd64-64
%endif

# CentOS 5 does not have any dist macro,
# so assume that anything that is not Mageia or SUSE is RHEL/CentOS/Fedora.
%if %{!?suse_version:1}0 && %{!?mgaversion:1}0

# brp-repack-jars uses unzip to expand jar files.
# Unfortunately aspectjtools-1.6.5.jar, pulled in by ivy, contains some files and
# directories without any read permission, which makes the whole process fail.
# So for now brp-repack-jars is deactivated until this is fixed.
# See CDH-2151
%define __os_install_post \
    /usr/lib/rpm/redhat/brp-compress ; \
    /usr/lib/rpm/redhat/brp-strip-static-archive %{__strip} ; \
    /usr/lib/rpm/redhat/brp-strip-comment-note %{__strip} %{__objdump} ; \
    /usr/lib/rpm/brp-python-bytecompile ; \
    %{nil}

%define doc_hadoop %{_docdir}/%{name}-%{hadoop_version}
%define alternatives_cmd alternatives
%global initd_dir %{_sysconfdir}/rc.d/init.d
%endif

%if %{?suse_version:1}0
# Only tested on openSUSE 11.4; let's update this for earlier releases once confirmed.
%if 0%{suse_version} > 1130
%define suse_check \# Define an empty suse_check for compatibility with older sles
%endif

# Deactivating symlink checks
%define __os_install_post \
    %{suse_check} ; \
    /usr/lib/rpm/brp-compress ; \
    %{nil}

%define doc_hadoop %{_docdir}/%{name}
%define alternatives_cmd update-alternatives
%global initd_dir %{_sysconfdir}/rc.d
%endif

%if 0%{?mgaversion}
%define doc_hadoop %{_docdir}/%{name}-%{hadoop_version}
%define alternatives_cmd update-alternatives
%global initd_dir %{_sysconfdir}/rc.d/init.d
%endif

# Even though we split the RPM into arch and noarch, it still will build and install
# the entirety of hadoop. Defining this tells RPM not to fail the build
# when it notices that we didn't package most of the installed files.
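# (Left at its default of 1, rpmbuild would abort with an
# "Installed (but unpackaged) file(s) found" error; 0 downgrades that check to a warning.)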
%define _unpackaged_files_terminate_build 0

# RPM scans perl files for dependencies, which breaks for non-packaged perl libs
# like thrift, so disable it.
%define _use_internal_dependency_generator 0

Name: %{hadoop_name}
Version: %{hadoop_version}
Release: %{hadoop_release}
Summary: Hadoop is a software platform for processing vast amounts of data
License: Apache License v2.0
URL: http://hadoop.apache.org/core/
Group: Development/Libraries
Source0: %{name}-%{hadoop_base_version}.tar.gz
Source1: do-component-build
Source2: install_%{name}.sh
Source3: hadoop.default
Source4: hadoop-init.tmpl
Source5: hadoop-init.tmpl.suse
Source6: hadoop.1
Source7: hadoop-fuse-dfs.1
Source8: hadoop-fuse.default
Source9: hadoop.nofiles.conf
Buildroot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
BuildRequires: python >= 2.4, git, fuse-devel, fuse, automake, autoconf
Requires: coreutils, /usr/sbin/useradd, /usr/sbin/usermod, /sbin/chkconfig, /sbin/service, bigtop-utils
Provides: hadoop

%if %{?suse_version:1}0
BuildRequires: libfuse2, libopenssl-devel, gcc-c++, ant, ant-nodeps, ant-trax
# Required for init scripts
Requires: sh-utils, insserv
%endif

# CentOS 5 does not have any dist macro,
# so assume that anything that is not Mageia or SUSE is RHEL/CentOS/Fedora.
%if %{!?suse_version:1}0 && %{!?mgaversion:1}0
BuildRequires: fuse-libs, libtool, redhat-rpm-config, lzo-devel, openssl-devel
# Required for init scripts
Requires: sh-utils, redhat-lsb
%endif

%if 0%{?mgaversion}
BuildRequires: libfuse-devel, libfuse2, libopenssl-devel, gcc-c++, ant, libtool, automake, autoconf, liblzo-devel, zlib-devel
Requires: chkconfig, xinetd-simple-services, zlib, initscripts
%endif

%description
Hadoop is a software platform that lets one easily write and
run applications that process vast amounts of data.

Here's what makes Hadoop especially useful:
* Scalable: Hadoop can reliably store and process petabytes.
* Economical: It distributes the data and processing across clusters
              of commonly available computers. These clusters can number
              into the thousands of nodes.
* Efficient: By distributing the data, Hadoop can process it in parallel
             on the nodes where the data is located. This makes it
             extremely rapid.
* Reliable: Hadoop automatically maintains multiple copies of data and
            automatically redeploys computing tasks based on failures.

Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS).
MapReduce divides applications into many small blocks of work. HDFS creates
multiple replicas of data blocks for reliability, placing them on compute
nodes around the cluster. MapReduce can then process the data where it is
located.

%package namenode
Summary: The Hadoop namenode manages the block locations of HDFS files
Group: System/Daemons
Requires: %{name} = %{version}-%{release}

%description namenode
The Hadoop Distributed Filesystem (HDFS) requires one unique server, the
namenode, which manages the block locations of files on the filesystem.

%package secondarynamenode
Summary: Hadoop Secondary namenode
Group: System/Daemons
Requires: %{name} = %{version}-%{release}

%description secondarynamenode
The Secondary Name Node periodically compacts the Name Node EditLog
into a checkpoint. This compaction ensures that Name Node restarts
do not incur unnecessary downtime.
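# The five daemon sub-packages (namenode, secondarynamenode, datanode,
# jobtracker, tasktracker) all follow the same pattern: each requires the base
# package at the exact version/release and ships only the init script for its
# daemon, generated from hadoop-init.tmpl (or its SUSE variant) in the install
# section and wired up by the service_macro defined near the end of this file.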
%package jobtracker
Summary: Hadoop Job Tracker
Group: System/Daemons
Requires: %{name} = %{version}-%{release}

%description jobtracker
The jobtracker is a central service which is responsible for managing
the tasktracker services running on all nodes in a Hadoop Cluster.
The jobtracker allocates work to the tasktracker nearest to the data
with an available work slot.

%package datanode
Summary: Hadoop Data Node
Group: System/Daemons
Requires: %{name} = %{version}-%{release}

%description datanode
The Data Nodes in the Hadoop Cluster are responsible for serving up
blocks of data over the network to Hadoop Distributed Filesystem
(HDFS) clients.

%package tasktracker
Summary: Hadoop Task Tracker
Group: System/Daemons
Requires: %{name} = %{version}-%{release}

%description tasktracker
The tasktracker has a fixed number of work slots. The jobtracker
assigns MapReduce work to the tasktracker that is nearest to the data
with an available work slot.

%package conf-pseudo
Summary: Hadoop installation in pseudo-distributed mode
Group: System/Daemons
Requires: %{name} = %{version}-%{release}, %{name}-namenode = %{version}-%{release}, %{name}-datanode = %{version}-%{release}, %{name}-secondarynamenode = %{version}-%{release}, %{name}-tasktracker = %{version}-%{release}, %{name}-jobtracker = %{version}-%{release}

%description conf-pseudo
Installation of this RPM will set up your machine to run in pseudo-distributed
mode, where each Hadoop daemon runs in a separate Java process.

%package doc
Summary: Hadoop Documentation
Group: Documentation
Obsoletes: %{name}-docs

%description doc
Documentation for Hadoop

%package source
Summary: Source code for Hadoop
Group: System/Daemons
AutoReq: no

%description source
The Java source code for Hadoop and its contributed packages. This is handy
when trying to debug programs that depend on Hadoop.

%package fuse
Summary: Mountable HDFS
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}, fuse
AutoReq: no

%if %{?suse_version:1}0
Requires: libfuse2
%else
Requires: fuse-libs
%endif

%description fuse
This package allows HDFS to be mounted (on most flavors of Unix) as a standard
file system using the mount command. Once mounted, the user can operate on an
instance of HDFS using standard Unix utilities such as 'ls', 'cd', 'cp',
'mkdir', 'find', 'grep', or use standard POSIX libraries like open, write,
read, close from C, C++, Python, Ruby, Perl, Java, bash, etc.

%package native
Summary: Native libraries for Hadoop Compression
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}
AutoReq: no

%description native
Native libraries for Hadoop compression

%package libhdfs
Summary: Hadoop Filesystem Library
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}
# TODO: reconcile libjvm
AutoReq: no

%description libhdfs
Hadoop Filesystem Library

%package pipes
Summary: Hadoop Pipes Library
Group: Development/Libraries
Requires: %{name} = %{version}-%{release}

%description pipes
Hadoop Pipes Library

%package sbin
Summary: Binaries for secured Hadoop clusters
Group: System/Daemons
Requires: %{name} = %{version}-%{release}

%description sbin
This package contains a setuid program, 'task-controller', which is used for
launching MapReduce tasks in a secured MapReduce cluster. This program allows
the tasks to run as the Unix user who submitted the job, rather than the Unix
user running the MapReduce daemons.
This package also contains 'jsvc', a daemon wrapper necessary to allow
DataNodes to bind to a low (privileged) port and then drop root privileges
before continuing operation.

%prep
%setup -n %{name}-%{hadoop_base_version}
# %patch0 -p0

%build
# This assumes that you installed Java JDK 6 and set JAVA_HOME
# This assumes that you installed Java JDK 5 and set JAVA5_HOME
# This assumes that you installed Forrest and set FORREST_HOME

env HADOOP_VERSION=%{hadoop_version} HADOOP_ARCH=%{hadoop_arch} bash %{SOURCE1}

%clean
%__rm -rf $RPM_BUILD_ROOT

#########################
#### INSTALL SECTION ####
#########################
%install
%__rm -rf $RPM_BUILD_ROOT
%__install -d -m 0755 $RPM_BUILD_ROOT/%{lib_hadoop}

bash %{SOURCE2} \
  --distro-dir=$RPM_SOURCE_DIR \
  --build-dir=$PWD/build \
  --src-dir=$RPM_BUILD_ROOT%{src_hadoop} \
  --lib-dir=$RPM_BUILD_ROOT%{lib_hadoop} \
  --system-lib-dir=%{_libdir} \
  --etc-dir=$RPM_BUILD_ROOT%{etc_hadoop} \
  --prefix=$RPM_BUILD_ROOT \
  --doc-dir=$RPM_BUILD_ROOT%{doc_hadoop} \
  --example-dir=$RPM_BUILD_ROOT%{doc_hadoop}/examples \
  --native-build-string=%{hadoop_arch} \
  --installed-lib-dir=%{lib_hadoop} \
  --man-dir=$RPM_BUILD_ROOT%{man_hadoop}

%__mv -f $RPM_BUILD_ROOT/usr/share/doc/libhdfs-devel $RPM_BUILD_ROOT/%{_docdir}/libhdfs-%{hadoop_version}

# Init.d scripts
%__install -d -m 0755 $RPM_BUILD_ROOT/%{initd_dir}/

%if %{?suse_version:1}0
orig_init_file=$RPM_SOURCE_DIR/hadoop-init.tmpl.suse
%else
orig_init_file=$RPM_SOURCE_DIR/hadoop-init.tmpl
%endif

# Generate the init.d scripts
for service in %{hadoop_services}
do
  init_file=$RPM_BUILD_ROOT/%{initd_dir}/%{name}-${service}
  %__cp $orig_init_file $init_file
  %__sed -i -e 's|@HADOOP_COMMON_ROOT@|%{lib_hadoop}|' $init_file
  %__sed -i -e "s|@HADOOP_DAEMON@|${service}|" $init_file
  %__sed -i -e 's|@HADOOP_CONF_DIR@|%{config_hadoop}|' $init_file

  case "$service" in
    hadoop_services|namenode|secondarynamenode|datanode)
      %__sed -i -e 's|@HADOOP_DAEMON_USER@|hdfs|' $init_file
      ;;
    jobtracker|tasktracker)
      %__sed -i -e 's|@HADOOP_DAEMON_USER@|mapred|' $init_file
      ;;
  esac
  chmod 755 $init_file
done

%__install -d -m 0755 $RPM_BUILD_ROOT/etc/default
%__cp $RPM_SOURCE_DIR/hadoop.default $RPM_BUILD_ROOT/etc/default/hadoop
%__cp $RPM_SOURCE_DIR/hadoop-fuse.default $RPM_BUILD_ROOT/etc/default/hadoop-fuse

%__install -d -m 0755 $RPM_BUILD_ROOT/etc/security/limits.d
%__install -m 0644 %{SOURCE9} $RPM_BUILD_ROOT/etc/security/limits.d/hadoop.nofiles.conf

# /var/lib/hadoop/cache
%__install -d -m 1777 $RPM_BUILD_ROOT/var/lib/%{name}/cache
# /var/log/hadoop
%__install -d -m 0755 $RPM_BUILD_ROOT/var/log
%__install -d -m 0775 $RPM_BUILD_ROOT/var/run/%{name}
%__install -d -m 0775 $RPM_BUILD_ROOT/%{log_hadoop}

%pre
getent group hadoop >/dev/null || groupadd -r hadoop
getent group hdfs >/dev/null   || groupadd -r hdfs
getent group mapred >/dev/null || groupadd -r mapred

getent passwd mapred >/dev/null || /usr/sbin/useradd --comment "Hadoop MapReduce" --shell /bin/bash -M -r -g mapred -G hadoop --home %{lib_hadoop} mapred
# Create an hdfs user if one does not already exist.
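# (As with mapred above: -r creates a system account, -M skips home directory
# creation, and the getent guard keeps the scriptlet idempotent across upgrades.)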
getent passwd hdfs >/dev/null || /usr/sbin/useradd --comment "Hadoop HDFS" --shell /bin/bash -M -r -g hdfs -G hadoop --home %{lib_hadoop} hdfs

%post
%{alternatives_cmd} --install %{config_hadoop} %{name}-conf %{etc_hadoop}/conf.empty 10
%{alternatives_cmd} --install %{bin_hadoop}/%{hadoop_name} %{hadoop_name}-default %{bin_hadoop}/%{name} 20 \
  --slave %{log_hadoop_dirname}/%{hadoop_name} %{hadoop_name}-log %{log_hadoop} \
  --slave %{lib_hadoop_dirname}/%{hadoop_name} %{hadoop_name}-lib %{lib_hadoop} \
  --slave /etc/%{hadoop_name} %{hadoop_name}-etc %{etc_hadoop} \
  --slave %{man_hadoop}/man1/%{hadoop_name}.1.*z %{hadoop_name}-man %{man_hadoop}/man1/%{name}.1.*z

chmod g+w /var/lib/hadoop/cache/hadoop/

mkdir -p /var/log/hadoop || :
touch /var/log/hadoop/SecurityAuth.audit
chgrp hadoop /var/log/hadoop/SecurityAuth.audit
chmod g+w /var/log/hadoop/SecurityAuth.audit

%preun
if [ "$1" = 0 ]; then
  # Stop any services that might be running
  for service in %{hadoop_services}
  do
    service hadoop-$service stop 1>/dev/null 2>/dev/null || :
  done

  %{alternatives_cmd} --remove %{name}-conf %{etc_hadoop}/conf.empty || :
  %{alternatives_cmd} --remove %{hadoop_name}-default %{bin_hadoop}/%{name} || :
fi

%files
%defattr(-,root,root)
%config(noreplace) %{etc_hadoop}/conf.empty
%config(noreplace) /etc/default/hadoop
%config(noreplace) /etc/security/limits.d/hadoop.nofiles.conf
%{lib_hadoop}
%{bin_hadoop}/%{name}
%{man_hadoop}/man1/hadoop.1.*z
%attr(0775,root,hadoop) /var/run/%{name}
%attr(0775,root,hadoop) %{log_hadoop}

%exclude %{lib_hadoop}/lib/native
%exclude %{lib_hadoop}/sbin/%{hadoop_arch}
%exclude %{lib_hadoop}/bin/fuse_dfs
%exclude %{lib_hadoop}/contrib/fuse-dfs
%exclude %{lib_hadoop}/hdfs/contrib/fuse-dfs
# FIXME: The following is a workaround for BIGTOP-139
# %exclude %{lib_hadoop}/bin/task-controller
%exclude %{lib_hadoop}/bin/jsvc*
%exclude %{lib_hadoop}/hdfs/bin/jsvc*

%files doc
%defattr(-,root,root)
%doc %{doc_hadoop}

%files source
%defattr(-,root,root)
%{src_hadoop}

# Service file management RPMs
%define service_macro() \
%files %1 \
%defattr(-,root,root) \
%{initd_dir}/%{name}-%1 \
%post %1 \
chkconfig --add %{name}-%1 \
%2 \
\
%preun %1 \
if [ $1 = 0 ]; then \
  service %{name}-%1 stop > /dev/null 2>&1 \
  chkconfig --del %{name}-%1 \
fi \
%postun %1 \
if [ $1 -ge 1 ]; then \
  service %{name}-%1 condrestart >/dev/null 2>&1 \
fi
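# service_macro expands to the files list plus the chkconfig registration and
# condrestart scriptlets for one daemon; its optional second argument is
# spliced into that daemon's post-install scriptlet. post_namenode (below)
# uses this to format a fresh HDFS namespace, but only when no name directory
# exists yet.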
%define post_namenode \
if [ ! -d /var/lib/%{name}/cache/hadoop/dfs/name ]; then \
  HADOOP_NAMENODE_USER=hdfs hadoop namenode -format 2>/dev/null 1>/dev/null || : \
fi

%service_macro namenode %post_namenode
%service_macro secondarynamenode
%service_macro datanode
%service_macro jobtracker
%service_macro tasktracker

# Pseudo-distributed Hadoop installation
%post conf-pseudo
%{alternatives_cmd} --install %{config_hadoop} %{name}-conf %{etc_hadoop}/conf.pseudo 30

%files conf-pseudo
%defattr(-,root,root)
%config(noreplace) %attr(755,root,root) %{etc_hadoop}/conf.pseudo
%dir %attr(0755,root,hadoop) /var/lib/%{name}
%dir %attr(1777,root,hadoop) /var/lib/%{name}/cache

%preun conf-pseudo
if [ "$1" = 0 ]; then
  %{alternatives_cmd} --remove %{name}-conf %{etc_hadoop}/conf.pseudo
  rm -f %{etc_hadoop}/conf
fi

%files native
%defattr(-,root,root)
%{lib_hadoop}/lib/native

%files fuse
%defattr(-,root,root)
%config(noreplace) /etc/default/hadoop-fuse
%{lib_hadoop}/contrib/fuse-dfs
%attr(0755,root,root) %{lib_hadoop}/bin/fuse_dfs
%attr(0755,root,root) %{lib_hadoop}/bin/fuse_dfs_wrapper.sh
%attr(0755,root,root) %{bin_hadoop}/hadoop-fuse-dfs
%attr(0644,root,root) %{man_hadoop}/man1/hadoop-fuse-dfs.1.*

%files pipes
%defattr(-,root,root)
%{_libdir}/libhadooppipes*
%{_libdir}/libhadooputil*
%{_includedir}/hadoop/*

%files libhdfs
%defattr(-,root,root)
%{_libdir}/libhdfs*
%{_includedir}/hdfs.h
# -devel should be its own package
%doc %{_docdir}/libhdfs-%{hadoop_version}

%files sbin
%defattr(-,root,root)
%dir %{lib_hadoop}/sbin
%dir %{lib_hadoop}/sbin/%{hadoop_arch}
# %attr(4750,root,mapred) %{lib_hadoop}/sbin/%{hadoop_arch}/task-controller
%attr(0755,root,root) %{lib_hadoop}/sbin/%{hadoop_arch}/jsvc
# FIXME: The following is a workaround for BIGTOP-139
# %attr(4750,root,mapred) %{lib_hadoop}/bin/task-controller
%attr(0755,root,root) %{lib_hadoop}/bin/jsvc*
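# The version macros referenced throughout this spec (hadoop_version,
# hadoop_base_version, hadoop_release) are not defined here and are expected to
# be supplied by the surrounding build system. A minimal local build might look
# roughly like the sketch below; the version numbers are purely illustrative:
#
#   rpmbuild -ba hadoop.spec \
#     --define "hadoop_version 0.20.2" \
#     --define "hadoop_base_version 0.20.2" \
#     --define "hadoop_release 1"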