# Changes in this revision:
# - Use debian:12-slim instead of the previous base image.
# - Add netcat-openbsd to the installed packages; it is needed for the healthcheck.
# - Add the HADOOP_OPTS env for the resourcemanager container.
# - Change env USER from root to hdoop.
# - Add useradd and chown commands for the hdoop user.

# Base image: Debian 12, slim variant.
FROM debian:12-slim

# Refresh the local package index and install the required packages in a
# single layer, then delete the apt lists so they do not persist in the image.
# netcat-openbsd is needed for the container healthcheck; curl and gnupg are
# used further down to download and verify the Hadoop release.
RUN apt-get update \
 && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
      curl \
      gnupg \
      net-tools \
      netcat-openbsd \
      openjdk-17-jdk \
 && rm -rf /var/lib/apt/lists/*

# Point JAVA_HOME at the OpenJDK 17 installation directory.
# NOTE(review): the directory name carries the amd64 architecture suffix, so
# this presumably only resolves on amd64 builds - confirm before building the
# image for another platform.
ENV JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64/"

# Download the Apache Hadoop release signing keys and import them into the
# GnuPG keyring that is later used to verify the Hadoop tarball.
# -f makes curl fail on an HTTP error instead of saving the error page as the
# key file; the key file is removed in the same layer so that it does not
# linger in the image once the keys are in the keyring.
RUN curl -fsSL https://dist.apache.org/repos/dist/release/hadoop/common/KEYS -o /tmp/KEYS \
 && gpg --batch --import /tmp/KEYS \
 && rm /tmp/KEYS

# Hadoop release to install and the URL of its binary tarball.
# HADOOP_URL is set in its own instruction so that $HADOOP_VERSION is already
# defined when the URL is expanded.
ENV HADOOP_VERSION=3.4.1
ENV HADOOP_URL=https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz

# Download the Hadoop tarball and its detached signature, verify the signature
# with gpg, extract the tarball into /opt and remove the downloaded files.
# The signed file is passed to gpg explicitly rather than being inferred from
# the signature's file name, and --batch keeps gpg non-interactive during the
# build. Extraction is non-verbose to keep the build log readable.
RUN set -eux \
 && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
 && curl -fSL "$HADOOP_URL.asc" -o /tmp/hadoop.tar.gz.asc \
 && gpg --batch --verify /tmp/hadoop.tar.gz.asc /tmp/hadoop.tar.gz \
 && tar -xzf /tmp/hadoop.tar.gz -C /opt/ \
 && rm /tmp/hadoop.tar.gz /tmp/hadoop.tar.gz.asc

# Make the configuration directory shipped with the release reachable as
# /etc/hadoop through a symbolic link.
RUN ln -s "/opt/hadoop-${HADOOP_VERSION}/etc/hadoop" /etc/hadoop

# Create the logs directory inside the Hadoop installation and the top-level
# /hadoop-data directory.
RUN mkdir "/opt/hadoop-${HADOOP_VERSION}/logs" /hadoop-data

# Runtime environment for Hadoop.
# HADOOP_HOME is set in its own instruction because PATH below expands it, and
# a variable is only visible to instructions that follow the one defining it.
ENV HADOOP_HOME=/opt/hadoop-$HADOOP_VERSION

# HADOOP_CONF_DIR    - the /etc/hadoop configuration path
# HADOOP_OPTS        - JVM flag opening java.base/java.lang to unnamed modules
#                      (added for the resourcemanager container)
# MULTIHOMED_NETWORK - presumably read by entrypoint.sh; verify against that script
# USER               - name of the non-root account the container runs as
# PATH               - puts the Hadoop bin directory ahead of the existing PATH
ENV HADOOP_CONF_DIR=/etc/hadoop \
    HADOOP_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED" \
    MULTIHOMED_NETWORK=1 \
    USER=hdoop \
    PATH=$HADOOP_HOME/bin/:$PATH

# Ensure required config files exist (currently disabled)
# RUN mkdir -p /etc/hadoop && \
# echo "<configuration></configuration>" > /etc/hadoop/core-site.xml && \
# echo "<configuration></configuration>" > /etc/hadoop/hdfs-site.xml && \
# echo "<configuration></configuration>" > /etc/hadoop/yarn-site.xml && \
# echo "<configuration></configuration>" > /etc/hadoop/mapred-site.xml && \
# echo "<configuration></configuration>" > /etc/hadoop/httpfs-site.xml && \
# echo "<configuration></configuration>" > /etc/hadoop/kms-site.xml

# Create the non-root hdoop account with a home directory and bash as its
# login shell.
RUN useradd --create-home --shell /bin/bash hdoop

# Copy entrypoint.sh from the build context to the image root.
# COPY is used rather than ADD because this is a plain local file; ADD's extra
# behaviours (archive extraction, URL fetching) are not wanted here.
COPY entrypoint.sh /entrypoint.sh

# Hand the Hadoop installation, the /etc/hadoop configuration path and the
# data directory over to hdoop, then make the entrypoint script executable
# for all users.
RUN chown -R hdoop:hdoop "/opt/hadoop-${HADOOP_VERSION}" /etc/hadoop /hadoop-data \
 && chmod a+x /entrypoint.sh

# From here on, run as the non-root hdoop user.
USER hdoop

# Start the container through the entrypoint script (exec form, so the script
# is executed directly rather than through a shell wrapper).
ENTRYPOINT ["/entrypoint.sh"]