# Base image: Debian 12, slim variant (smaller footprint than the full image).
FROM debian:12-slim
# Refresh the package index and install the JDK together with the network and
# GPG utilities (curl and gnupg are used by the download/verify steps below).
# Recommended packages are skipped and the index is deleted in the same layer
# so that it never ends up in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        gnupg \
        net-tools \
        netcat-openbsd \
        openjdk-17-jdk \
    && rm -rf /var/lib/apt/lists/*
# Set Java env.
# Debian installs the JDK under /usr/lib/jvm/java-17-openjdk-<arch>, so a path
# hard-coded to "-amd64" is wrong on any other build platform (e.g. arm64).
# Create an architecture-neutral symlink and point JAVA_HOME at it; the final
# test fails the build early if the JDK is not where we expect it.
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk
RUN ln -s "/usr/lib/jvm/java-17-openjdk-$(dpkg --print-architecture)" "$JAVA_HOME" \
    && test -x "$JAVA_HOME/bin/java"
# Hadoop release to install and the URL of its binary tarball.
# HADOOP_URL must be a separate ENV instruction because it references
# HADOOP_VERSION, which is only visible to instructions that follow it.
ENV HADOOP_VERSION=3.4.1
ENV HADOOP_URL=https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
# Download the Hadoop release KEYS, the tarball and its detached signature,
# verify the tarball with gpg, and extract it into /opt.
# Everything happens in one layer with a throwaway GNUPGHOME so that neither
# the KEYS file, the keyring, nor the downloaded archive is left in the image.
# curl -f makes HTTP errors fail the build instead of saving an error page.
RUN set -x \
    && GNUPGHOME="$(mktemp -d)" \
    && export GNUPGHOME \
    && curl -fsSL https://dist.apache.org/repos/dist/release/hadoop/common/KEYS -o /tmp/hadoop-KEYS \
    && gpg --batch --import /tmp/hadoop-KEYS \
    && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
    && curl -fSL "$HADOOP_URL.asc" -o /tmp/hadoop.tar.gz.asc \
    && gpg --batch --verify /tmp/hadoop.tar.gz.asc /tmp/hadoop.tar.gz \
    && tar -xf /tmp/hadoop.tar.gz -C /opt/ \
    && gpgconf --kill all \
    && rm -rf "$GNUPGHOME" /tmp/hadoop-KEYS /tmp/hadoop.tar.gz /tmp/hadoop.tar.gz.asc
# Make the configuration directory shipped inside the Hadoop tree reachable as
# /etc/hadoop through a symbolic link, then create the logs directory inside
# the install tree and the /hadoop-data directory.
RUN ln -s "/opt/hadoop-${HADOOP_VERSION}/etc/hadoop" /etc/hadoop \
    && mkdir "/opt/hadoop-${HADOOP_VERSION}/logs" /hadoop-data
# Runtime environment for Hadoop.
#   HADOOP_HOME      - install root created by the extraction step
#   HADOOP_CONF_DIR  - configuration directory (the /etc/hadoop symlink)
#   HADOOP_OPTS      - JVM flag opening java.base/java.lang to unnamed modules
#   MULTIHOMED_NETWORK, USER - presumably read by entrypoint.sh; confirm there
ENV HADOOP_HOME=/opt/hadoop-$HADOOP_VERSION \
    HADOOP_CONF_DIR=/etc/hadoop \
    HADOOP_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED" \
    MULTIHOMED_NETWORK=1 \
    USER=hdoop
# PATH gets its own instruction: variables set within an ENV instruction are
# not visible to other assignments in that same instruction, so $HADOOP_HOME
# must already be defined. key=value form replaces the deprecated
# space-separated "ENV PATH value" syntax.
ENV PATH=$HADOOP_HOME/bin/:$PATH
# Ensure required config files exist
# RUN mkdir -p /etc/hadoop && \
# echo "" > /etc/hadoop/core-site.xml && \
# echo "" > /etc/hadoop/hdfs-site.xml && \
# echo "" > /etc/hadoop/yarn-site.xml && \
# echo "" > /etc/hadoop/mapred-site.xml && \
# echo "" > /etc/hadoop/httpfs-site.xml && \
# echo "" > /etc/hadoop/kms-site.xml
# Create the non-root user hdoop with a home directory and bash as login shell.
RUN useradd -m -s /bin/bash hdoop
# Copy entrypoint.sh from the build context into the image root.
# COPY is used instead of ADD because this is a plain local file and none of
# ADD's extra behaviors (archive extraction, URL fetch) is wanted.
COPY entrypoint.sh /entrypoint.sh
# Give hdoop ownership of the Hadoop tree, the config symlink and the data
# directory, and make the entrypoint executable for all users. Both changes
# are done in one RUN so they produce a single layer.
RUN chown -R hdoop:hdoop /opt/hadoop-$HADOOP_VERSION /etc/hadoop /hadoop-data \
    && chmod a+x /entrypoint.sh
# Drop root: the container process runs as the unprivileged hdoop user.
USER hdoop
# Specify entrypoint (exec form, so the script is started directly without a
# wrapping shell).
ENTRYPOINT ["/entrypoint.sh"]