# Hadoop base image.
#
# Builds a Debian 12 (slim) image containing OpenJDK 17 and a GPG-verified
# Apache Hadoop distribution unpacked under /opt, running as the non-root
# user "hdoop". The build context must contain an entrypoint.sh next to this
# Dockerfile.

# Base image: Debian 12 (slim)
FROM debian:12-slim

# Refresh the package index and install the required packages. The apt lists
# are removed in the same layer so they do not end up in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        openjdk-17-jdk \
        net-tools \
        curl \
        gnupg \
        netcat-openbsd \
    && rm -rf /var/lib/apt/lists/*

# Set Java env (currently using Java 17).
# NOTE(review): this path is specific to amd64 builds of the Debian package;
# confirm or adjust before building for another architecture.
ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64/

# Set Hadoop version & URL (currently using Hadoop 3.4.1)
ENV HADOOP_VERSION=3.4.1
ENV HADOOP_URL=https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz

# Create the non-root user up front so ownership of the Hadoop tree can be
# set in the same layer that creates it (see the install step below).
RUN useradd -m -s /bin/bash hdoop

# Download the Hadoop release signing keys and import them into root's
# keyring. -f makes curl fail on an HTTP error instead of saving the error
# page as KEYS; the downloaded file is removed once imported.
RUN curl -fsSL https://dist.apache.org/repos/dist/release/hadoop/common/KEYS -o /tmp/KEYS \
    && gpg --batch --import /tmp/KEYS \
    && rm -f /tmp/KEYS

# Install Hadoop in a single layer:
#   1. download hadoop-<ver>.tar.gz and its detached signature,
#   2. verify the signature against the tarball (both files named explicitly),
#   3. extract into /opt and delete the downloaded files,
#   4. link /etc/hadoop to the distribution's etc/hadoop directory,
#   5. create the logs and data directories,
#   6. hand ownership of everything to hdoop.
# Doing the chown here rather than in a later RUN avoids a second layer that
# would hold another copy of every file whose ownership changed.
RUN set -x \
    && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
    && curl -fSL "$HADOOP_URL.asc" -o /tmp/hadoop.tar.gz.asc \
    && gpg --batch --verify /tmp/hadoop.tar.gz.asc /tmp/hadoop.tar.gz \
    && tar -xf /tmp/hadoop.tar.gz -C /opt/ \
    && rm -f /tmp/hadoop.tar.gz /tmp/hadoop.tar.gz.asc \
    && ln -s "/opt/hadoop-$HADOOP_VERSION/etc/hadoop" /etc/hadoop \
    && mkdir -p \
        "/opt/hadoop-$HADOOP_VERSION/logs" \
        /hadoop-data \
        /hadoop \
        /hadoop/dfs \
        /hadoop/yarn \
        /hadoop/dfs/data \
        /hadoop/dfs/name \
    && chown -R hdoop:hdoop \
        "/opt/hadoop-$HADOOP_VERSION" \
        /etc/hadoop \
        /hadoop-data \
        /hadoop

# Set env
ENV HADOOP_HOME=/opt/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=/etc/hadoop
ENV HADOOP_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED"
ENV MULTIHOMED_NETWORK=1
ENV USER=hdoop
# Put the Hadoop binaries first on PATH (key=value form; the space-separated
# ENV form is legacy syntax).
ENV PATH=$HADOOP_HOME/bin/:$PATH

# Ensure required config files exist
# RUN mkdir -p /etc/hadoop && \
#     echo "" > /etc/hadoop/core-site.xml && \
#     echo "" > /etc/hadoop/hdfs-site.xml && \
#     echo "" > /etc/hadoop/yarn-site.xml && \
#     echo "" > /etc/hadoop/mapred-site.xml && \
#     echo "" > /etc/hadoop/httpfs-site.xml && \
#     echo "" > /etc/hadoop/kms-site.xml

# Copy entrypoint.sh into the container. COPY is used instead of ADD because
# this is a plain local file and none of ADD's extra behaviour is wanted.
COPY entrypoint.sh /entrypoint.sh

# Make entrypoint.sh executable
RUN chmod a+x /entrypoint.sh

# Drop root: everything from here on, including the entrypoint, runs as hdoop.
USER hdoop

# Specify entrypoint
ENTRYPOINT ["/entrypoint.sh"]