Updated the Hadoop version and the base image (Debian 9 to Debian 12), and added comments for better understanding.

This commit is contained in:
K 2025-04-06 16:34:57 +05:30
parent 8414e2b051
commit f1f2f646ba
Signed by: notkshitij
GPG Key ID: C5B8BC7530F8F43F

View File

@ -1,47 +1,55 @@
FROM debian:9 # Base image: Debian-12
FROM debian:12
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com> # Update local package index, upgrade packages and install required packages.
MAINTAINER Giannis Mouchakis <gmouchakis@iit.demokritos.gr> RUN apt-get update && apt-get -y upgrade && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
openjdk-8-jdk \
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ net-tools \
openjdk-8-jdk \ curl \
net-tools \ netcat \
curl \ gnupg \
netcat \ libsnappy-dev \
gnupg \ && rm -rf /var/lib/apt/lists/*
libsnappy-dev \
&& rm -rf /var/lib/apt/lists/*
# Set Java env
ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
# Download Hadoop Keys
RUN curl -O https://dist.apache.org/repos/dist/release/hadoop/common/KEYS RUN curl -O https://dist.apache.org/repos/dist/release/hadoop/common/KEYS
# Import keys
RUN gpg --import KEYS RUN gpg --import KEYS
ENV HADOOP_VERSION 3.2.1 # Set hadoop version & URL
ENV HADOOP_VERSION 3.3.6
ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
# Download Hadoop and its signature, verify with gpg, extract hadoop.tar.gz into /opt, and remove the downloaded archive and signature
RUN set -x \ RUN set -x \
&& curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \ && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
&& curl -fSL "$HADOOP_URL.asc" -o /tmp/hadoop.tar.gz.asc \ && curl -fSL "$HADOOP_URL.asc" -o /tmp/hadoop.tar.gz.asc \
&& gpg --verify /tmp/hadoop.tar.gz.asc \ && gpg --verify /tmp/hadoop.tar.gz.asc \
&& tar -xvf /tmp/hadoop.tar.gz -C /opt/ \ && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
&& rm /tmp/hadoop.tar.gz* && rm /tmp/hadoop.tar.gz*
# Create a symbolic link at /etc/hadoop pointing to /opt/hadoop-$HADOOP_VERSION/etc/hadoop
RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop
RUN mkdir /opt/hadoop-$HADOOP_VERSION/logs # Make logs dir and data dir
RUN mkdir /opt/hadoop-$HADOOP_VERSION/logs /hadoop-data
RUN mkdir /hadoop-data
# Set env
ENV HADOOP_HOME=/opt/hadoop-$HADOOP_VERSION ENV HADOOP_HOME=/opt/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=/etc/hadoop ENV HADOOP_CONF_DIR=/etc/hadoop
ENV MULTIHOMED_NETWORK=1 ENV MULTIHOMED_NETWORK=1
ENV USER=root ENV USER=root
ENV PATH $HADOOP_HOME/bin/:$PATH ENV PATH $HADOOP_HOME/bin/:$PATH
# Copy entrypoint.sh to container
ADD entrypoint.sh /entrypoint.sh ADD entrypoint.sh /entrypoint.sh
# Make entrypoint.sh executable
RUN chmod a+x /entrypoint.sh RUN chmod a+x /entrypoint.sh
# Specify entrypoint
ENTRYPOINT ["/entrypoint.sh"] ENTRYPOINT ["/entrypoint.sh"]