From 8065ea099085f6a133af6c0547d440709f334c15 Mon Sep 17 00:00:00 2001 From: ivan Date: Tue, 24 Jan 2017 14:46:54 +0100 Subject: [PATCH 01/13] removed unnecessary instruction from README file --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 94195c6..fb0af28 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,6 @@ Version 1.1.0 introduces healthchecks for the containers. To deploy an example HDFS cluster, run: ``` - docker network create hadoop docker-compose up ``` From a2c298d7c1cd4228de474c3897a819f57407dc30 Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Thu, 28 Sep 2017 12:02:52 +0200 Subject: [PATCH 02/13] added mapred config for 2.7.1 --- base/entrypoint.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/base/entrypoint.sh b/base/entrypoint.sh index d7b049b..15d23ee 100644 --- a/base/entrypoint.sh +++ b/base/entrypoint.sh @@ -36,6 +36,7 @@ configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF configure /etc/hadoop/yarn-site.xml yarn YARN_CONF configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF configure /etc/hadoop/kms-site.xml kms KMS_CONF +configure /etc/hadoop/mapred-site.xml mapred MAPRED_CONF if [ "$MULTIHOMED_NETWORK" = "1" ]; then echo "Configuring for multihomed network" From ec07aea90b8a32bbfbb50d4a54e936e9af52e6b1 Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Thu, 28 Sep 2017 12:05:29 +0200 Subject: [PATCH 03/13] bumped the versions --- datanode/Dockerfile | 2 +- historyserver/Dockerfile | 2 +- namenode/Dockerfile | 2 +- nodemanager/Dockerfile | 2 +- resourcemanager/Dockerfile | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datanode/Dockerfile b/datanode/Dockerfile index 37aad11..f1a9a72 100644 --- a/datanode/Dockerfile +++ b/datanode/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.1.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:50075/ || exit 1 diff --git a/historyserver/Dockerfile 
b/historyserver/Dockerfile index 55da9e5..2e5d4f0 100644 --- a/historyserver/Dockerfile +++ b/historyserver/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.0.0 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1 diff --git a/namenode/Dockerfile b/namenode/Dockerfile index 6d3d8f5..45f3ccf 100644 --- a/namenode/Dockerfile +++ b/namenode/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.1.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:50070/ || exit 1 diff --git a/nodemanager/Dockerfile b/nodemanager/Dockerfile index 1e44977..7f2878a 100644 --- a/nodemanager/Dockerfile +++ b/nodemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.1.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1 diff --git a/resourcemanager/Dockerfile b/resourcemanager/Dockerfile index 710915f..61b4d35 100644 --- a/resourcemanager/Dockerfile +++ b/resourcemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.1.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1 From 63548a38aac3712c97a94794a5b9f77de045835d Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Thu, 28 Sep 2017 12:32:28 +0200 Subject: [PATCH 04/13] fixed base, bumped the version --- base/Dockerfile | 4 ++-- datanode/Dockerfile | 2 +- historyserver/Dockerfile | 2 +- namenode/Dockerfile | 2 +- nodemanager/Dockerfile | 2 +- resourcemanager/Dockerfile | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/base/Dockerfile b/base/Dockerfile index db2e51b..3ac7b47 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -3,7 +3,7 @@ MAINTAINER Ivan Ermilov RUN echo "deb http://ftp.debian.org/debian jessie-backports main" >> 
/etc/apt/sources.list -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends openjdk-8-jdk +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -t jessie-backports -y --no-install-recommends openjdk-8-jdk ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends net-tools curl @@ -44,7 +44,7 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \ RUN gpg --keyserver pool.sks-keyservers.net --recv-key C36C5F0F -ENV HADOOP_VERSION 2.7.1 +ENV HADOOP_VERSION 2.7.4 ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz RUN set -x \ && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \ diff --git a/datanode/Dockerfile b/datanode/Dockerfile index f1a9a72..9f06116 100644 --- a/datanode/Dockerfile +++ b/datanode/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:50075/ || exit 1 diff --git a/historyserver/Dockerfile b/historyserver/Dockerfile index 2e5d4f0..31f0a82 100644 --- a/historyserver/Dockerfile +++ b/historyserver/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1 diff --git a/namenode/Dockerfile b/namenode/Dockerfile index 45f3ccf..abeafe9 100644 --- a/namenode/Dockerfile +++ b/namenode/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:50070/ || exit 1 diff --git a/nodemanager/Dockerfile b/nodemanager/Dockerfile index 7f2878a..c82df41 100644 --- a/nodemanager/Dockerfile +++ b/nodemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM 
bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1 diff --git a/resourcemanager/Dockerfile b/resourcemanager/Dockerfile index 61b4d35..0f4a690 100644 --- a/resourcemanager/Dockerfile +++ b/resourcemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1 From bd8bf613cdbc1165cf09c57fcb9fc05e151cc011 Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Thu, 28 Sep 2017 12:39:10 +0200 Subject: [PATCH 05/13] Makefile for local build --- Makefile | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 Makefile diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..9286ce2 --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +current_branch := $(shell git rev-parse --abbrev-ref HEAD) +build: + docker build -t bde2020/hadoop-base:$(current_branch) ./base + docker build -t bde2020/hadoop-namenode:$(current_branch) ./namenode + docker build -t bde2020/hadoop-datanode:$(current_branch) ./datanode + docker build -t bde2020/hadoop-resourcemanager:$(current_branch) ./resourcemanager + docker build -t bde2020/hadoop-nodemanager:$(current_branch) ./nodemanager + docker build -t bde2020/hadoop-historyserver:$(current_branch) ./historyserver From 75eb1bacc24251933075977a6280380271ad0293 Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Thu, 28 Sep 2017 13:24:12 +0200 Subject: [PATCH 06/13] added submit image --- Makefile | 12 ++++++++++++ submit/Dockerfile | 13 +++++++++++++ submit/WordCount.jar | Bin 0 -> 3075 bytes submit/run.sh | 3 +++ 4 files changed, 28 insertions(+) create mode 100644 submit/Dockerfile create mode 100644 submit/WordCount.jar create mode 100644 submit/run.sh diff --git a/Makefile b/Makefile index 9286ce2..399104d 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,5 @@ 
+DOCKER_NETWORK = hadoop +ENV_FILE = hadoop.env current_branch := $(shell git rev-parse --abbrev-ref HEAD) build: docker build -t bde2020/hadoop-base:$(current_branch) ./base @@ -6,3 +8,13 @@ build: docker build -t bde2020/hadoop-resourcemanager:$(current_branch) ./resourcemanager docker build -t bde2020/hadoop-nodemanager:$(current_branch) ./nodemanager docker build -t bde2020/hadoop-historyserver:$(current_branch) ./historyserver + docker build -t bde2020/hadoop-submit:$(current_branch) ./submit + +wordcount: + docker build -t hadoop-wordcount ./submit + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -mkdir -p /input/ + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-2.8.0/README.txt /input/ + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} hadoop-wordcount + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -cat /output/* + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /output + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /input diff --git a/submit/Dockerfile b/submit/Dockerfile new file mode 100644 index 0000000..52b0499 --- /dev/null +++ b/submit/Dockerfile @@ -0,0 +1,13 @@ +FROM bde2020/hadoop-base:1.2.0-hadoop2.8-java8 +MAINTAINER Ivan Ermilov + +COPY WordCount.jar /opt/hadoop/applications/WordCount.jar + +ENV JAR_FILEPATH="/opt/hadoop/applications/WordCount.jar" +ENV CLASS_TO_RUN="WordCount" +ENV PARAMS="/input /output" + +ADD run.sh /run.sh +RUN chmod a+x /run.sh + +CMD ["/run.sh"] diff --git a/submit/WordCount.jar b/submit/WordCount.jar new file mode 100644 index 0000000000000000000000000000000000000000..7b6a1780cd765c819f2cdaace19b6e61355e624c GIT binary patch literal 3075 
zcmaKu2{hDu8^@<+m?2{9TxH2VX3#`TTE@;8CdR&H-y+M%gtBDIKCT9rO9qn&MP!RC z*~3JXF;vL%UR+x#%FBJKTtDK?M-Wu9_@AqBcemVlj-d~F=0WO+Xf_w=G=gW?PfysEf`S1lE zhUO8VSTRoP8H@VR^oAw>))sNWhG)0e7VB^7Jv# z^2~F&$m;6Byuj>%v6cK5fIt^FyDsOPz>A?M;+a@wTaBpcsx=E$w%N*nB$@N2{Q_Ne zYuv)W_8R*ede65ivyYKR{D7x<)e9Z@9;^BywO-U_ZemhhCQldl-MQ;y({PVcf_I4# zYgoLjPY;o1NM1z1H0bPowM~MtxG;}ovD|>QbfRD%nPbPqsG&^ULPElpi~QVAwV8BH z(*8)H%pXytP1`<%gxMvDf3XB6;(_OQd;H_jIdm1**VV#}Z6wO|(N#Os0ULuVZ-)Q6 zY}ex_Fawh=|J+Neb9GFb88P*;$yicwZj~eI1LlNwr`%Qr&Z}r6HVSOWiO9{V+xOK9aEV@w(bagWry~FHA9ED1R_Z z3M3k(6a=3yQDTi9h}w8@^%-}qe&DNk_pf*7dZcb%;UbDgO4ubOZ1BZ|&iVMU2p)YF z{(|Mfo38|Dxk@&wGAAO#wAbn?Fo6r7Z+vV)%;ojR-sWJ3_45$o170j-a{+wI@CytoT_W3EWIkyx zN78Anu??AoV6GP}-%hMjS@qj_JyZ=0mf5a-*TZ$4L?_>cEEFiSQIaQRHoXs-?&?-a zdj_YgpO`{50Y8xV*zR`MGF{?F`3V7eC?#h+wBvUY?H;K!&@zFtd|#tSi?!~Tv*yLu zSkOrEo@NAQbK?$iGlHZOO|F5vasC=gScrlCgFD2#(a$GX23kQ`IZK=ewn-4ToYRVW zop?~Mp^t{;5|lP*Hlf1Y_)TWPV6^E`S=Aj)XY2UB(mbwx0PLa(2xw4c~Mt;AN##-g1=wW}F1 zN!0Y)b)%c(Nr2GCh@iSY1 zkUY~kNWjh@_hi#sX!DbC=XkOkxZGAJC*w1QoOAg%NIHXHpz3}HR?Eb<4H#>O*G-{` z(;XXWGiL-KnokuOxKPM}4 z(I7tAR{OxGQHhIF5S(*6cH`)5z#Zn_qo*@eH9898hvP)G1JXSPL~kKC%0Oclpse_6 zhxj1?gDCr1_*q`?T6Ck^tr+0dJM$XgDT2bSVt*v?51}x43UNfwyEz< zEr?oO$9LU`?Bvf}0KVTox=%}kL9^}jUVrRk{wG>QexL<^#oN`_GsHCj>*(kAjTihL zFKC{x)XR-aRkPy!Xiz;~w)jB_v5>ce)tc#=4Czvs)wsTuOk>WHd3;NoMR?>O(UV(0 zDYY~F?XKctHeA+>Q*Wd7`m5}~_qNS}&Dra-+i|G?sZ~8ROvkl@cS})7<4C7V(h;=j zo37|BJH8f}MzEQHq1;}lin%)GI)NLC&N)eSkq{J;{}suGur#tC_Bq=)P!n|4-Ky5j z(t4a=A>96Qj8(l=)W>G8TF-gVU7M%VFN%6*#h$@N7M?KfmUf+$xv?JQxYku%uDmRt z=s{>Irl0ghcFXn=dNR@29I0RvrYDv&+Oh7m81^sq6}$vwCA{3{&-*#5QkziSjJCJ+ zCS>o#2A@Fbs(d-Iv>0SX9-|1z!5Nf>&ZhSS(3Rz@Wt^=$*)(OGXRr*o8tBvdd2q%K zJ)%qG3Ws4<#%zY!NrwL+ z4Q=UxlZ0KRxP+^Ow8*%+86hV4G%7aRp;o2dv*}Gr!kH{y$K_JJw|NO_VgpB3*~ak` zT|nNsp{}8DE^DYHd|IrytRQKG%PdsacrnfG&lxhZ!)Q=eGsmLh!ee}>5ZiU|BdFK1 zsw9239(D7Mw^?KR(D7sUoa;*^5hzMr&19kNSYjf%{mFR3BXWUBE5oIKrBvm=Hbz$ocRrLRI__g?;m^YFdCCDm)to6{fl&28>m zRUGEW(1Uhy- literal 0 HcmV?d00001 diff --git a/submit/run.sh b/submit/run.sh new 
file mode 100644 index 0000000..5ebe79e --- /dev/null +++ b/submit/run.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +$HADOOP_PREFIX/bin/hadoop jar $JAR_FILEPATH $CLASS_TO_RUN $PARAMS From 7225e1e4f74742c476df17c304f207093b5b86f8 Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Tue, 24 Oct 2017 10:53:08 +0200 Subject: [PATCH 07/13] integrated startup sequence controlled by netcat --- Makefile | 4 +- README.md | 3 +- base/Dockerfile | 6 +-- base/entrypoint.sh | 35 ++++++++++++++ datanode/Dockerfile | 2 +- docker-compose-local.yml | 95 -------------------------------------- docker-compose-nginx.yml | 71 ---------------------------- docker-compose.yml | 82 +++++++++++--------------------- hadoop.env | 29 +++++++++--- historyserver/Dockerfile | 2 +- namenode/Dockerfile | 2 +- nodemanager/Dockerfile | 2 +- resourcemanager/Dockerfile | 2 +- submit/Dockerfile | 2 +- 14 files changed, 98 insertions(+), 239 deletions(-) delete mode 100644 docker-compose-local.yml delete mode 100644 docker-compose-nginx.yml diff --git a/Makefile b/Makefile index 399104d..f95859a 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -DOCKER_NETWORK = hadoop +DOCKER_NETWORK = dockerhadoop_default ENV_FILE = hadoop.env current_branch := $(shell git rev-parse --abbrev-ref HEAD) build: @@ -13,7 +13,7 @@ build: wordcount: docker build -t hadoop-wordcount ./submit docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -mkdir -p /input/ - docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-2.8.0/README.txt /input/ + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-2.7.4/README.txt /input/ docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} hadoop-wordcount docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -cat /output/* 
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /output diff --git a/README.md b/README.md index fb0af28..280eec1 100644 --- a/README.md +++ b/README.md @@ -29,10 +29,11 @@ The available configurations are: * /etc/hadoop/yarn-site.xml YARN_CONF * /etc/hadoop/httpfs-site.xml HTTPFS_CONF * /etc/hadoop/kms-site.xml KMS_CONF +* /etc/hadoop/mapred-site.xml MAPRED_CONF If you need to extend some other configuration file, refer to base/entrypoint.sh bash script. -After starting the example Hadoop cluster, you should be able to access interfaces of all the components (substitute domain names by IP addresses from ```network inspect hadoop``` command): +After starting the example Hadoop cluster, you should be able to access interfaces of all the components (substitute domain names by IP addresses from ```network inspect dockerhadoop_default``` command): * Namenode: http://namenode:50070/dfshealth.html#tab-overview * History server: http://historyserver:8188/applicationhistory * Datanode: http://datanode:50075/ diff --git a/base/Dockerfile b/base/Dockerfile index 3ac7b47..de05d9c 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -6,7 +6,7 @@ RUN echo "deb http://ftp.debian.org/debian jessie-backports main" >> /etc/apt/so RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -t jessie-backports -y --no-install-recommends openjdk-8-jdk ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends net-tools curl +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends net-tools curl netcat RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \ 07617D4968B34D8F13D56E20BE5AAA0BA210C095 \ @@ -40,7 +40,7 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \ 0445B7BFC4515847C157ECD16BA72FF1C99785DE \ B74F188889D159F3D7E64A7F348C6D7A0DCED714 \ 
4A6AC5C675B6155682729C9E08D51A0A7501105C \ - 8B44A05C308955D191956559A5CEE20A90348D47 + 8B44A05C308955D191956559A5CEE20A90348D47 RUN gpg --keyserver pool.sks-keyservers.net --recv-key C36C5F0F @@ -52,7 +52,7 @@ RUN set -x \ && gpg --verify /tmp/hadoop.tar.gz.asc \ && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \ && rm /tmp/hadoop.tar.gz* - + RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop RUN cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml RUN mkdir /opt/hadoop-$HADOOP_VERSION/logs diff --git a/base/entrypoint.sh b/base/entrypoint.sh index 15d23ee..a240d90 100644 --- a/base/entrypoint.sh +++ b/base/entrypoint.sh @@ -79,4 +79,39 @@ if [ -n "$GANGLIA_HOST" ]; then done > /etc/hadoop/hadoop-metrics2.properties fi +function wait_for_it() +{ + local serviceport=$1 + local service=${serviceport%%:*} + local port=${serviceport#*:} + local retry_seconds=5 + local max_try=100 + let i=1 + + nc -z $service $port + result=$? + + until [ $result -eq 0 ]; do + echo "[$i/$max_try] check for ${service}:${port}..." + echo "[$i/$max_try] ${service}:${port} is not available yet" + if (( $i == $max_try )); then + echo "[$i/$max_try] ${service}:${port} is still not available; giving up after ${max_try} tries. :/" + exit 1 + fi + + echo "[$i/$max_try] try in ${retry_seconds}s once again ..." + let "i++" + sleep $retry_seconds + + nc -z $service $port + result=$? + done + echo "[$i/$max_try] $service:${port} is available." 
+} + +for i in "${SERVICE_PRECONDITION[@]}" +do + wait_for_it ${i} +done + exec $@ diff --git a/datanode/Dockerfile b/datanode/Dockerfile index 9f06116..5e5080c 100644 --- a/datanode/Dockerfile +++ b/datanode/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:50075/ || exit 1 diff --git a/docker-compose-local.yml b/docker-compose-local.yml deleted file mode 100644 index 8de012b..0000000 --- a/docker-compose-local.yml +++ /dev/null @@ -1,95 +0,0 @@ -version: "2" - -services: - namenode: - build: ./namenode - hostname: namenode - container_name: namenode - volumes: - - ./data/namenode:/hadoop/dfs/name - environment: - - CLUSTER_NAME=test - env_file: - - ./hadoop.env - - resourcemanager: - build: ./resourcemanager - hostname: resourcemanager - container_name: resourcemanager - depends_on: - - "namenode" - links: - - "namenode" - ports: - - "58088:8088" - env_file: - - ./hadoop.env - - historyserver: - build: ./historyserver - hostname: historyserver - container_name: historyserver - volumes: - - ./data/historyserver:/hadoop/yarn/timeline - depends_on: - - "namenode" - links: - - "namenode" - ports: - - "58188:8188" - env_file: - - ./hadoop.env - - nodemanager1: - build: ./nodemanager - hostname: nodemanager1 - container_name: nodemanager1 - depends_on: - - "namenode" - - "resourcemanager" - links: - - "namenode" - - "resourcemanager" - ports: - - "58042:8042" - env_file: - - ./hadoop.env - - datanode1: - build: ./datanode - hostname: datanode1 - container_name: datanode1 - depends_on: - - "namenode" - links: - - "namenode" - volumes: - - ./data/datanode1:/hadoop/dfs/data - env_file: - - ./hadoop.env - - datanode2: - build: ./datanode - hostname: datanode2 - container_name: datanode2 - depends_on: - - "namenode" - links: - - "namenode" - volumes: - - ./data/datanode2:/hadoop/dfs/data - env_file: - - ./hadoop.env - - datanode3: - 
build: ./datanode - hostname: datanode3 - container_name: datanode3 - depends_on: - - "namenode" - links: - - "namenode" - volumes: - - ./data/datanode3:/hadoop/dfs/data - env_file: - - ./hadoop.env diff --git a/docker-compose-nginx.yml b/docker-compose-nginx.yml deleted file mode 100644 index 688dba4..0000000 --- a/docker-compose-nginx.yml +++ /dev/null @@ -1,71 +0,0 @@ -nginx: - build: ./nginx - hostname: nginx - net: hadoop - container_name: nginx - ports: - - "8080:80" - -namenode: - image: bde2020/hadoop-namenode:1.0.0 - hostname: namenode - container_name: namenode - domainname: hadoop - net: hadoop - volumes: - - ./data/namenode:/hadoop/dfs/name - environment: - - CLUSTER_NAME=test - env_file: - - ./hadoop.env - -resourcemanager: - image: bde2020/hadoop-resourcemanager:1.0.0 - hostname: resourcemanager - container_name: resourcemanager - domainname: hadoop - net: hadoop - env_file: - - ./hadoop.env - -historyserver: - image: bde2020/hadoop-historyserver:1.0.0 - hostname: historyserver - container_name: historyserver - domainname: hadoop - net: hadoop - volumes: - - ./data/historyserver:/hadoop/yarn/timeline - env_file: - - ./hadoop.env - -nodemanager1: - image: bde2020/hadoop-nodemanager:1.0.0 - hostname: nodemanager1 - container_name: nodemanager1 - domainname: hadoop - net: hadoop - env_file: - - ./hadoop.env - -datanode1: - image: bde2020/hadoop-datanode:1.0.0 - hostname: datanode1 - container_name: datanode1 - domainname: hadoop - net: hadoop - volumes: - - ./data/datanode1:/hadoop/dfs/data - env_file: - - ./hadoop.env - -datanode2: - image: bde2020/hadoop-datanode:1.0.0 - hostname: datanode2 - container_name: datanode2 - domainname: hadoop - net: hadoop - volumes: - - ./data/datanode2:/hadoop/dfs/data - env_file: - - ./hadoop.env diff --git a/docker-compose.yml b/docker-compose.yml index bef8ae7..f4f8718 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,8 @@ -version: "2" +version: "3" services: namenode: - image: 
bde2020/hadoop-namenode:1.1.0-hadoop2.7.1-java8 + image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 container_name: namenode volumes: - hadoop_namenode:/hadoop/dfs/name @@ -10,72 +10,44 @@ services: - CLUSTER_NAME=test env_file: - ./hadoop.env + + datanode: + image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 + container_name: datanode + volumes: + - hadoop_datanode:/hadoop/dfs/data + environment: + SERVICE_PRECONDITION: "namenode:50070" + env_file: + - ./hadoop.env resourcemanager: - image: bde2020/hadoop-resourcemanager:1.1.0-hadoop2.7.1-java8 + image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8 container_name: resourcemanager - depends_on: - - namenode - - datanode1 - - datanode2 + environment: + SERVICE_PRECONDITION: "namenode:50070 datanode:50075" + env_file: + - ./hadoop.env + + nodemanager1: + image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8 + container_name: nodemanager + environment: + SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" env_file: - ./hadoop.env historyserver: - image: bde2020/hadoop-historyserver:1.1.0-hadoop2.7.1-java8 + image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8 container_name: historyserver - depends_on: - - namenode - - datanode1 - - datanode2 + environment: + SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" volumes: - hadoop_historyserver:/hadoop/yarn/timeline env_file: - ./hadoop.env - nodemanager1: - image: bde2020/hadoop-nodemanager:1.1.0-hadoop2.7.1-java8 - container_name: nodemanager1 - depends_on: - - namenode - - datanode1 - - datanode2 - env_file: - - ./hadoop.env - - datanode1: - image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8 - container_name: datanode1 - depends_on: - - namenode - volumes: - - hadoop_datanode1:/hadoop/dfs/data - env_file: - - ./hadoop.env - - datanode2: - image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8 - container_name: datanode2 - depends_on: - - namenode - volumes: - - 
hadoop_datanode2:/hadoop/dfs/data - env_file: - - ./hadoop.env - - datanode3: - image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8 - container_name: datanode3 - depends_on: - - namenode - volumes: - - hadoop_datanode3:/hadoop/dfs/data - env_file: - - ./hadoop.env - volumes: hadoop_namenode: - hadoop_datanode1: - hadoop_datanode2: - hadoop_datanode3: + hadoop_datanode: hadoop_historyserver: diff --git a/hadoop.env b/hadoop.env index 8bf3211..8a34fcd 100644 --- a/hadoop.env +++ b/hadoop.env @@ -1,22 +1,39 @@ -CORE_CONF_fs_defaultFS=hdfs://namenode:8020 +CORE_CONF_fs_defaultFS=hdfs://namenode:9000 CORE_CONF_hadoop_http_staticuser_user=root CORE_CONF_hadoop_proxyuser_hue_hosts=* CORE_CONF_hadoop_proxyuser_hue_groups=* +CORE_CONF_io_compression_codecs=org.apache.hadoop.io.compress.SnappyCodec HDFS_CONF_dfs_webhdfs_enabled=true HDFS_CONF_dfs_permissions_enabled=false +HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false +YARN_CONF_yarn_scheduler_fair_allow___undeclared___pools=false +YARN_CONF_yarn_scheduler_fair_user___as___default___queue=false YARN_CONF_yarn_log___aggregation___enable=true +YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ YARN_CONF_yarn_resourcemanager_recovery_enabled=true YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate -YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs -YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ -YARN_CONF_yarn_timeline___service_enabled=true -YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true YARN_CONF_yarn_resourcemanager_hostname=resourcemanager -YARN_CONF_yarn_timeline___service_hostname=historyserver YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031 +YARN_CONF_yarn_timeline___service_enabled=true +YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true +YARN_CONF_yarn_timeline___service_hostname=historyserver +YARN_CONF_mapreduce_map_output_compress=true +YARN_CONF_mapred_map_output_compress_codec=org.apache.hadoop.io.compress.SnappyCodec +YARN_CONF_yarn_nodemanager_resource_memory___mb=16384 +YARN_CONF_yarn_nodemanager_resource_cpu___vcores=8 +YARN_CONF_yarn_nodemanager_disk___health___checker_max___disk___utilization___per___disk___percentage=98.5 +YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs +YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle + +MAPRED_CONF_mapreduce_framework_name=yarn +MAPRED_CONF_mapred_child_java_opts=-Xmx4096m +MAPRED_CONF_mapreduce_map_memory_mb=4096 +MAPRED_CONF_mapreduce_reduce_memory_mb=8192 +MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m +MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m diff --git a/historyserver/Dockerfile b/historyserver/Dockerfile index 31f0a82..8d5f908 100644 --- a/historyserver/Dockerfile +++ b/historyserver/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1 diff --git a/namenode/Dockerfile b/namenode/Dockerfile index abeafe9..85e82b1 100644 --- a/namenode/Dockerfile +++ b/namenode/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:50070/ || exit 1 diff --git a/nodemanager/Dockerfile b/nodemanager/Dockerfile index c82df41..cb36593 100644 --- a/nodemanager/Dockerfile +++ b/nodemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 +FROM 
bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1 diff --git a/resourcemanager/Dockerfile b/resourcemanager/Dockerfile index 0f4a690..36a179b 100644 --- a/resourcemanager/Dockerfile +++ b/resourcemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1 diff --git a/submit/Dockerfile b/submit/Dockerfile index 52b0499..8efad04 100644 --- a/submit/Dockerfile +++ b/submit/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.2.0-hadoop2.8-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 MAINTAINER Ivan Ermilov COPY WordCount.jar /opt/hadoop/applications/WordCount.jar From f2e2dc0afede8ea1a355d8f008bffd3d4769d90a Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Tue, 24 Oct 2017 13:11:27 +0200 Subject: [PATCH 08/13] changed scheduler, set max alloc --- hadoop.env | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hadoop.env b/hadoop.env index 8a34fcd..3284e85 100644 --- a/hadoop.env +++ b/hadoop.env @@ -8,12 +8,13 @@ HDFS_CONF_dfs_webhdfs_enabled=true HDFS_CONF_dfs_permissions_enabled=false HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false -YARN_CONF_yarn_scheduler_fair_allow___undeclared___pools=false -YARN_CONF_yarn_scheduler_fair_user___as___default___queue=false YARN_CONF_yarn_log___aggregation___enable=true YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ YARN_CONF_yarn_resourcemanager_recovery_enabled=true YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore +YARN_CONF_yarn_resourcemanager_scheduler_class=org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler +YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___mb=8192 
+YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___vcores=4 YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true YARN_CONF_yarn_resourcemanager_hostname=resourcemanager From 1c87dd082530f2bc9592fa13e3669eed2ced06f5 Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Mon, 18 Dec 2017 15:25:58 +0100 Subject: [PATCH 09/13] add docker compose v3 --- docker-compose-v3.yml | 110 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 docker-compose-v3.yml diff --git a/docker-compose-v3.yml b/docker-compose-v3.yml new file mode 100644 index 0000000..a8d6b52 --- /dev/null +++ b/docker-compose-v3.yml @@ -0,0 +1,110 @@ +version: '3' + +services: + namenode: + image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 + networks: + - hbase + volumes: + - namenode:/hadoop/dfs/name + environment: + - CLUSTER_NAME=test + env_file: + - ./hadoop.env + deploy: + mode: replicated + replicas: 1 + restart_policy: + condition: on-failure + placement: + constraints: + - node.hostname == akswnc4.aksw.uni-leipzig.de + labels: + traefik.docker.network: hbase + traefik.port: 50070 + + datanode: + image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 + networks: + - hbase + volumes: + - datanode:/hadoop/dfs/data + env_file: + - ./hadoop.env + environment: + SERVICE_PRECONDITION: "namenode:50070" + deploy: + mode: global + restart_policy: + condition: on-failure + labels: + traefik.docker.network: hbase + traefik.port: 50075 + + resourcemanager: + image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8 + networks: + - hbase + environment: + SERVICE_PRECONDITION: "namenode:50070 datanode:50075" + env_file: + - ./hadoop.env + deploy: + mode: replicated + replicas: 1 + restart_policy: + condition: on-failure + placement: + constraints: + - node.hostname == akswnc4.aksw.uni-leipzig.de + labels: + traefik.docker.network: hbase + traefik.port: 8088 + 
healthcheck: + disable: true + + nodemanager: + image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8 + networks: + - hbase + environment: + SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" + env_file: + - ./hadoop.env + deploy: + mode: global + restart_policy: + condition: on-failure + labels: + traefik.docker.network: hbase + traefik.port: 8042 + + historyserver: + image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8 + networks: + - hbase + volumes: + - hadoop_historyserver:/hadoop/yarn/timeline + environment: + SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" + env_file: + - ./hadoop.env + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.hostname == akswnc4.aksw.uni-leipzig.de + labels: + traefik.docker.network: hbase + traefik.port: 8188 + +volumes: + datanode: + namenode: + hadoop_historyserver: + +networks: + hbase: + external: + name: hbase From 301e2a18dbfebbc4e5d7767cef98394a6f18d311 Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Mon, 5 Feb 2018 12:57:04 +0100 Subject: [PATCH 10/13] fix wait_for_it script --- base/entrypoint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/entrypoint.sh b/base/entrypoint.sh index a240d90..e37bf20 100644 --- a/base/entrypoint.sh +++ b/base/entrypoint.sh @@ -109,7 +109,7 @@ function wait_for_it() echo "[$i/$max_try] $service:${port} is available." } -for i in "${SERVICE_PRECONDITION[@]}" +for i in ${SERVICE_PRECONDITION[@]} do wait_for_it ${i} done From 4c7ce389611aefb1d32f23098081293899deed0d Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Mon, 5 Feb 2018 13:24:35 +0100 Subject: [PATCH 11/13] doc --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 280eec1..6547a84 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Changes -Version 1.1.0 introduces healthchecks for the containers. 
+Version 2.0.0 introduces uses wait_for_it script for the cluster startup # Hadoop Docker @@ -9,6 +9,11 @@ To deploy an example HDFS cluster, run: docker-compose up ``` +Or deploy in swarm: +``` +docker stack deploy -c docker-compose-v3.yml hadoop +``` + The configuration parameters can be specified in the hadoop.env file or as environmental variables for specific services (e.g. namenode, datanode etc.): ``` CORE_CONF_fs_defaultFS=hdfs://namenode:8020 From 654ad4bd619fac945f7a7efe591dfe0f93f740a5 Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Fri, 5 Oct 2018 16:02:06 +0200 Subject: [PATCH 12/13] docker hadoop v3.1.1 --- Makefile | 4 ++-- README.md | 12 ++++++++---- base/Dockerfile | 6 +++--- datanode/Dockerfile | 6 +++--- docker-compose-v3.yml | 10 +++++----- docker-compose.yml | 18 +++++++++--------- hadoop.env | 3 +++ historyserver/Dockerfile | 2 +- namenode/Dockerfile | 6 +++--- nodemanager/Dockerfile | 2 +- resourcemanager/Dockerfile | 2 +- submit/Dockerfile | 2 +- 12 files changed, 40 insertions(+), 33 deletions(-) diff --git a/Makefile b/Makefile index f95859a..f74842a 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -DOCKER_NETWORK = dockerhadoop_default +DOCKER_NETWORK = docker-hadoop_default ENV_FILE = hadoop.env current_branch := $(shell git rev-parse --abbrev-ref HEAD) build: @@ -13,7 +13,7 @@ build: wordcount: docker build -t hadoop-wordcount ./submit docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -mkdir -p /input/ - docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-2.7.4/README.txt /input/ + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-3.1.1/README.txt /input/ docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} hadoop-wordcount docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} 
bde2020/hadoop-base:$(current_branch) hdfs dfs -cat /output/* docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /output diff --git a/README.md b/README.md index b20a1ac..e836e34 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,7 @@ Version 2.0.0 introduces uses wait_for_it script for the cluster startup # Hadoop Docker ## Supported Hadoop Versions -* 2.7.1 with OpenJDK 7 -* 2.7.1 with OpenJDK 8 +See repository branches for supported hadoop versions ## Quick Start @@ -17,6 +16,11 @@ To deploy an example HDFS cluster, run: docker-compose up ``` +Run example wordcount job: +``` + make wordcount +``` + Or deploy in swarm: ``` docker stack deploy -c docker-compose-v3.yml hadoop @@ -26,9 +30,9 @@ docker stack deploy -c docker-compose-v3.yml hadoop Run `docker network inspect` on the network (e.g. `dockerhadoop_default`) to find the IP the hadoop interfaces are published on. Access these interfaces with the following URLs: -* Namenode: http://<dockerhadoop_IP_address>:50070/dfshealth.html#tab-overview +* Namenode: http://<dockerhadoop_IP_address>:9870/dfshealth.html#tab-overview * History server: http://<dockerhadoop_IP_address>:8188/applicationhistory -* Datanode: http://<dockerhadoop_IP_address>:50075/ +* Datanode: http://<dockerhadoop_IP_address>:9864/ * Nodemanager: http://<dockerhadoop_IP_address>:8042/node * Resource manager: http://<dockerhadoop_IP_address>:8088/ diff --git a/base/Dockerfile b/base/Dockerfile index de05d9c..cc18fdb 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -40,11 +40,12 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \ 0445B7BFC4515847C157ECD16BA72FF1C99785DE \ B74F188889D159F3D7E64A7F348C6D7A0DCED714 \ 4A6AC5C675B6155682729C9E08D51A0A7501105C \ - 8B44A05C308955D191956559A5CEE20A90348D47 + 8B44A05C308955D191956559A5CEE20A90348D47 \ + 57300D45 RUN gpg --keyserver pool.sks-keyservers.net --recv-key C36C5F0F -ENV HADOOP_VERSION 2.7.4 +ENV HADOOP_VERSION 3.1.1 ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz RUN set -x \ && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \ @@
-54,7 +55,6 @@ RUN set -x \ && rm /tmp/hadoop.tar.gz* RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop -RUN cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml RUN mkdir /opt/hadoop-$HADOOP_VERSION/logs RUN mkdir /hadoop-data diff --git a/datanode/Dockerfile b/datanode/Dockerfile index 5e5080c..1e5f510 100644 --- a/datanode/Dockerfile +++ b/datanode/Dockerfile @@ -1,7 +1,7 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov -HEALTHCHECK CMD curl -f http://localhost:50075/ || exit 1 +HEALTHCHECK CMD curl -f http://localhost:9864/ || exit 1 ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data RUN mkdir -p /hadoop/dfs/data @@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/data ADD run.sh /run.sh RUN chmod a+x /run.sh -EXPOSE 50075 +EXPOSE 9864 CMD ["/run.sh"] diff --git a/docker-compose-v3.yml b/docker-compose-v3.yml index a8d6b52..e771819 100644 --- a/docker-compose-v3.yml +++ b/docker-compose-v3.yml @@ -2,7 +2,7 @@ version: '3' services: namenode: - image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8 networks: - hbase volumes: @@ -24,7 +24,7 @@ services: traefik.port: 50070 datanode: - image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8 networks: - hbase volumes: @@ -42,7 +42,7 @@ services: traefik.port: 50075 resourcemanager: - image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8 networks: - hbase environment: @@ -64,7 +64,7 @@ services: disable: true nodemanager: - image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8 networks: - hbase environment: @@ -80,7 +80,7 @@ services: traefik.port: 8042 historyserver: - image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8 + image: 
bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8 networks: - hbase volumes: diff --git a/docker-compose.yml b/docker-compose.yml index f4f8718..634a929 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: "3" services: namenode: - image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8 container_name: namenode volumes: - hadoop_namenode:/hadoop/dfs/name @@ -12,36 +12,36 @@ services: - ./hadoop.env datanode: - image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8 container_name: datanode volumes: - hadoop_datanode:/hadoop/dfs/data environment: - SERVICE_PRECONDITION: "namenode:50070" + SERVICE_PRECONDITION: "namenode:9870" env_file: - ./hadoop.env resourcemanager: - image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8 container_name: resourcemanager environment: - SERVICE_PRECONDITION: "namenode:50070 datanode:50075" + SERVICE_PRECONDITION: "namenode:9870 datanode:9864" env_file: - ./hadoop.env nodemanager1: - image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8 container_name: nodemanager environment: - SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" + SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088" env_file: - ./hadoop.env historyserver: - image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8 container_name: historyserver environment: - SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" + SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088" volumes: - hadoop_historyserver:/hadoop/yarn/timeline env_file: diff --git a/hadoop.env b/hadoop.env index 3284e85..3366abf 100644 --- a/hadoop.env +++ b/hadoop.env @@ -38,3 +38,6 @@ 
MAPRED_CONF_mapreduce_map_memory_mb=4096 MAPRED_CONF_mapreduce_reduce_memory_mb=8192 MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m +MAPRED_CONF_yarn_app_mapreduce_am_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/ +MAPRED_CONF_mapreduce_map_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/ +MAPRED_CONF_mapreduce_reduce_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/ diff --git a/historyserver/Dockerfile b/historyserver/Dockerfile index 8d5f908..a8b794c 100644 --- a/historyserver/Dockerfile +++ b/historyserver/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1 diff --git a/namenode/Dockerfile b/namenode/Dockerfile index 85e82b1..727c7d8 100644 --- a/namenode/Dockerfile +++ b/namenode/Dockerfile @@ -1,7 +1,7 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov -HEALTHCHECK CMD curl -f http://localhost:50070/ || exit 1 +HEALTHCHECK CMD curl -f http://localhost:9870/ || exit 1 ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name RUN mkdir -p /hadoop/dfs/name @@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/name ADD run.sh /run.sh RUN chmod a+x /run.sh -EXPOSE 50070 +EXPOSE 9870 CMD ["/run.sh"] diff --git a/nodemanager/Dockerfile b/nodemanager/Dockerfile index cb36593..c40fda6 100644 --- a/nodemanager/Dockerfile +++ b/nodemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1 diff --git a/resourcemanager/Dockerfile b/resourcemanager/Dockerfile index 36a179b..9260a83 100644 --- a/resourcemanager/Dockerfile +++ b/resourcemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER 
Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1 diff --git a/submit/Dockerfile b/submit/Dockerfile index 8efad04..510e557 100644 --- a/submit/Dockerfile +++ b/submit/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov COPY WordCount.jar /opt/hadoop/applications/WordCount.jar From 1c6032427a2967133337137c5ff4f1dc56b41420 Mon Sep 17 00:00:00 2001 From: Giannis Mouchakis Date: Fri, 11 Jan 2019 23:24:50 +0200 Subject: [PATCH 13/13] replace keyserver key retrieval with key file download --- base/Dockerfile | 37 ++----------------------------------- 1 file changed, 2 insertions(+), 35 deletions(-) diff --git a/base/Dockerfile b/base/Dockerfile index cc18fdb..951e36f 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -8,42 +8,9 @@ ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends net-tools curl netcat -RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \ - 07617D4968B34D8F13D56E20BE5AAA0BA210C095 \ - 2CAC83124870D88586166115220F69801F27E622 \ - 4B96409A098DBD511DF2BC18DBAF69BEA7239D59 \ - 9DD955653083EFED6171256408458C39E964B5FF \ - B6B3F7EDA5BA7D1E827DE5180DFF492D8EE2F25C \ - 6A67379BEFC1AE4D5595770A34005598B8F47547 \ - 47660BC98BC433F01E5C90581209E7F13D0C92B9 \ - CE83449FDC6DACF9D24174DCD1F99F6EE3CD2163 \ - A11DF05DEA40DA19CE4B43C01214CF3F852ADB85 \ - 686E5EDF04A4830554160910DF0F5BBC30CD0996 \ - 5BAE7CB144D05AD1BB1C47C75C6CC6EFABE49180 \ - AF7610D2E378B33AB026D7574FB955854318F669 \ - 6AE70A2A38F466A5D683F939255ADF56C36C5F0F \ - 70F7AB3B62257ABFBD0618D79FDB12767CC7352A \ - 842AAB2D0BC5415B4E19D429A342433A56D8D31A \ - 1B5D384B734F368052862EB55E43CAB9AEC77EAF \ - 785436A782586B71829C67A04169AA27ECB31663 \ - 5E49DA09E2EC9950733A4FF48F1895E97869A2FB \ - A13B3869454536F1852C17D0477E02D33DD51430 \ - A6220FFCC86FE81CE5AAC880E3814B59E4E11856 \ - 
EFE2E7C571309FE00BEBA78D5E314EEF7340E1CB \ - EB34498A9261F343F09F60E0A9510905F0B000F0 \ - 3442A6594268AC7B88F5C1D25104A731B021B57F \ - 6E83C32562C909D289E6C3D98B25B9B71EFF7770 \ - E9216532BF11728C86A11E3132CF4BF4E72E74D3 \ - E8966520DA24E9642E119A5F13971DA39475BD5D \ - 1D369094D4CFAC140E0EF05E992230B1EB8C6EFA \ - A312CE6A1FA98892CB2C44EBA79AB712DE5868E6 \ - 0445B7BFC4515847C157ECD16BA72FF1C99785DE \ - B74F188889D159F3D7E64A7F348C6D7A0DCED714 \ - 4A6AC5C675B6155682729C9E08D51A0A7501105C \ - 8B44A05C308955D191956559A5CEE20A90348D47 \ - 57300D45 +RUN curl -O https://dist.apache.org/repos/dist/release/hadoop/common/KEYS -RUN gpg --keyserver pool.sks-keyservers.net --recv-key C36C5F0F +RUN gpg --import KEYS ENV HADOOP_VERSION 3.1.1 ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz