From 654ad4bd619fac945f7a7efe591dfe0f93f740a5 Mon Sep 17 00:00:00 2001 From: Ivan Ermilov Date: Fri, 5 Oct 2018 16:02:06 +0200 Subject: [PATCH] docker hadoop v3.1.1 --- Makefile | 4 ++-- README.md | 12 ++++++++---- base/Dockerfile | 6 +++--- datanode/Dockerfile | 6 +++--- docker-compose-v3.yml | 10 +++++----- docker-compose.yml | 18 +++++++++--------- hadoop.env | 3 +++ historyserver/Dockerfile | 2 +- namenode/Dockerfile | 6 +++--- nodemanager/Dockerfile | 2 +- resourcemanager/Dockerfile | 2 +- submit/Dockerfile | 2 +- 12 files changed, 40 insertions(+), 33 deletions(-) diff --git a/Makefile b/Makefile index f95859a..f74842a 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -DOCKER_NETWORK = dockerhadoop_default +DOCKER_NETWORK = docker-hadoop_default ENV_FILE = hadoop.env current_branch := $(shell git rev-parse --abbrev-ref HEAD) build: @@ -13,7 +13,7 @@ build: wordcount: docker build -t hadoop-wordcount ./submit docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -mkdir -p /input/ - docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-2.7.4/README.txt /input/ + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-3.1.1/README.txt /input/ docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} hadoop-wordcount docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -cat /output/* docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /output diff --git a/README.md b/README.md index b20a1ac..e836e34 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,7 @@ Version 2.0.0 introduces uses wait_for_it script for the cluster startup # Hadoop Docker ## Supported Hadoop Versions -* 2.7.1 with OpenJDK 7 -* 2.7.1 with OpenJDK 8 +See repository branches for supported hadoop versions ## Quick Start @@ -17,6 +16,11 @@ To deploy an example HDFS cluster, run: docker-compose up ``` +Run example wordcount job: +``` + make wordcount +``` + Or deploy in swarm: ``` docker stack deploy -c docker-compose-v3.yml hadoop @@ -26,9 +30,9 @@ docker stack deploy -c docker-compose-v3.yml hadoop Run `docker network inspect` on the network (e.g. `dockerhadoop_default`) to find the IP the hadoop interfaces are published on. Access these interfaces with the following URLs: -* Namenode: http://:50070/dfshealth.html#tab-overview +* Namenode: http://:9870/dfshealth.html#tab-overview * History server: http://:8188/applicationhistory -* Datanode: http://:50075/ +* Datanode: http://:9864/ * Nodemanager: http://:8042/node * Resource manager: http://:8088/ diff --git a/base/Dockerfile b/base/Dockerfile index de05d9c..cc18fdb 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -40,11 +40,12 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \ 0445B7BFC4515847C157ECD16BA72FF1C99785DE \ B74F188889D159F3D7E64A7F348C6D7A0DCED714 \ 4A6AC5C675B6155682729C9E08D51A0A7501105C \ - 8B44A05C308955D191956559A5CEE20A90348D47 + 8B44A05C308955D191956559A5CEE20A90348D47 \ + 57300D45 RUN gpg --keyserver pool.sks-keyservers.net --recv-key C36C5F0F -ENV HADOOP_VERSION 2.7.4 +ENV HADOOP_VERSION 3.1.1 ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz RUN set -x \ && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \ @@ -54,7 +55,6 @@ RUN set -x \ && rm /tmp/hadoop.tar.gz* RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop -RUN cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml RUN mkdir /opt/hadoop-$HADOOP_VERSION/logs RUN mkdir /hadoop-data diff --git a/datanode/Dockerfile b/datanode/Dockerfile index 5e5080c..1e5f510 100644 --- a/datanode/Dockerfile +++ b/datanode/Dockerfile @@ -1,7 +1,7 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov -HEALTHCHECK CMD curl -f http://localhost:50075/ || exit 1 +HEALTHCHECK CMD curl -f http://localhost:9864/ || exit 1 ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data RUN mkdir -p /hadoop/dfs/data @@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/data ADD run.sh /run.sh RUN chmod a+x /run.sh -EXPOSE 50075 +EXPOSE 9864 CMD ["/run.sh"] diff --git a/docker-compose-v3.yml b/docker-compose-v3.yml index a8d6b52..e771819 100644 --- a/docker-compose-v3.yml +++ b/docker-compose-v3.yml @@ -2,7 +2,7 @@ version: '3' services: namenode: - image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8 networks: - hbase volumes: @@ -24,7 +24,7 @@ services: traefik.port: 50070 datanode: - image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8 networks: - hbase volumes: @@ -42,7 +42,7 @@ services: traefik.port: 50075 resourcemanager: - image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8 networks: - hbase environment: @@ -64,7 +64,7 @@ services: disable: true nodemanager: - image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8 networks: - hbase environment: @@ -80,7 +80,7 @@ services: traefik.port: 8042 historyserver: - image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8 networks: - hbase volumes: diff --git a/docker-compose.yml b/docker-compose.yml index f4f8718..634a929 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,7 +2,7 @@ version: "3" services: namenode: - image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8 container_name: namenode volumes: - hadoop_namenode:/hadoop/dfs/name @@ -12,36 +12,36 @@ services: - ./hadoop.env datanode: - image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8 container_name: datanode volumes: - hadoop_datanode:/hadoop/dfs/data environment: - SERVICE_PRECONDITION: "namenode:50070" + SERVICE_PRECONDITION: "namenode:9870" env_file: - ./hadoop.env resourcemanager: - image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8 container_name: resourcemanager environment: - SERVICE_PRECONDITION: "namenode:50070 datanode:50075" + SERVICE_PRECONDITION: "namenode:9870 datanode:9864" env_file: - ./hadoop.env nodemanager1: - image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8 container_name: nodemanager environment: - SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" + SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088" env_file: - ./hadoop.env historyserver: - image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8 + image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8 container_name: historyserver environment: - SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" + SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088" volumes: - hadoop_historyserver:/hadoop/yarn/timeline env_file: diff --git a/hadoop.env b/hadoop.env index 3284e85..3366abf 100644 --- a/hadoop.env +++ b/hadoop.env @@ -38,3 +38,6 @@ MAPRED_CONF_mapreduce_map_memory_mb=4096 MAPRED_CONF_mapreduce_reduce_memory_mb=8192 MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m +MAPRED_CONF_yarn_app_mapreduce_am_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/ +MAPRED_CONF_mapreduce_map_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/ +MAPRED_CONF_mapreduce_reduce_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/ diff --git a/historyserver/Dockerfile b/historyserver/Dockerfile index 8d5f908..a8b794c 100644 --- a/historyserver/Dockerfile +++ b/historyserver/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1 diff --git a/namenode/Dockerfile b/namenode/Dockerfile index 85e82b1..727c7d8 100644 --- a/namenode/Dockerfile +++ b/namenode/Dockerfile @@ -1,7 +1,7 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov -HEALTHCHECK CMD curl -f http://localhost:50070/ || exit 1 +HEALTHCHECK CMD curl -f http://localhost:9870/ || exit 1 ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name RUN mkdir -p /hadoop/dfs/name @@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/name ADD run.sh /run.sh RUN chmod a+x /run.sh -EXPOSE 50070 +EXPOSE 9870 CMD ["/run.sh"] diff --git a/nodemanager/Dockerfile b/nodemanager/Dockerfile index cb36593..c40fda6 100644 --- a/nodemanager/Dockerfile +++ b/nodemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1 diff --git a/resourcemanager/Dockerfile b/resourcemanager/Dockerfile index 36a179b..9260a83 100644 --- a/resourcemanager/Dockerfile +++ b/resourcemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1 diff --git a/submit/Dockerfile b/submit/Dockerfile index 8efad04..510e557 100644 --- a/submit/Dockerfile +++ b/submit/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov COPY WordCount.jar /opt/hadoop/applications/WordCount.jar