Merge pull request #26 from earthquakesan/2.0.0-hadoop3.1.1-java8

docker hadoop v3.1.1
This commit is contained in:
Ivan Ermilov 2018-10-05 16:05:31 +02:00 committed by GitHub
commit b05e99fb79
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 40 additions and 33 deletions

View File

@ -1,4 +1,4 @@
DOCKER_NETWORK = dockerhadoop_default DOCKER_NETWORK = docker-hadoop_default
ENV_FILE = hadoop.env ENV_FILE = hadoop.env
current_branch := $(shell git rev-parse --abbrev-ref HEAD) current_branch := $(shell git rev-parse --abbrev-ref HEAD)
build: build:
@ -13,7 +13,7 @@ build:
wordcount: wordcount:
docker build -t hadoop-wordcount ./submit docker build -t hadoop-wordcount ./submit
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -mkdir -p /input/ docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -mkdir -p /input/
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-2.7.4/README.txt /input/ docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-3.1.1/README.txt /input/
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} hadoop-wordcount docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} hadoop-wordcount
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -cat /output/* docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -cat /output/*
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /output docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /output

View File

@ -7,8 +7,7 @@ Version 2.0.0 introduces the wait_for_it script for the cluster startup
# Hadoop Docker # Hadoop Docker
## Supported Hadoop Versions ## Supported Hadoop Versions
* 2.7.1 with OpenJDK 7 See repository branches for supported hadoop versions
* 2.7.1 with OpenJDK 8
## Quick Start ## Quick Start
@ -17,6 +16,11 @@ To deploy an example HDFS cluster, run:
docker-compose up docker-compose up
``` ```
Run example wordcount job:
```
make wordcount
```
Or deploy in swarm: Or deploy in swarm:
``` ```
docker stack deploy -c docker-compose-v3.yml hadoop docker stack deploy -c docker-compose-v3.yml hadoop
@ -26,9 +30,9 @@ docker stack deploy -c docker-compose-v3.yml hadoop
Run `docker network inspect` on the network (e.g. `dockerhadoop_default`) to find the IP the hadoop interfaces are published on. Access these interfaces with the following URLs: Run `docker network inspect` on the network (e.g. `dockerhadoop_default`) to find the IP the hadoop interfaces are published on. Access these interfaces with the following URLs:
* Namenode: http://<dockerhadoop_IP_address>:50070/dfshealth.html#tab-overview * Namenode: http://<dockerhadoop_IP_address>:9870/dfshealth.html#tab-overview
* History server: http://<dockerhadoop_IP_address>:8188/applicationhistory * History server: http://<dockerhadoop_IP_address>:8188/applicationhistory
* Datanode: http://<dockerhadoop_IP_address>:50075/ * Datanode: http://<dockerhadoop_IP_address>:9864/
* Nodemanager: http://<dockerhadoop_IP_address>:8042/node * Nodemanager: http://<dockerhadoop_IP_address>:8042/node
* Resource manager: http://<dockerhadoop_IP_address>:8088/ * Resource manager: http://<dockerhadoop_IP_address>:8088/

View File

@ -40,11 +40,12 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \
0445B7BFC4515847C157ECD16BA72FF1C99785DE \ 0445B7BFC4515847C157ECD16BA72FF1C99785DE \
B74F188889D159F3D7E64A7F348C6D7A0DCED714 \ B74F188889D159F3D7E64A7F348C6D7A0DCED714 \
4A6AC5C675B6155682729C9E08D51A0A7501105C \ 4A6AC5C675B6155682729C9E08D51A0A7501105C \
8B44A05C308955D191956559A5CEE20A90348D47 8B44A05C308955D191956559A5CEE20A90348D47 \
57300D45
RUN gpg --keyserver pool.sks-keyservers.net --recv-key C36C5F0F RUN gpg --keyserver pool.sks-keyservers.net --recv-key C36C5F0F
ENV HADOOP_VERSION 2.7.4 ENV HADOOP_VERSION 3.1.1
ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
RUN set -x \ RUN set -x \
&& curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \ && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
@ -54,7 +55,6 @@ RUN set -x \
&& rm /tmp/hadoop.tar.gz* && rm /tmp/hadoop.tar.gz*
RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop
RUN cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml
RUN mkdir /opt/hadoop-$HADOOP_VERSION/logs RUN mkdir /opt/hadoop-$HADOOP_VERSION/logs
RUN mkdir /hadoop-data RUN mkdir /hadoop-data

View File

@ -1,7 +1,7 @@
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com> MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
HEALTHCHECK CMD curl -f http://localhost:50075/ || exit 1 HEALTHCHECK CMD curl -f http://localhost:9864/ || exit 1
ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
RUN mkdir -p /hadoop/dfs/data RUN mkdir -p /hadoop/dfs/data
@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/data
ADD run.sh /run.sh ADD run.sh /run.sh
RUN chmod a+x /run.sh RUN chmod a+x /run.sh
EXPOSE 50075 EXPOSE 9864
CMD ["/run.sh"] CMD ["/run.sh"]

View File

@ -2,7 +2,7 @@ version: '3'
services: services:
namenode: namenode:
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8
networks: networks:
- hbase - hbase
volumes: volumes:
@ -24,7 +24,7 @@ services:
traefik.port: 50070 traefik.port: 50070
datanode: datanode:
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8
networks: networks:
- hbase - hbase
volumes: volumes:
@ -42,7 +42,7 @@ services:
traefik.port: 50075 traefik.port: 50075
resourcemanager: resourcemanager:
image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8
networks: networks:
- hbase - hbase
environment: environment:
@ -64,7 +64,7 @@ services:
disable: true disable: true
nodemanager: nodemanager:
image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8
networks: networks:
- hbase - hbase
environment: environment:
@ -80,7 +80,7 @@ services:
traefik.port: 8042 traefik.port: 8042
historyserver: historyserver:
image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8
networks: networks:
- hbase - hbase
volumes: volumes:

View File

@ -2,7 +2,7 @@ version: "3"
services: services:
namenode: namenode:
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8
container_name: namenode container_name: namenode
volumes: volumes:
- hadoop_namenode:/hadoop/dfs/name - hadoop_namenode:/hadoop/dfs/name
@ -12,36 +12,36 @@ services:
- ./hadoop.env - ./hadoop.env
datanode: datanode:
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8
container_name: datanode container_name: datanode
volumes: volumes:
- hadoop_datanode:/hadoop/dfs/data - hadoop_datanode:/hadoop/dfs/data
environment: environment:
SERVICE_PRECONDITION: "namenode:50070" SERVICE_PRECONDITION: "namenode:9870"
env_file: env_file:
- ./hadoop.env - ./hadoop.env
resourcemanager: resourcemanager:
image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8
container_name: resourcemanager container_name: resourcemanager
environment: environment:
SERVICE_PRECONDITION: "namenode:50070 datanode:50075" SERVICE_PRECONDITION: "namenode:9870 datanode:9864"
env_file: env_file:
- ./hadoop.env - ./hadoop.env
nodemanager1: nodemanager1:
image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8
container_name: nodemanager container_name: nodemanager
environment: environment:
SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088"
env_file: env_file:
- ./hadoop.env - ./hadoop.env
historyserver: historyserver:
image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8 image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8
container_name: historyserver container_name: historyserver
environment: environment:
SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088" SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088"
volumes: volumes:
- hadoop_historyserver:/hadoop/yarn/timeline - hadoop_historyserver:/hadoop/yarn/timeline
env_file: env_file:

View File

@ -38,3 +38,6 @@ MAPRED_CONF_mapreduce_map_memory_mb=4096
MAPRED_CONF_mapreduce_reduce_memory_mb=8192 MAPRED_CONF_mapreduce_reduce_memory_mb=8192
MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m
MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m
MAPRED_CONF_yarn_app_mapreduce_am_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/
MAPRED_CONF_mapreduce_map_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/
MAPRED_CONF_mapreduce_reduce_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/

View File

@ -1,4 +1,4 @@
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com> MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1 HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1

View File

@ -1,7 +1,7 @@
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com> MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
HEALTHCHECK CMD curl -f http://localhost:50070/ || exit 1 HEALTHCHECK CMD curl -f http://localhost:9870/ || exit 1
ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
RUN mkdir -p /hadoop/dfs/name RUN mkdir -p /hadoop/dfs/name
@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/name
ADD run.sh /run.sh ADD run.sh /run.sh
RUN chmod a+x /run.sh RUN chmod a+x /run.sh
EXPOSE 50070 EXPOSE 9870
CMD ["/run.sh"] CMD ["/run.sh"]

View File

@ -1,4 +1,4 @@
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com> MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1 HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1

View File

@ -1,4 +1,4 @@
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com> MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1 HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1

View File

@ -1,4 +1,4 @@
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8 FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com> MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
COPY WordCount.jar /opt/hadoop/applications/WordCount.jar COPY WordCount.jar /opt/hadoop/applications/WordCount.jar