docker hadoop v3.1.1
This commit is contained in:
parent
f1080e9249
commit
654ad4bd61
4
Makefile
4
Makefile
@ -1,4 +1,4 @@
|
||||
DOCKER_NETWORK = dockerhadoop_default
|
||||
DOCKER_NETWORK = docker-hadoop_default
|
||||
ENV_FILE = hadoop.env
|
||||
current_branch := $(shell git rev-parse --abbrev-ref HEAD)
|
||||
build:
|
||||
@ -13,7 +13,7 @@ build:
|
||||
wordcount:
|
||||
docker build -t hadoop-wordcount ./submit
|
||||
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -mkdir -p /input/
|
||||
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-2.7.4/README.txt /input/
|
||||
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-3.1.1/README.txt /input/
|
||||
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} hadoop-wordcount
|
||||
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -cat /output/*
|
||||
docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /output
|
||||
|
12
README.md
12
README.md
@ -7,8 +7,7 @@ Version 2.0.0 introduces the use of the wait_for_it script for the cluster startup
|
||||
# Hadoop Docker
|
||||
|
||||
## Supported Hadoop Versions
|
||||
* 2.7.1 with OpenJDK 7
|
||||
* 2.7.1 with OpenJDK 8
|
||||
See the repository branches for the supported Hadoop versions.
|
||||
|
||||
## Quick Start
|
||||
|
||||
@ -17,6 +16,11 @@ To deploy an example HDFS cluster, run:
|
||||
docker-compose up
|
||||
```
|
||||
|
||||
Run example wordcount job:
|
||||
```
|
||||
make wordcount
|
||||
```
|
||||
|
||||
Or deploy in swarm:
|
||||
```
|
||||
docker stack deploy -c docker-compose-v3.yml hadoop
|
||||
@ -26,9 +30,9 @@ docker stack deploy -c docker-compose-v3.yml hadoop
|
||||
|
||||
Run `docker network inspect` on the network (e.g. `dockerhadoop_default`, or `docker-hadoop_default` as used in the Makefile) to find the IP address the Hadoop interfaces are published on. Access these interfaces with the following URLs:
|
||||
|
||||
* Namenode: http://<dockerhadoop_IP_address>:50070/dfshealth.html#tab-overview
|
||||
* Namenode: http://<dockerhadoop_IP_address>:9870/dfshealth.html#tab-overview
|
||||
* History server: http://<dockerhadoop_IP_address>:8188/applicationhistory
|
||||
* Datanode: http://<dockerhadoop_IP_address>:50075/
|
||||
* Datanode: http://<dockerhadoop_IP_address>:9864/
|
||||
* Nodemanager: http://<dockerhadoop_IP_address>:8042/node
|
||||
* Resource manager: http://<dockerhadoop_IP_address>:8088/
|
||||
|
||||
|
@ -40,11 +40,12 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \
|
||||
0445B7BFC4515847C157ECD16BA72FF1C99785DE \
|
||||
B74F188889D159F3D7E64A7F348C6D7A0DCED714 \
|
||||
4A6AC5C675B6155682729C9E08D51A0A7501105C \
|
||||
8B44A05C308955D191956559A5CEE20A90348D47
|
||||
8B44A05C308955D191956559A5CEE20A90348D47 \
|
||||
57300D45
|
||||
|
||||
RUN gpg --keyserver pool.sks-keyservers.net --recv-key C36C5F0F
|
||||
|
||||
ENV HADOOP_VERSION 2.7.4
|
||||
ENV HADOOP_VERSION 3.1.1
|
||||
ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
|
||||
RUN set -x \
|
||||
&& curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
|
||||
@ -54,7 +55,6 @@ RUN set -x \
|
||||
&& rm /tmp/hadoop.tar.gz*
|
||||
|
||||
RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop
|
||||
RUN cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml
|
||||
RUN mkdir /opt/hadoop-$HADOOP_VERSION/logs
|
||||
|
||||
RUN mkdir /hadoop-data
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
|
||||
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
|
||||
|
||||
HEALTHCHECK CMD curl -f http://localhost:50075/ || exit 1
|
||||
HEALTHCHECK CMD curl -f http://localhost:9864/ || exit 1
|
||||
|
||||
ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data
|
||||
RUN mkdir -p /hadoop/dfs/data
|
||||
@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/data
|
||||
ADD run.sh /run.sh
|
||||
RUN chmod a+x /run.sh
|
||||
|
||||
EXPOSE 50075
|
||||
EXPOSE 9864
|
||||
|
||||
CMD ["/run.sh"]
|
||||
|
@ -2,7 +2,7 @@ version: '3'
|
||||
|
||||
services:
|
||||
namenode:
|
||||
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8
|
||||
networks:
|
||||
- hbase
|
||||
volumes:
|
||||
@ -24,7 +24,7 @@ services:
|
||||
traefik.port: 50070
|
||||
|
||||
datanode:
|
||||
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8
|
||||
networks:
|
||||
- hbase
|
||||
volumes:
|
||||
@ -42,7 +42,7 @@ services:
|
||||
traefik.port: 50075
|
||||
|
||||
resourcemanager:
|
||||
image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8
|
||||
networks:
|
||||
- hbase
|
||||
environment:
|
||||
@ -64,7 +64,7 @@ services:
|
||||
disable: true
|
||||
|
||||
nodemanager:
|
||||
image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8
|
||||
networks:
|
||||
- hbase
|
||||
environment:
|
||||
@ -80,7 +80,7 @@ services:
|
||||
traefik.port: 8042
|
||||
|
||||
historyserver:
|
||||
image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8
|
||||
networks:
|
||||
- hbase
|
||||
volumes:
|
||||
|
@ -2,7 +2,7 @@ version: "3"
|
||||
|
||||
services:
|
||||
namenode:
|
||||
image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8
|
||||
container_name: namenode
|
||||
volumes:
|
||||
- hadoop_namenode:/hadoop/dfs/name
|
||||
@ -12,36 +12,36 @@ services:
|
||||
- ./hadoop.env
|
||||
|
||||
datanode:
|
||||
image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8
|
||||
container_name: datanode
|
||||
volumes:
|
||||
- hadoop_datanode:/hadoop/dfs/data
|
||||
environment:
|
||||
SERVICE_PRECONDITION: "namenode:50070"
|
||||
SERVICE_PRECONDITION: "namenode:9870"
|
||||
env_file:
|
||||
- ./hadoop.env
|
||||
|
||||
resourcemanager:
|
||||
image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8
|
||||
container_name: resourcemanager
|
||||
environment:
|
||||
SERVICE_PRECONDITION: "namenode:50070 datanode:50075"
|
||||
SERVICE_PRECONDITION: "namenode:9870 datanode:9864"
|
||||
env_file:
|
||||
- ./hadoop.env
|
||||
|
||||
nodemanager1:
|
||||
image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8
|
||||
container_name: nodemanager
|
||||
environment:
|
||||
SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
|
||||
SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088"
|
||||
env_file:
|
||||
- ./hadoop.env
|
||||
|
||||
historyserver:
|
||||
image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8
|
||||
image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8
|
||||
container_name: historyserver
|
||||
environment:
|
||||
SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
|
||||
SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088"
|
||||
volumes:
|
||||
- hadoop_historyserver:/hadoop/yarn/timeline
|
||||
env_file:
|
||||
|
@ -38,3 +38,6 @@ MAPRED_CONF_mapreduce_map_memory_mb=4096
|
||||
MAPRED_CONF_mapreduce_reduce_memory_mb=8192
|
||||
MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m
|
||||
MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m
|
||||
MAPRED_CONF_yarn_app_mapreduce_am_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/
|
||||
MAPRED_CONF_mapreduce_map_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/
|
||||
MAPRED_CONF_mapreduce_reduce_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
|
||||
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
|
||||
|
||||
HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1
|
||||
|
@ -1,7 +1,7 @@
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
|
||||
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
|
||||
|
||||
HEALTHCHECK CMD curl -f http://localhost:50070/ || exit 1
|
||||
HEALTHCHECK CMD curl -f http://localhost:9870/ || exit 1
|
||||
|
||||
ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name
|
||||
RUN mkdir -p /hadoop/dfs/name
|
||||
@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/name
|
||||
ADD run.sh /run.sh
|
||||
RUN chmod a+x /run.sh
|
||||
|
||||
EXPOSE 50070
|
||||
EXPOSE 9870
|
||||
|
||||
CMD ["/run.sh"]
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
|
||||
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
|
||||
|
||||
HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
|
||||
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
|
||||
|
||||
HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1
|
||||
|
@ -1,4 +1,4 @@
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
|
||||
FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8
|
||||
MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
|
||||
|
||||
COPY WordCount.jar /opt/hadoop/applications/WordCount.jar
|
||||
|
Loading…
x
Reference in New Issue
Block a user