integrated startup sequence controlled by netcat

Ivan Ermilov 2017-10-24 10:53:08 +02:00
parent 75eb1bacc2
commit 7225e1e4f7
14 changed files with 98 additions and 239 deletions

View File

@@ -1,4 +1,4 @@
-DOCKER_NETWORK = hadoop
+DOCKER_NETWORK = dockerhadoop_default
 ENV_FILE = hadoop.env
 current_branch := $(shell git rev-parse --abbrev-ref HEAD)
 build:
@@ -13,7 +13,7 @@ build:
 wordcount:
 	docker build -t hadoop-wordcount ./submit
 	docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -mkdir -p /input/
-	docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-2.8.0/README.txt /input/
+	docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-2.7.4/README.txt /input/
 	docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} hadoop-wordcount
 	docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -cat /output/*
 	docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /output
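Taken together, the `wordcount` target stages the input, runs the job, prints the result, and cleans up. A typical session from the repository root (a sketch, assuming the compose cluster is already up on `dockerhadoop_default`):

```
make build      # build the images tagged with the current git branch
make wordcount  # mkdir /input, copy README.txt, run the job, cat and remove /output
```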

View File

@@ -29,10 +29,11 @@ The available configurations are:
 * /etc/hadoop/yarn-site.xml YARN_CONF
 * /etc/hadoop/httpfs-site.xml HTTPFS_CONF
 * /etc/hadoop/kms-site.xml KMS_CONF
+* /etc/hadoop/mapred-site.xml MAPRED_CONF
 
 If you need to extend some other configuration file, refer to base/entrypoint.sh bash script.
 
-After starting the example Hadoop cluster, you should be able to access interfaces of all the components (substitute domain names by IP addresses from ```network inspect hadoop``` command):
+After starting the example Hadoop cluster, you should be able to access interfaces of all the components (substitute domain names by IP addresses from ```network inspect dockerhadoop_default``` command):
 * Namenode: http://namenode:50070/dfshealth.html#tab-overview
 * History server: http://historyserver:8188/applicationhistory
 * Datanode: http://datanode:50075/
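Since the network is now the compose-created `dockerhadoop_default`, the name-to-IP substitutions for those URLs can be listed in one go (a sketch relying on the standard `docker network inspect` JSON layout):

```
docker network inspect dockerhadoop_default \
  --format '{{range .Containers}}{{.Name}} {{.IPv4Address}}{{println}}{{end}}'
```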

View File

@@ -6,7 +6,7 @@ RUN echo "deb http://ftp.debian.org/debian jessie-backports main" >> /etc/apt/so
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -t jessie-backports -y --no-install-recommends openjdk-8-jdk
 ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/
 
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends net-tools curl
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends net-tools curl netcat
 
 RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \
       07617D4968B34D8F13D56E20BE5AAA0BA210C095 \
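netcat is added here because the entrypoint (next file) gates startup on `nc -z` probes. That is netcat's zero-I/O scan mode:

```
# exit 0 only if the port accepts a TCP connection; nothing is sent
nc -z namenode 50070 && echo "namenode:50070 reachable"
```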

View File

@@ -79,4 +79,39 @@ if [ -n "$GANGLIA_HOST" ]; then
   done > /etc/hadoop/hadoop-metrics2.properties
 fi
 
+function wait_for_it()
+{
+    local serviceport=$1
+    local service=${serviceport%%:*}
+    local port=${serviceport#*:}
+    local retry_seconds=5
+    local max_try=100
+    let i=1
+
+    nc -z $service $port
+    result=$?
+
+    until [ $result -eq 0 ]; do
+      echo "[$i/$max_try] check for ${service}:${port}..."
+      echo "[$i/$max_try] ${service}:${port} is not available yet"
+      if (( $i == $max_try )); then
+        echo "[$i/$max_try] ${service}:${port} is still not available; giving up after ${max_try} tries. :/"
+        exit 1
+      fi
+
+      echo "[$i/$max_try] try in ${retry_seconds}s once again ..."
+      let "i++"
+      sleep $retry_seconds
+
+      nc -z $service $port
+      result=$?
+    done
+    echo "[$i/$max_try] $service:${port} is available."
+}
+
+for i in "${SERVICE_PRECONDITION[@]}"
+do
+    wait_for_it ${i}
+done
+
 exec $@
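`SERVICE_PRECONDITION` arrives as a plain space-separated string of `host:port` pairs injected by docker-compose (see the compose diff below), and the loop hands each pair to `wait_for_it` before the real command runs. A minimal self-contained sketch of the same netcat gate, assuming bash and an `nc` that supports `-z`:

```
#!/bin/bash
# Hypothetical standalone rendition of the entrypoint's startup gate.
SERVICE_PRECONDITION="namenode:50070 datanode:50075"

for pair in $SERVICE_PRECONDITION; do   # unquoted: relies on word splitting
  host=${pair%%:*}                      # text before the first ':'
  port=${pair#*:}                       # text after the first ':'
  until nc -z "$host" "$port"; do       # -z: probe without sending data
    echo "waiting for $host:$port ..."
    sleep 5
  done
  echo "$host:$port is available."
done
```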

View File

@@ -1,4 +1,4 @@
-FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8
+FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
 MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
 
 HEALTHCHECK CMD curl -f http://localhost:50075/ || exit 1
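The unchanged HEALTHCHECK still curls the datanode web UI on 50075; once the stack is running, the recorded status of the `datanode` container (named in the compose file below) can be read back with the stock docker CLI:

```
docker inspect --format '{{.State.Health.Status}}' datanode
```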

View File

@@ -1,95 +0,0 @@
-version: "2"
-
-services:
-  namenode:
-    build: ./namenode
-    hostname: namenode
-    container_name: namenode
-    volumes:
-      - ./data/namenode:/hadoop/dfs/name
-    environment:
-      - CLUSTER_NAME=test
-    env_file:
-      - ./hadoop.env
-
-  resourcemanager:
-    build: ./resourcemanager
-    hostname: resourcemanager
-    container_name: resourcemanager
-    depends_on:
-      - "namenode"
-    links:
-      - "namenode"
-    ports:
-      - "58088:8088"
-    env_file:
-      - ./hadoop.env
-
-  historyserver:
-    build: ./historyserver
-    hostname: historyserver
-    container_name: historyserver
-    volumes:
-      - ./data/historyserver:/hadoop/yarn/timeline
-    depends_on:
-      - "namenode"
-    links:
-      - "namenode"
-    ports:
-      - "58188:8188"
-    env_file:
-      - ./hadoop.env
-
-  nodemanager1:
-    build: ./nodemanager
-    hostname: nodemanager1
-    container_name: nodemanager1
-    depends_on:
-      - "namenode"
-      - "resourcemanager"
-    links:
-      - "namenode"
-      - "resourcemanager"
-    ports:
-      - "58042:8042"
-    env_file:
-      - ./hadoop.env
-
-  datanode1:
-    build: ./datanode
-    hostname: datanode1
-    container_name: datanode1
-    depends_on:
-      - "namenode"
-    links:
-      - "namenode"
-    volumes:
-      - ./data/datanode1:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env
-
-  datanode2:
-    build: ./datanode
-    hostname: datanode2
-    container_name: datanode2
-    depends_on:
-      - "namenode"
-    links:
-      - "namenode"
-    volumes:
-      - ./data/datanode2:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env
-
-  datanode3:
-    build: ./datanode
-    hostname: datanode3
-    container_name: datanode3
-    depends_on:
-      - "namenode"
-    links:
-      - "namenode"
-    volumes:
-      - ./data/datanode3:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env

View File

@@ -1,71 +0,0 @@
-nginx:
-  build: ./nginx
-  hostname: nginx
-  net: hadoop
-  container_name: nginx
-  ports:
-    - "8080:80"
-
-namenode:
-  image: bde2020/hadoop-namenode:1.0.0
-  hostname: namenode
-  container_name: namenode
-  domainname: hadoop
-  net: hadoop
-  volumes:
-    - ./data/namenode:/hadoop/dfs/name
-  environment:
-    - CLUSTER_NAME=test
-  env_file:
-    - ./hadoop.env
-
-resourcemanager:
-  image: bde2020/hadoop-resourcemanager:1.0.0
-  hostname: resourcemanager
-  container_name: resourcemanager
-  domainname: hadoop
-  net: hadoop
-  env_file:
-    - ./hadoop.env
-
-historyserver:
-  image: bde2020/hadoop-historyserver:1.0.0
-  hostname: historyserver
-  container_name: historyserver
-  domainname: hadoop
-  net: hadoop
-  volumes:
-    - ./data/historyserver:/hadoop/yarn/timeline
-  env_file:
-    - ./hadoop.env
-
-nodemanager1:
-  image: bde2020/hadoop-nodemanager:1.0.0
-  hostname: nodemanager1
-  container_name: nodemanager1
-  domainname: hadoop
-  net: hadoop
-  env_file:
-    - ./hadoop.env
-
-datanode1:
-  image: bde2020/hadoop-datanode:1.0.0
-  hostname: datanode1
-  container_name: datanode1
-  domainname: hadoop
-  net: hadoop
-  volumes:
-    - ./data/datanode1:/hadoop/dfs/data
-  env_file:
-    - ./hadoop.env
-
-datanode2:
-  image: bde2020/hadoop-datanode:1.0.0
-  hostname: datanode2
-  container_name: datanode2
-  domainname: hadoop
-  net: hadoop
-  volumes:
-    - ./data/datanode2:/hadoop/dfs/data
-  env_file:
-    - ./hadoop.env

View File

@@ -1,8 +1,8 @@
-version: "2"
+version: "3"
 services:
   namenode:
-    image: bde2020/hadoop-namenode:1.1.0-hadoop2.7.1-java8
+    image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
     container_name: namenode
     volumes:
       - hadoop_namenode:/hadoop/dfs/name
@@ -11,71 +11,43 @@ services:
     env_file:
       - ./hadoop.env
 
+  datanode:
+    image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
+    container_name: datanode
+    volumes:
+      - hadoop_datanode:/hadoop/dfs/data
+    environment:
+      SERVICE_PRECONDITION: "namenode:50070"
+    env_file:
+      - ./hadoop.env
+
   resourcemanager:
-    image: bde2020/hadoop-resourcemanager:1.1.0-hadoop2.7.1-java8
+    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop2.7.4-java8
     container_name: resourcemanager
-    depends_on:
-      - namenode
-      - datanode1
-      - datanode2
+    environment:
+      SERVICE_PRECONDITION: "namenode:50070 datanode:50075"
+    env_file:
+      - ./hadoop.env
+
+  nodemanager1:
+    image: bde2020/hadoop-nodemanager:2.0.0-hadoop2.7.4-java8
+    container_name: nodemanager
+    environment:
+      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
     env_file:
       - ./hadoop.env
 
   historyserver:
-    image: bde2020/hadoop-historyserver:1.1.0-hadoop2.7.1-java8
+    image: bde2020/hadoop-historyserver:2.0.0-hadoop2.7.4-java8
     container_name: historyserver
-    depends_on:
-      - namenode
-      - datanode1
-      - datanode2
+    environment:
+      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 resourcemanager:8088"
     volumes:
       - hadoop_historyserver:/hadoop/yarn/timeline
     env_file:
       - ./hadoop.env
 
-  nodemanager1:
-    image: bde2020/hadoop-nodemanager:1.1.0-hadoop2.7.1-java8
-    container_name: nodemanager1
-    depends_on:
-      - namenode
-      - datanode1
-      - datanode2
-    env_file:
-      - ./hadoop.env
-
-  datanode1:
-    image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8
-    container_name: datanode1
-    depends_on:
-      - namenode
-    volumes:
-      - hadoop_datanode1:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env
-
-  datanode2:
-    image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8
-    container_name: datanode2
-    depends_on:
-      - namenode
-    volumes:
-      - hadoop_datanode2:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env
-
-  datanode3:
-    image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8
-    container_name: datanode3
-    depends_on:
-      - namenode
-    volumes:
-      - hadoop_datanode3:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env
-
 volumes:
   hadoop_namenode:
-  hadoop_datanode1:
-  hadoop_datanode2:
-  hadoop_datanode3:
+  hadoop_datanode:
   hadoop_historyserver:
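With the `SERVICE_PRECONDITION` gates replacing `depends_on`, a plain detached start should now converge without manual ordering. A sketch using the service names defined above:

```
docker-compose up -d
docker-compose logs -f datanode   # shows the "[1/100] check for namenode:50070..." retries
docker-compose ps                 # services settle to Up once their gates clear
```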

View File

@@ -1,22 +1,39 @@
-CORE_CONF_fs_defaultFS=hdfs://namenode:8020
+CORE_CONF_fs_defaultFS=hdfs://namenode:9000
 CORE_CONF_hadoop_http_staticuser_user=root
 CORE_CONF_hadoop_proxyuser_hue_hosts=*
 CORE_CONF_hadoop_proxyuser_hue_groups=*
+CORE_CONF_io_compression_codecs=org.apache.hadoop.io.compress.SnappyCodec
 
 HDFS_CONF_dfs_webhdfs_enabled=true
 HDFS_CONF_dfs_permissions_enabled=false
+HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
 
+YARN_CONF_yarn_scheduler_fair_allow___undeclared___pools=false
+YARN_CONF_yarn_scheduler_fair_user___as___default___queue=false
 YARN_CONF_yarn_log___aggregation___enable=true
+YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
 YARN_CONF_yarn_resourcemanager_recovery_enabled=true
 YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
 YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
-YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
-YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/
-YARN_CONF_yarn_timeline___service_enabled=true
-YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
 YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
 YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
-YARN_CONF_yarn_timeline___service_hostname=historyserver
 YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
 YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
 YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
+YARN_CONF_yarn_timeline___service_enabled=true
+YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
+YARN_CONF_yarn_timeline___service_hostname=historyserver
+YARN_CONF_mapreduce_map_output_compress=true
+YARN_CONF_mapred_map_output_compress_codec=org.apache.hadoop.io.compress.SnappyCodec
+YARN_CONF_yarn_nodemanager_resource_memory___mb=16384
+YARN_CONF_yarn_nodemanager_resource_cpu___vcores=8
+YARN_CONF_yarn_nodemanager_disk___health___checker_max___disk___utilization___per___disk___percentage=98.5
+YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
+YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle
+
+MAPRED_CONF_mapreduce_framework_name=yarn
+MAPRED_CONF_mapred_child_java_opts=-Xmx4096m
+MAPRED_CONF_mapreduce_map_memory_mb=4096
+MAPRED_CONF_mapreduce_reduce_memory_mb=8192
+MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m
+MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m
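base/entrypoint.sh expands each of these variables into the `*-site.xml` file named by its prefix (CORE_CONF into core-site.xml, YARN_CONF into yarn-site.xml, MAPRED_CONF into mapred-site.xml). In the remainder of the name, a triple underscore appears to stand for a dash and a single underscore for a dot; an illustrative mapping (my reading of the convention, not part of the commit):

```
# CORE_CONF_fs_defaultFS=hdfs://namenode:9000
#   -> core-site.xml: fs.defaultFS = hdfs://namenode:9000
# YARN_CONF_yarn_log___aggregation___enable=true
#   -> yarn-site.xml: yarn.log-aggregation-enable = true
```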

View File

@@ -1,4 +1,4 @@
-FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8
+FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
 MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
 
 HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1

View File

@@ -1,4 +1,4 @@
-FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8
+FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
 MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
 
 HEALTHCHECK CMD curl -f http://localhost:50070/ || exit 1

View File

@@ -1,4 +1,4 @@
-FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8
+FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
 MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
 
 HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1

View File

@@ -1,4 +1,4 @@
-FROM bde2020/hadoop-base:1.2.0-hadoop2.7.4-java8
+FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
 MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
 
 HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1

View File

@@ -1,4 +1,4 @@
-FROM bde2020/hadoop-base:1.2.0-hadoop2.8-java8
+FROM bde2020/hadoop-base:2.0.0-hadoop2.7.4-java8
 MAINTAINER Ivan Ermilov <ivan.s.ermilov@gmail.com>
 
 COPY WordCount.jar /opt/hadoop/applications/WordCount.jar
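The submit image only has to carry the jar; the Makefile's `wordcount` target (above) runs it on the compose network. The equivalent manual invocation, using the same flags the Makefile expands to:

```
docker run --network dockerhadoop_default --env-file hadoop.env hadoop-wordcount
```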