diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..f74842a --- /dev/null +++ b/Makefile @@ -0,0 +1,23 @@ +DOCKER_NETWORK = docker-hadoop_default +ENV_FILE = hadoop.env +current_branch := $(shell git rev-parse --abbrev-ref HEAD) +.PHONY: build wordcount +# Build every image from its subdirectory, tagged with the current git branch name. +build: + docker build -t bde2020/hadoop-base:$(current_branch) ./base + docker build -t bde2020/hadoop-namenode:$(current_branch) ./namenode + docker build -t bde2020/hadoop-datanode:$(current_branch) ./datanode + docker build -t bde2020/hadoop-resourcemanager:$(current_branch) ./resourcemanager + docker build -t bde2020/hadoop-nodemanager:$(current_branch) ./nodemanager + docker build -t bde2020/hadoop-historyserver:$(current_branch) ./historyserver + docker build -t bde2020/hadoop-submit:$(current_branch) ./submit + +# Run the WordCount example on the network $(DOCKER_NETWORK): stage README.txt in /input, run the job, print /output, then remove both HDFS directories. +wordcount: + docker build -t hadoop-wordcount ./submit + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -mkdir -p /input/ + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -copyFromLocal /opt/hadoop-3.1.1/README.txt /input/ + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} hadoop-wordcount + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -cat '/output/*' + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /output + docker run --network ${DOCKER_NETWORK} --env-file ${ENV_FILE} bde2020/hadoop-base:$(current_branch) hdfs dfs -rm -r /input diff --git a/README.md b/README.md index a998ae1..e836e34 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,12 @@ # Changes -Version 1.1.0 introduces healthchecks for the containers. 
+Version 2.0.0 uses a wait_for_it script for the cluster startup. # Hadoop Docker ## Supported Hadoop Versions -* 2.7.1 with OpenJDK 7 -* 2.7.1 with OpenJDK 8 +See the repository branches for the supported Hadoop versions. ## Quick Start @@ -17,13 +16,23 @@ To deploy an example HDFS cluster, run: docker-compose up ``` +Run the example wordcount job: +``` + make wordcount +``` + +Or deploy in swarm: +``` +docker stack deploy -c docker-compose-v3.yml hadoop +``` + `docker-compose` creates a docker network that can be found by running `docker network list`, e.g. `dockerhadoop_default`. Run `docker network inspect` on the network (e.g. `dockerhadoop_default`) to find the IP the hadoop interfaces are published on. Access these interfaces with the following URLs: -* Namenode: http://:50070/dfshealth.html#tab-overview +* Namenode: http://:9870/dfshealth.html#tab-overview * History server: http://:8188/applicationhistory -* Datanode: http://:50075/ +* Datanode: http://:9864/ * Nodemanager: http://:8042/node * Resource manager: http://:8088/ @@ -49,5 +58,6 @@ The available configurations are: * /etc/hadoop/yarn-site.xml YARN_CONF * /etc/hadoop/httpfs-site.xml HTTPFS_CONF * /etc/hadoop/kms-site.xml KMS_CONF +* /etc/hadoop/mapred-site.xml MAPRED_CONF If you need to extend some other configuration file, refer to base/entrypoint.sh bash script. 
diff --git a/base/Dockerfile b/base/Dockerfile index db2e51b..951e36f 100644 --- a/base/Dockerfile +++ b/base/Dockerfile @@ -3,48 +3,16 @@ MAINTAINER Ivan Ermilov RUN echo "deb http://ftp.debian.org/debian jessie-backports main" >> /etc/apt/sources.list -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends openjdk-8-jdk +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -t jessie-backports -y --no-install-recommends openjdk-8-jdk ENV JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ -RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends net-tools curl +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends net-tools curl netcat -RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \ - 07617D4968B34D8F13D56E20BE5AAA0BA210C095 \ - 2CAC83124870D88586166115220F69801F27E622 \ - 4B96409A098DBD511DF2BC18DBAF69BEA7239D59 \ - 9DD955653083EFED6171256408458C39E964B5FF \ - B6B3F7EDA5BA7D1E827DE5180DFF492D8EE2F25C \ - 6A67379BEFC1AE4D5595770A34005598B8F47547 \ - 47660BC98BC433F01E5C90581209E7F13D0C92B9 \ - CE83449FDC6DACF9D24174DCD1F99F6EE3CD2163 \ - A11DF05DEA40DA19CE4B43C01214CF3F852ADB85 \ - 686E5EDF04A4830554160910DF0F5BBC30CD0996 \ - 5BAE7CB144D05AD1BB1C47C75C6CC6EFABE49180 \ - AF7610D2E378B33AB026D7574FB955854318F669 \ - 6AE70A2A38F466A5D683F939255ADF56C36C5F0F \ - 70F7AB3B62257ABFBD0618D79FDB12767CC7352A \ - 842AAB2D0BC5415B4E19D429A342433A56D8D31A \ - 1B5D384B734F368052862EB55E43CAB9AEC77EAF \ - 785436A782586B71829C67A04169AA27ECB31663 \ - 5E49DA09E2EC9950733A4FF48F1895E97869A2FB \ - A13B3869454536F1852C17D0477E02D33DD51430 \ - A6220FFCC86FE81CE5AAC880E3814B59E4E11856 \ - EFE2E7C571309FE00BEBA78D5E314EEF7340E1CB \ - EB34498A9261F343F09F60E0A9510905F0B000F0 \ - 3442A6594268AC7B88F5C1D25104A731B021B57F \ - 6E83C32562C909D289E6C3D98B25B9B71EFF7770 \ - E9216532BF11728C86A11E3132CF4BF4E72E74D3 \ - 
E8966520DA24E9642E119A5F13971DA39475BD5D \ - 1D369094D4CFAC140E0EF05E992230B1EB8C6EFA \ - A312CE6A1FA98892CB2C44EBA79AB712DE5868E6 \ - 0445B7BFC4515847C157ECD16BA72FF1C99785DE \ - B74F188889D159F3D7E64A7F348C6D7A0DCED714 \ - 4A6AC5C675B6155682729C9E08D51A0A7501105C \ - 8B44A05C308955D191956559A5CEE20A90348D47 +RUN curl -fSL -O https://dist.apache.org/repos/dist/release/hadoop/common/KEYS -RUN gpg --keyserver pool.sks-keyservers.net --recv-key C36C5F0F +RUN gpg --import KEYS -ENV HADOOP_VERSION 2.7.1 +ENV HADOOP_VERSION 3.1.1 ENV HADOOP_URL https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz RUN set -x \ && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \ @@ -52,9 +20,8 @@ RUN set -x \ && gpg --verify /tmp/hadoop.tar.gz.asc \ && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \ && rm /tmp/hadoop.tar.gz* - + RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop -RUN cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml RUN mkdir /opt/hadoop-$HADOOP_VERSION/logs RUN mkdir /hadoop-data diff --git a/base/entrypoint.sh b/base/entrypoint.sh index eb061b0..3b16fed 100644 --- a/base/entrypoint.sh +++ b/base/entrypoint.sh @@ -36,6 +36,7 @@ configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF configure /etc/hadoop/yarn-site.xml yarn YARN_CONF configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF configure /etc/hadoop/kms-site.xml kms KMS_CONF +configure /etc/hadoop/mapred-site.xml mapred MAPRED_CONF if [ "$MULTIHOMED_NETWORK" = "1" ]; then echo "Configuring for multihomed network" @@ -78,4 +79,39 @@ if [ -n "$GANGLIA_HOST" ]; then done > /etc/hadoop/hadoop-metrics2.properties fi +function wait_for_it() +{ + local serviceport=$1 + local service=${serviceport%%:*} + local port=${serviceport#*:} + local retry_seconds=5 + local max_try=100 + local i=1 + + nc -z $service $port + result=$? + + until [ $result -eq 0 ]; do + echo "[$i/$max_try] check for ${service}:${port}..." 
+ echo "[$i/$max_try] ${service}:${port} is not available yet" + if (( $i == $max_try )); then + echo "[$i/$max_try] ${service}:${port} is still not available; giving up after ${max_try} tries. :/" + exit 1 + fi + + echo "[$i/$max_try] try in ${retry_seconds}s once again ..." + let "i++" + sleep $retry_seconds + + nc -z $service $port + result=$? + done + echo "[$i/$max_try] $service:${port} is available." +} + +for i in ${SERVICE_PRECONDITION[@]} +do + wait_for_it ${i} +done + exec $@ diff --git a/datanode/Dockerfile b/datanode/Dockerfile index 37aad11..1e5f510 100644 --- a/datanode/Dockerfile +++ b/datanode/Dockerfile @@ -1,7 +1,7 @@ -FROM bde2020/hadoop-base:1.1.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov -HEALTHCHECK CMD curl -f http://localhost:50075/ || exit 1 +HEALTHCHECK CMD curl -f http://localhost:9864/ || exit 1 ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data RUN mkdir -p /hadoop/dfs/data @@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/data ADD run.sh /run.sh RUN chmod a+x /run.sh -EXPOSE 50075 +EXPOSE 9864 CMD ["/run.sh"] diff --git a/docker-compose-local.yml b/docker-compose-local.yml deleted file mode 100644 index 8de012b..0000000 --- a/docker-compose-local.yml +++ /dev/null @@ -1,95 +0,0 @@ -version: "2" - -services: - namenode: - build: ./namenode - hostname: namenode - container_name: namenode - volumes: - - ./data/namenode:/hadoop/dfs/name - environment: - - CLUSTER_NAME=test - env_file: - - ./hadoop.env - - resourcemanager: - build: ./resourcemanager - hostname: resourcemanager - container_name: resourcemanager - depends_on: - - "namenode" - links: - - "namenode" - ports: - - "58088:8088" - env_file: - - ./hadoop.env - - historyserver: - build: ./historyserver - hostname: historyserver - container_name: historyserver - volumes: - - ./data/historyserver:/hadoop/yarn/timeline - depends_on: - - "namenode" - links: - - "namenode" - ports: - - "58188:8188" - env_file: - - ./hadoop.env - - 
nodemanager1: - build: ./nodemanager - hostname: nodemanager1 - container_name: nodemanager1 - depends_on: - - "namenode" - - "resourcemanager" - links: - - "namenode" - - "resourcemanager" - ports: - - "58042:8042" - env_file: - - ./hadoop.env - - datanode1: - build: ./datanode - hostname: datanode1 - container_name: datanode1 - depends_on: - - "namenode" - links: - - "namenode" - volumes: - - ./data/datanode1:/hadoop/dfs/data - env_file: - - ./hadoop.env - - datanode2: - build: ./datanode - hostname: datanode2 - container_name: datanode2 - depends_on: - - "namenode" - links: - - "namenode" - volumes: - - ./data/datanode2:/hadoop/dfs/data - env_file: - - ./hadoop.env - - datanode3: - build: ./datanode - hostname: datanode3 - container_name: datanode3 - depends_on: - - "namenode" - links: - - "namenode" - volumes: - - ./data/datanode3:/hadoop/dfs/data - env_file: - - ./hadoop.env diff --git a/docker-compose-nginx.yml b/docker-compose-nginx.yml deleted file mode 100644 index 688dba4..0000000 --- a/docker-compose-nginx.yml +++ /dev/null @@ -1,71 +0,0 @@ -nginx: - build: ./nginx - hostname: nginx - net: hadoop - container_name: nginx - ports: - - "8080:80" - -namenode: - image: bde2020/hadoop-namenode:1.0.0 - hostname: namenode - container_name: namenode - domainname: hadoop - net: hadoop - volumes: - - ./data/namenode:/hadoop/dfs/name - environment: - - CLUSTER_NAME=test - env_file: - - ./hadoop.env - -resourcemanager: - image: bde2020/hadoop-resourcemanager:1.0.0 - hostname: resourcemanager - container_name: resourcemanager - domainname: hadoop - net: hadoop - env_file: - - ./hadoop.env - -historyserver: - image: bde2020/hadoop-historyserver:1.0.0 - hostname: historyserver - container_name: historyserver - domainname: hadoop - net: hadoop - volumes: - - ./data/historyserver:/hadoop/yarn/timeline - env_file: - - ./hadoop.env - -nodemanager1: - image: bde2020/hadoop-nodemanager:1.0.0 - hostname: nodemanager1 - container_name: nodemanager1 - domainname: hadoop - net: 
hadoop - env_file: - - ./hadoop.env - -datanode1: - image: bde2020/hadoop-datanode:1.0.0 - hostname: datanode1 - container_name: datanode1 - domainname: hadoop - net: hadoop - volumes: - - ./data/datanode1:/hadoop/dfs/data - env_file: - - ./hadoop.env - -datanode2: - image: bde2020/hadoop-datanode:1.0.0 - hostname: datanode2 - container_name: datanode2 - domainname: hadoop - net: hadoop - volumes: - - ./data/datanode2:/hadoop/dfs/data - env_file: - - ./hadoop.env diff --git a/docker-compose-v3.yml b/docker-compose-v3.yml new file mode 100644 index 0000000..e771819 --- /dev/null +++ b/docker-compose-v3.yml @@ -0,0 +1,110 @@ +version: '3' + +services: + namenode: + image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8 + networks: + - hbase + volumes: + - namenode:/hadoop/dfs/name + environment: + - CLUSTER_NAME=test + env_file: + - ./hadoop.env + deploy: + mode: replicated + replicas: 1 + restart_policy: + condition: on-failure + placement: + constraints: + - node.hostname == akswnc4.aksw.uni-leipzig.de + labels: + traefik.docker.network: hbase + traefik.port: 9870 + + datanode: + image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8 + networks: + - hbase + volumes: + - datanode:/hadoop/dfs/data + env_file: + - ./hadoop.env + environment: + SERVICE_PRECONDITION: "namenode:9870" + deploy: + mode: global + restart_policy: + condition: on-failure + labels: + traefik.docker.network: hbase + traefik.port: 9864 + + resourcemanager: + image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8 + networks: + - hbase + environment: + SERVICE_PRECONDITION: "namenode:9870 datanode:9864" + env_file: + - ./hadoop.env + deploy: + mode: replicated + replicas: 1 + restart_policy: + condition: on-failure + placement: + constraints: + - node.hostname == akswnc4.aksw.uni-leipzig.de + labels: + traefik.docker.network: hbase + traefik.port: 8088 + healthcheck: + disable: true + + nodemanager: + image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8 + networks: + - hbase + 
environment: + SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088" + env_file: + - ./hadoop.env + deploy: + mode: global + restart_policy: + condition: on-failure + labels: + traefik.docker.network: hbase + traefik.port: 8042 + + historyserver: + image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8 + networks: + - hbase + volumes: + - hadoop_historyserver:/hadoop/yarn/timeline + environment: + SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088" + env_file: + - ./hadoop.env + deploy: + mode: replicated + replicas: 1 + placement: + constraints: + - node.hostname == akswnc4.aksw.uni-leipzig.de + labels: + traefik.docker.network: hbase + traefik.port: 8188 + +volumes: + datanode: + namenode: + hadoop_historyserver: + +networks: + hbase: + external: + name: hbase diff --git a/docker-compose.yml b/docker-compose.yml index bef8ae7..634a929 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,8 @@ -version: "2" +version: "3" services: namenode: - image: bde2020/hadoop-namenode:1.1.0-hadoop2.7.1-java8 + image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8 container_name: namenode volumes: - hadoop_namenode:/hadoop/dfs/name @@ -10,72 +10,44 @@ services: - CLUSTER_NAME=test env_file: - ./hadoop.env + + datanode: + image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8 + container_name: datanode + volumes: + - hadoop_datanode:/hadoop/dfs/data + environment: + SERVICE_PRECONDITION: "namenode:9870" + env_file: + - ./hadoop.env resourcemanager: - image: bde2020/hadoop-resourcemanager:1.1.0-hadoop2.7.1-java8 + image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8 container_name: resourcemanager - depends_on: - - namenode - - datanode1 - - datanode2 + environment: + SERVICE_PRECONDITION: "namenode:9870 datanode:9864" + env_file: + - ./hadoop.env + + nodemanager1: + image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8 + container_name: nodemanager + environment: + SERVICE_PRECONDITION: "namenode:9870 
datanode:9864 resourcemanager:8088" env_file: - ./hadoop.env historyserver: - image: bde2020/hadoop-historyserver:1.1.0-hadoop2.7.1-java8 + image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8 container_name: historyserver - depends_on: - - namenode - - datanode1 - - datanode2 + environment: + SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088" volumes: - hadoop_historyserver:/hadoop/yarn/timeline env_file: - ./hadoop.env - nodemanager1: - image: bde2020/hadoop-nodemanager:1.1.0-hadoop2.7.1-java8 - container_name: nodemanager1 - depends_on: - - namenode - - datanode1 - - datanode2 - env_file: - - ./hadoop.env - - datanode1: - image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8 - container_name: datanode1 - depends_on: - - namenode - volumes: - - hadoop_datanode1:/hadoop/dfs/data - env_file: - - ./hadoop.env - - datanode2: - image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8 - container_name: datanode2 - depends_on: - - namenode - volumes: - - hadoop_datanode2:/hadoop/dfs/data - env_file: - - ./hadoop.env - - datanode3: - image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8 - container_name: datanode3 - depends_on: - - namenode - volumes: - - hadoop_datanode3:/hadoop/dfs/data - env_file: - - ./hadoop.env - volumes: hadoop_namenode: - hadoop_datanode1: - hadoop_datanode2: - hadoop_datanode3: + hadoop_datanode: hadoop_historyserver: diff --git a/hadoop.env b/hadoop.env index 304592c..3366abf 100644 --- a/hadoop.env +++ b/hadoop.env @@ -1,22 +1,43 @@ -CORE_CONF_fs_defaultFS=hdfs://namenode:8020 +CORE_CONF_fs_defaultFS=hdfs://namenode:9000 CORE_CONF_hadoop_http_staticuser_user=root CORE_CONF_hadoop_proxyuser_hue_hosts=* CORE_CONF_hadoop_proxyuser_hue_groups=* +CORE_CONF_io_compression_codecs=org.apache.hadoop.io.compress.SnappyCodec HDFS_CONF_dfs_webhdfs_enabled=true HDFS_CONF_dfs_permissions_enabled=false +HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false YARN_CONF_yarn_log___aggregation___enable=true 
+YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ YARN_CONF_yarn_resourcemanager_recovery_enabled=true YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore +YARN_CONF_yarn_resourcemanager_scheduler_class=org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler +YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___mb=8192 +YARN_CONF_yarn_scheduler_capacity_root_default_maximum___allocation___vcores=4 YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate -YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs -YARN_CONF_yarn_log_server_url=http://historyserver:8188/applicationhistory/logs/ -YARN_CONF_yarn_timeline___service_enabled=true -YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true YARN_CONF_yarn_resourcemanager_hostname=resourcemanager -YARN_CONF_yarn_timeline___service_hostname=historyserver YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032 YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030 YARN_CONF_yarn_resourcemanager_resource___tracker_address=resourcemanager:8031 +YARN_CONF_yarn_timeline___service_enabled=true +YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true +YARN_CONF_yarn_timeline___service_hostname=historyserver +YARN_CONF_mapreduce_map_output_compress=true +YARN_CONF_mapred_map_output_compress_codec=org.apache.hadoop.io.compress.SnappyCodec +YARN_CONF_yarn_nodemanager_resource_memory___mb=16384 +YARN_CONF_yarn_nodemanager_resource_cpu___vcores=8 +YARN_CONF_yarn_nodemanager_disk___health___checker_max___disk___utilization___per___disk___percentage=98.5 +YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs 
+YARN_CONF_yarn_nodemanager_aux___services=mapreduce_shuffle + +MAPRED_CONF_mapreduce_framework_name=yarn +MAPRED_CONF_mapred_child_java_opts=-Xmx4096m +MAPRED_CONF_mapreduce_map_memory_mb=4096 +MAPRED_CONF_mapreduce_reduce_memory_mb=8192 +MAPRED_CONF_mapreduce_map_java_opts=-Xmx3072m +MAPRED_CONF_mapreduce_reduce_java_opts=-Xmx6144m +MAPRED_CONF_yarn_app_mapreduce_am_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/ +MAPRED_CONF_mapreduce_map_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/ +MAPRED_CONF_mapreduce_reduce_env=HADOOP_MAPRED_HOME=/opt/hadoop-3.1.1/ diff --git a/historyserver/Dockerfile b/historyserver/Dockerfile index 55da9e5..a8b794c 100644 --- a/historyserver/Dockerfile +++ b/historyserver/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.0.0 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8188/ || exit 1 diff --git a/namenode/Dockerfile b/namenode/Dockerfile index 6d3d8f5..727c7d8 100644 --- a/namenode/Dockerfile +++ b/namenode/Dockerfile @@ -1,7 +1,7 @@ -FROM bde2020/hadoop-base:1.1.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov -HEALTHCHECK CMD curl -f http://localhost:50070/ || exit 1 +HEALTHCHECK CMD curl -f http://localhost:9870/ || exit 1 ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name RUN mkdir -p /hadoop/dfs/name @@ -10,6 +10,6 @@ VOLUME /hadoop/dfs/name ADD run.sh /run.sh RUN chmod a+x /run.sh -EXPOSE 50070 +EXPOSE 9870 CMD ["/run.sh"] diff --git a/nodemanager/Dockerfile b/nodemanager/Dockerfile index 1e44977..c40fda6 100644 --- a/nodemanager/Dockerfile +++ b/nodemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.1.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8042/ || exit 1 diff --git a/resourcemanager/Dockerfile b/resourcemanager/Dockerfile index 710915f..9260a83 100644 --- a/resourcemanager/Dockerfile +++ 
b/resourcemanager/Dockerfile @@ -1,4 +1,4 @@ -FROM bde2020/hadoop-base:1.1.0-hadoop2.7.1-java8 +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 MAINTAINER Ivan Ermilov HEALTHCHECK CMD curl -f http://localhost:8088/ || exit 1 diff --git a/submit/Dockerfile b/submit/Dockerfile new file mode 100644 index 0000000..510e557 --- /dev/null +++ b/submit/Dockerfile @@ -0,0 +1,13 @@ +FROM bde2020/hadoop-base:2.0.0-hadoop3.1.1-java8 +MAINTAINER Ivan Ermilov + +COPY WordCount.jar /opt/hadoop/applications/WordCount.jar + +ENV JAR_FILEPATH="/opt/hadoop/applications/WordCount.jar" +ENV CLASS_TO_RUN="WordCount" +ENV PARAMS="/input /output" + +ADD run.sh /run.sh +RUN chmod a+x /run.sh + +CMD ["/run.sh"] diff --git a/submit/WordCount.jar b/submit/WordCount.jar new file mode 100644 index 0000000..7b6a178 Binary files /dev/null and b/submit/WordCount.jar differ diff --git a/submit/run.sh b/submit/run.sh new file mode 100644 index 0000000..5ebe79e --- /dev/null +++ b/submit/run.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +$HADOOP_PREFIX/bin/hadoop jar $JAR_FILEPATH $CLASS_TO_RUN $PARAMS