From 2b3b4b120516c8265f062d4de443d0c708a4b881 Mon Sep 17 00:00:00 2001 From: Giannis Mouchakis Date: Thu, 10 Mar 2016 02:40:42 +0200 Subject: [PATCH] Added support for running hadoop in single node mode. Changed README accordingly --- README.md | 41 ++++++++++++++++++++++++++++------------- hadoop-base/Dockerfile | 2 ++ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 44148b9..d0a2026 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,42 @@ -This is a Hadoop cluster running in docker containers. The namenode and datanodes run in different containers. +# Hadoop Docker -The cluster by default uses data replication "2". To change it edit the hdfs-site.xml file. +This repository provides Hadoop in Docker containers. You can either run Hadoop in a single node or create a cluster. + +The deployed Hadoop uses data replication "2". To change it edit the hdfs-site.xml file. + +All data are stored in /hdfs-data, so to store data in a host directory run the container using "-v /path/to/host:/hdfs-data". +By default the container formats the namenode directory only if it does not exist (hdfs namenode -format -nonInteractive). +If you want to mount an external directory that already contains a namenode directory and format it you have to first delete it manually. + +## Single node mode + +To deploy a single Hadoop node run + + docker run -h namenode bde2020/hadoop-base + +To store data in a host directory run the container as + + docker run -h namenode -v /path/to/host:/hdfs-data bde2020/hadoop-base + +## Cluster mode + +The namenode runs in a separate container from the datanodes. 
To start the namenode run docker run --name namenode -h namenode bde2020/hadoop-namenode -To start two datanodes on the same host run +To add a datanode to the cluster run - docker run --name datanode1 --link namenode:namenode bde2020/hadoop-datanode - docker run --name datanode2 --link namenode:namenode bde2020/hadoop-datanode - -More info is comming soon on how to run hadoop docker using docker network and docker swarm + docker run --link namenode:namenode bde2020/hadoop-datanode +Use the same command to add more datanodes to the cluster - docker run --name datanode1 --link namenode:namenode -v /path/to/host:/hdfs-data bde2020/hadoop-datanode - docker run --name datanode2 --link namenode:namenode -v /path/to/host:/hdfs-data bde2020/hadoop-datanode +More info is coming soon on how to deploy a Hadoop cluster using docker network and docker swarm -By default the namenode formats the namenode directory only if not exists (hdfs namenode -format -nonInteractive). -If you want to mount an external directory that already contains a namenode directory and format it you have to first delete it manually. +## Access the namenode -Hadoop namenode listens on +The namenode listens on hdfs://namenode:8020 diff --git a/hadoop-base/Dockerfile b/hadoop-base/Dockerfile index c4c4069..f775c73 100644 --- a/hadoop-base/Dockerfile +++ b/hadoop-base/Dockerfile @@ -26,3 +26,5 @@ RUN mv hadoop-$HADOOP_VERSION $HADOOP_PREFIX # add configuration files ADD core-site.xml $HADOOP_CONF_DIR/core-site.xml ADD hdfs-site.xml $HADOOP_CONF_DIR/hdfs-site.xml + +CMD hdfs namenode -format -nonInteractive & hdfs namenode && hdfs datanode