From 2b3b4b120516c8265f062d4de443d0c708a4b881 Mon Sep 17 00:00:00 2001 From: Giannis Mouchakis Date: Thu, 10 Mar 2016 02:40:42 +0200 Subject: [PATCH] Added support for running hadoop in single node mode. Changed README accordingly --- README.md | 41 ++++++++++++++++++++++++++++------------- hadoop-base/Dockerfile | 2 ++ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 44148b9..d0a2026 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,42 @@ -This is a Hadoop cluster running in docker containers. The namenode and datanodes run in different containers. +# Hadoop Docker -The cluster by default uses data replication "2". To change it edit the hdfs-site.xml file. +This repository provides Hadoop in Docker containers. You can either run Hadoop in a single node or create a cluster. + +The deployed Hadoop uses data replication "2". To change it edit the hdfs-site.xml file. + +All data are stored in /hdfs-data, so to store data in a host directory run the container using "-v /path/to/host:/hdfs-data". +By default the container formats the namenode directory only if it does not exist (hdfs namenode -format -nonInteractive). +If you want to mount an external directory that already contains a namenode directory and format it you have to first delete it manually. + +## Single node mode + +To deploy a single Hadoop node run + + docker run -h namenode bde2020/hadoop-base + +To store data in a host directory run the container as + + docker run -h namenode -v /path/to/host:/hdfs-data bde2020/hadoop-base + +## Cluster mode + +The namenode runs in a separate container from the datanodes. 
To start the namenode run docker run --name namenode -h namenode bde2020/hadoop-namenode -To start two datanodes on the same host run +To add a datanode to the cluster run - docker run --name datanode1 --link namenode:namenode bde2020/hadoop-datanode - docker run --name datanode2 --link namenode:namenode bde2020/hadoop-datanode - -More info is comming soon on how to run hadoop docker using docker network and docker swarm + docker run --link namenode:namenode bde2020/hadoop-datanode +Use the same command to add more datanodes to the cluster - docker run --name datanode1 --link namenode:namenode -v /path/to/host:/hdfs-data bde2020/hadoop-datanode - docker run --name datanode2 --link namenode:namenode -v /path/to/host:/hdfs-data bde2020/hadoop-datanode +More info is coming soon on how to deploy a Hadoop cluster using docker network and docker swarm -By default the namenode formats the namenode directory only if not exists (hdfs namenode -format -nonInteractive). -If you want to mount an external directory that already contains a namenode directory and format it you have to first delete it manually. +## Access the namenode -Hadoop namenode listens on +The namenode listens on hdfs://namenode:8020 diff --git a/hadoop-base/Dockerfile b/hadoop-base/Dockerfile index c4c4069..f775c73 100644 --- a/hadoop-base/Dockerfile +++ b/hadoop-base/Dockerfile @@ -26,3 +26,5 @@ RUN mv hadoop-$HADOOP_VERSION $HADOOP_PREFIX # add configuration files ADD core-site.xml $HADOOP_CONF_DIR/core-site.xml ADD hdfs-site.xml $HADOOP_CONF_DIR/hdfs-site.xml + +CMD hdfs namenode -format -nonInteractive & hdfs namenode && hdfs datanode