From b97e590f6f21fc5b005919b1c97ec7de5b29c57e Mon Sep 17 00:00:00 2001 From: Giannis Mouchakis Date: Wed, 9 Mar 2016 17:37:43 +0200 Subject: [PATCH] first version of hadoop-base hadoop-namenode and hadoop-datanode --- README.md | 30 +++++++++++++++++++++++++++- hadoop-base/Dockerfile | 28 ++++++++++++++++++++++++++ hadoop-base/core-site.xml | 24 +++++++++++++++++++++++ hadoop-base/hdfs-site.xml | 40 ++++++++++++++++++++++++++++++++++++++ hadoop-datanote/Dockerfile | 5 +++++ hadoop-nanemode/Dockerfile | 5 +++++ 6 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 hadoop-base/Dockerfile create mode 100644 hadoop-base/core-site.xml create mode 100644 hadoop-base/hdfs-site.xml create mode 100644 hadoop-datanote/Dockerfile create mode 100644 hadoop-nanemode/Dockerfile diff --git a/README.md b/README.md index c2c2c41..5ce09d7 100644 --- a/README.md +++ b/README.md @@ -1 +1,29 @@ -this will be the repo for docker-hadoop +This is a docker container for hadoop. + +By default it uses data replication "2". To change it, edit the hdfs-site.xml file. + +To start the namenode run + + docker run --name namenode -h namenode bde2020/hadoop-namenode + +To start two datanodes on the same host run + + docker run --name datanode1 --link namenode:namenode bde2020/hadoop-datanode + docker run --name datanode2 --link namenode:namenode bde2020/hadoop-datanode + +More info is coming soon on how to run hadoop docker using docker network and docker swarm. + +All data are stored in /hdfs-data, so to store data in a host directory start the datanodes as + + docker run --name datanode1 --link namenode:namenode -v /path/to/host:/hdfs-data bde2020/hadoop-datanode + docker run --name datanode2 --link namenode:namenode -v /path/to/host:/hdfs-data bde2020/hadoop-datanode + +By default the namenode formats the namenode directory only if it does not already exist (hdfs namenode -format -nonInteractive). 
+If you want to mount an external directory that already contains a namenode directory and format it, you have to first delete it manually. + +Hadoop namenode listens on + + hdfs://namenode:8020 + +To access the namenode from another container, link it using "--link namenode:namenode" and then use the aforementioned URL. +More info on how to access it using docker network coming soon. \ No newline at end of file diff --git a/hadoop-base/Dockerfile b/hadoop-base/Dockerfile new file mode 100644 index 0000000..c4c4069 --- /dev/null +++ b/hadoop-base/Dockerfile @@ -0,0 +1,28 @@ +FROM java:8-jre + +MAINTAINER Yiannis Mouchakis + +# define hadoop version +ENV HADOOP_VERSION=2.7.1 + +# Hadoop env variables: install location, config directory, and bin/sbin on PATH +ENV HADOOP_PREFIX=/opt/hadoop +ENV HADOOP_CONF_DIR=$HADOOP_PREFIX/conf +ENV PATH=$PATH:$HADOOP_PREFIX/bin +ENV PATH=$PATH:$HADOOP_PREFIX/sbin + +RUN apt-get update && apt-get install -y \ + wget \ + tar \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# deploy hadoop: download, unpack, delete the tarball and move into place in ONE layer so the archive is not kept in the image +RUN wget https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz \ + && tar -zxf /hadoop-$HADOOP_VERSION.tar.gz \ + && rm /hadoop-$HADOOP_VERSION.tar.gz \ + && mv hadoop-$HADOOP_VERSION $HADOOP_PREFIX + +# add configuration files (plain local files, so COPY rather than ADD) +COPY core-site.xml $HADOOP_CONF_DIR/core-site.xml +COPY hdfs-site.xml $HADOOP_CONF_DIR/hdfs-site.xml diff --git a/hadoop-base/core-site.xml b/hadoop-base/core-site.xml new file mode 100644 index 0000000..0a336c8 --- /dev/null +++ b/hadoop-base/core-site.xml @@ -0,0 +1,24 @@ + + + + + + + + + fs.defaultFS + hdfs://namenode:8020 + + diff --git a/hadoop-base/hdfs-site.xml b/hadoop-base/hdfs-site.xml new file mode 100644 index 0000000..f453848 --- /dev/null +++ b/hadoop-base/hdfs-site.xml @@ -0,0 +1,40 @@ + + + + + + + + dfs.replication + 2 + + + dfs.datanode.data.dir + /hdfs-data/datanode + + + dfs.namenode.name.dir + /hdfs-data/namenode + + + dfs.namenode.datanode.registration.ip-hostname-check + false + + + dfs.permissions.enabled + 
false + + + diff --git a/hadoop-datanote/Dockerfile b/hadoop-datanote/Dockerfile new file mode 100644 index 0000000..582c240 --- /dev/null +++ b/hadoop-datanote/Dockerfile @@ -0,0 +1,5 @@ +FROM bde2020/hadoop-base + +MAINTAINER Yiannis Mouchakis +# exec form: no wrapping "/bin/sh -c", so stop signals are delivered to the hdfs process itself +CMD ["hdfs", "datanode"] \ No newline at end of file diff --git a/hadoop-nanemode/Dockerfile b/hadoop-nanemode/Dockerfile new file mode 100644 index 0000000..cd215c7 --- /dev/null +++ b/hadoop-nanemode/Dockerfile @@ -0,0 +1,5 @@ +FROM bde2020/hadoop-base + +MAINTAINER Yiannis Mouchakis +# format must finish before the namenode starts; ';' (not '&&') because the -nonInteractive format aborts when the name directory already exists +CMD ["/bin/sh", "-c", "hdfs namenode -format -nonInteractive; exec hdfs namenode"] \ No newline at end of file