merged Hadoop from HDFS Workbench into v1.0.0

This commit is contained in:
Ivan Ermilov 2016-05-11 14:41:37 +02:00
parent c5cd0d46ad
commit b155c89600
19 changed files with 320 additions and 106 deletions

68
base/Dockerfile Normal file
View File

@ -0,0 +1,68 @@
FROM debian:7

# MAINTAINER is deprecated — use a label instead.
LABEL maintainer="Ivan Ermilov <ivan.s.ermilov@gmail.com>"

# JDK plus the tools the build and entrypoint need (curl for the download
# below, net-tools for in-container debugging). One layer, apt lists removed
# in the same layer so they never persist in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        net-tools \
        openjdk-7-jdk \
    && rm -rf /var/lib/apt/lists/*

ENV JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64/

# Import the Apache Hadoop release-signing keys so the tarball signature
# can be verified before unpacking.
RUN gpg --keyserver pool.sks-keyservers.net --recv-keys \
    07617D4968B34D8F13D56E20BE5AAA0BA210C095 \
    2CAC83124870D88586166115220F69801F27E622 \
    4B96409A098DBD511DF2BC18DBAF69BEA7239D59 \
    9DD955653083EFED6171256408458C39E964B5FF \
    B6B3F7EDA5BA7D1E827DE5180DFF492D8EE2F25C \
    6A67379BEFC1AE4D5595770A34005598B8F47547 \
    47660BC98BC433F01E5C90581209E7F13D0C92B9 \
    CE83449FDC6DACF9D24174DCD1F99F6EE3CD2163 \
    A11DF05DEA40DA19CE4B43C01214CF3F852ADB85 \
    686E5EDF04A4830554160910DF0F5BBC30CD0996 \
    5BAE7CB144D05AD1BB1C47C75C6CC6EFABE49180 \
    AF7610D2E378B33AB026D7574FB955854318F669 \
    6AE70A2A38F466A5D683F939255ADF56C36C5F0F \
    70F7AB3B62257ABFBD0618D79FDB12767CC7352A \
    842AAB2D0BC5415B4E19D429A342433A56D8D31A \
    1B5D384B734F368052862EB55E43CAB9AEC77EAF \
    785436A782586B71829C67A04169AA27ECB31663 \
    5E49DA09E2EC9950733A4FF48F1895E97869A2FB \
    A13B3869454536F1852C17D0477E02D33DD51430 \
    A6220FFCC86FE81CE5AAC880E3814B59E4E11856 \
    EFE2E7C571309FE00BEBA78D5E314EEF7340E1CB \
    EB34498A9261F343F09F60E0A9510905F0B000F0 \
    3442A6594268AC7B88F5C1D25104A731B021B57F \
    6E83C32562C909D289E6C3D98B25B9B71EFF7770 \
    E9216532BF11728C86A11E3132CF4BF4E72E74D3 \
    E8966520DA24E9642E119A5F13971DA39475BD5D \
    1D369094D4CFAC140E0EF05E992230B1EB8C6EFA \
    A312CE6A1FA98892CB2C44EBA79AB712DE5868E6 \
    0445B7BFC4515847C157ECD16BA72FF1C99785DE \
    B74F188889D159F3D7E64A7F348C6D7A0DCED714 \
    4A6AC5C675B6155682729C9E08D51A0A7501105C \
    8B44A05C308955D191956559A5CEE20A90348D47

# key=value form — the space-separated ENV form is legacy/deprecated.
ENV HADOOP_VERSION=2.7.1
ENV HADOOP_URL=https://www.apache.org/dist/hadoop/common/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz

# Download, verify and unpack in a single layer so the tarball never
# persists in an intermediate layer.
RUN set -x \
    && curl -fSL "$HADOOP_URL" -o /tmp/hadoop.tar.gz \
    && curl -fSL "$HADOOP_URL.asc" -o /tmp/hadoop.tar.gz.asc \
    && gpg --verify /tmp/hadoop.tar.gz.asc \
    && tar -xvf /tmp/hadoop.tar.gz -C /opt/ \
    && rm /tmp/hadoop.tar.gz*

# Expose the config dir at the conventional /etc/hadoop location;
# mapred-site.xml only ships as a template, so materialize it.
RUN ln -s /opt/hadoop-$HADOOP_VERSION/etc/hadoop /etc/hadoop \
    && cp /etc/hadoop/mapred-site.xml.template /etc/hadoop/mapred-site.xml \
    && mkdir /opt/hadoop-$HADOOP_VERSION/logs \
    && mkdir /hadoop-data

ENV HADOOP_PREFIX=/opt/hadoop-$HADOOP_VERSION
ENV HADOOP_CONF_DIR=/etc/hadoop
# Consumed by entrypoint.sh: 1 = add the 0.0.0.0 bind-host properties.
ENV MULTIHOMED_NETWORK=1
ENV USER=root
ENV PATH=$HADOOP_PREFIX/bin/:$PATH

# COPY is preferred over ADD for plain local files.
COPY entrypoint.sh /entrypoint.sh
RUN chmod a+x /entrypoint.sh

ENTRYPOINT ["/entrypoint.sh"]

81
base/entrypoint.sh Normal file
View File

@ -0,0 +1,81 @@
#!/bin/bash
# Entrypoint for every image built on this base: materializes the Hadoop
# *-site.xml configuration from environment variables, applies multihomed
# network and optional Ganglia settings, then execs the container command
# so the Hadoop daemon runs as PID 1.

# Sensible default: point fs.defaultFS at this host unless the caller set it.
export CORE_CONF_fs_defaultFS=${CORE_CONF_fs_defaultFS:-hdfs://`hostname -f`:8020}

# addProperty <file> <name> <value>
# Inserts a <property> element immediately before </configuration> in <file>.
function addProperty() {
    local path=$1
    local name=$2
    local value=$3

    local entry="<property><name>$name</name><value>${value}</value></property>"
    # Escape '/' so the XML snippet is usable as a sed replacement string.
    local escapedEntry=$(echo $entry | sed 's/\//\\\//g')
    sed -i "/<\/configuration>/ s/.*/${escapedEntry}\n&/" $path
}

# configure <file> <module> <envPrefix>
# Translates every ${envPrefix}_* environment variable into a property in
# <file>. Property-name encoding inside the variable name:
#   '___' -> '-', '__' -> '_', '_' -> '.'
# e.g. YARN_CONF_yarn_log___aggregation___enable -> yarn.log-aggregation-enable
function configure() {
    local path=$1
    local module=$2
    local envPrefix=$3

    local var
    local value

    echo "Configuring $module"
    for c in `printenv | perl -sne 'print "$1 " if m/^${envPrefix}_(.+?)=.*/' -- -envPrefix=$envPrefix`; do
        name=`echo ${c} | perl -pe 's/___/-/g; s/__/_/g; s/_/./g'`
        var="${envPrefix}_${c}"
        value=${!var}
        echo " - Setting $name=$value"
        # Write into the file passed as $1 instead of re-deriving it from
        # $module (all call sites pass the matching *-site.xml).
        addProperty $path $name "$value"
    done
}

configure /etc/hadoop/core-site.xml core CORE_CONF
configure /etc/hadoop/hdfs-site.xml hdfs HDFS_CONF
configure /etc/hadoop/yarn-site.xml yarn YARN_CONF
configure /etc/hadoop/httpfs-site.xml httpfs HTTPFS_CONF
configure /etc/hadoop/kms-site.xml kms KMS_CONF

if [ "$MULTIHOMED_NETWORK" = "1" ]; then
    echo "Configuring for multihomed network"

    # HDFS: bind namenode endpoints on all interfaces and address datanodes
    # by hostname so clients on other networks can still reach them.
    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.rpc-bind-host 0.0.0.0
    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.servicerpc-bind-host 0.0.0.0
    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.http-bind-host 0.0.0.0
    addProperty /etc/hadoop/hdfs-site.xml dfs.namenode.https-bind-host 0.0.0.0
    addProperty /etc/hadoop/hdfs-site.xml dfs.client.use.datanode.hostname true
    addProperty /etc/hadoop/hdfs-site.xml dfs.datanode.use.datanode.hostname true

    # YARN (the original added yarn.nodemanager.bind-host twice; once is enough)
    addProperty /etc/hadoop/yarn-site.xml yarn.resourcemanager.bind-host 0.0.0.0
    addProperty /etc/hadoop/yarn-site.xml yarn.nodemanager.bind-host 0.0.0.0
    addProperty /etc/hadoop/yarn-site.xml yarn.timeline-service.bind-host 0.0.0.0

    # MAPRED
    # NOTE(review): this writes a YARN property name into mapred-site.xml —
    # presumably a mapreduce.* bind-host property was intended; confirm
    # upstream before changing, kept as-is to preserve behavior.
    addProperty /etc/hadoop/mapred-site.xml yarn.nodemanager.bind-host 0.0.0.0
fi

if [ -n "$GANGLIA_HOST" ]; then
    mv /etc/hadoop/hadoop-metrics.properties /etc/hadoop/hadoop-metrics.properties.orig
    mv /etc/hadoop/hadoop-metrics2.properties /etc/hadoop/hadoop-metrics2.properties.orig

    # metrics (v1) contexts
    for module in mapred jvm rpc ugi; do
        echo "$module.class=org.apache.hadoop.metrics.ganglia.GangliaContext31"
        echo "$module.period=10"
        echo "$module.servers=$GANGLIA_HOST:8649"
    done > /etc/hadoop/hadoop-metrics.properties

    # metrics2 sinks
    for module in namenode datanode resourcemanager nodemanager mrappmaster jobhistoryserver; do
        echo "$module.sink.ganglia.class=org.apache.hadoop.metrics2.sink.ganglia.GangliaSink31"
        echo "$module.sink.ganglia.period=10"
        echo "$module.sink.ganglia.supportsparse=true"
        echo "$module.sink.ganglia.slope=jvm.metrics.gcCount=zero,jvm.metrics.memHeapUsedM=both"
        echo "$module.sink.ganglia.dmax=jvm.metrics.threadsBlocked=70,jvm.metrics.memHeapUsedM=40"
        echo "$module.sink.ganglia.servers=$GANGLIA_HOST:8649"
    done > /etc/hadoop/hadoop-metrics2.properties
fi

# Quote "$@" so arguments containing spaces survive word splitting; exec
# replaces this shell so the command receives container signals directly.
exec "$@"

11
datanode/Dockerfile Normal file
View File

@ -0,0 +1,11 @@
FROM bde2020/hadoop-base:1.0.0

# MAINTAINER is deprecated — use a label instead.
LABEL maintainer="Ivan Ermilov <ivan.s.ermilov@gmail.com>"

# Block storage location; picked up by the base image entrypoint and
# checked by run.sh before the daemon starts.
ENV HDFS_CONF_dfs_datanode_data_dir=file:///hadoop/dfs/data

# Create the directory before declaring it a volume.
RUN mkdir -p /hadoop/dfs/data
VOLUME /hadoop/dfs/data

# COPY is preferred over ADD for plain local files.
COPY run.sh /run.sh
RUN chmod a+x /run.sh

CMD ["/run.sh"]

9
datanode/run.sh Normal file
View File

@ -0,0 +1,9 @@
#!/bin/bash
# Start the HDFS datanode after sanity-checking its data directory.

# Strip the file:// scheme to obtain a plain filesystem path.
datadir=`echo $HDFS_CONF_dfs_datanode_data_dir | perl -pe 's#file://##'`

# Quoted test: an empty or whitespace value must fail the check rather
# than silently pass it.
if [ ! -d "$datadir" ]; then
  echo "Datanode data directory not found: $datadir"
  exit 2
fi

# exec so the datanode becomes PID 1 and receives container stop signals.
exec $HADOOP_PREFIX/bin/hdfs --config "$HADOOP_CONF_DIR" datanode

63
docker-compose.yml Normal file
View File

@ -0,0 +1,63 @@
# Hadoop cluster (compose v1 format). All containers join the pre-created
# "hadoop" network and address each other by hostname.

namenode:
  image: bde2020/hadoop-namenode:1.0.0
  hostname: namenode
  container_name: namenode
  domainname: hadoop
  net: hadoop
  volumes:
    - ./data/namenode:/hadoop/dfs/name
  environment:
    # Used by namenode/run.sh when formatting an empty name directory.
    - CLUSTER_NAME=test
  env_file:
    - ./hadoop.env

resourcemanager:
  image: bde2020/hadoop-resourcemanager:1.0.0
  hostname: resourcemanager
  container_name: resourcemanager
  domainname: hadoop
  net: hadoop
  env_file:
    - ./hadoop.env

historyserver:
  image: bde2020/hadoop-historyserver:1.0.0
  hostname: historyserver
  container_name: historyserver
  domainname: hadoop
  net: hadoop
  volumes:
    # NOTE(review): unlike the other services this is a bare volume name,
    # not a ./data host path — confirm a named volume is intended here.
    - historyserver:/hadoop/yarn/timeline
  env_file:
    - ./hadoop.env

nodemanager1:
  image: bde2020/hadoop-nodemanager:1.0.0
  hostname: nodemanager1
  container_name: nodemanager1
  domainname: hadoop
  net: hadoop
  env_file:
    - ./hadoop.env

datanode1:
  # was uhopper/hadoop-datanode — aligned with the bde2020 images used by
  # every other service and built in this repository.
  image: bde2020/hadoop-datanode:1.0.0
  hostname: datanode1
  container_name: datanode1
  domainname: hadoop
  net: hadoop
  volumes:
    - ./data/datanode1:/hadoop/dfs/data
  env_file:
    - ./hadoop.env

datanode2:
  # was uhopper/hadoop-datanode — aligned with the bde2020 images used by
  # every other service and built in this repository.
  image: bde2020/hadoop-datanode:1.0.0
  hostname: datanode2
  container_name: datanode2
  domainname: hadoop
  net: hadoop
  volumes:
    - ./data/datanode2:/hadoop/dfs/data
  env_file:
    - ./hadoop.env

View File

@ -1,30 +0,0 @@
# NOTE(review): the diff header (@ -1,30 +0,0) shows this file is DELETED by
# the commit — it is the old single-container image (namenode + datanode in
# one CMD) superseded by the split base/namenode/datanode images added above.
FROM java:8-jre
MAINTAINER Yiannis Mouchakis <gmouchakis@iit.demokritos.gr>
# define hadoop version
ENV HADOOP_VERSION 2.7.1
# Hadoop env variables
ENV HADOOP_PREFIX /opt/hadoop
ENV HADOOP_CONF_DIR $HADOOP_PREFIX/conf
ENV PATH $PATH:$HADOOP_PREFIX/bin
ENV PATH $PATH:$HADOOP_PREFIX/sbin
RUN apt-get update && apt-get install -y \
wget \
tar \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# deploy hadoop (unverified download — no signature check, unlike base/Dockerfile)
RUN wget http://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz
RUN tar -zxf /hadoop-$HADOOP_VERSION.tar.gz
RUN rm /hadoop-$HADOOP_VERSION.tar.gz
RUN mv hadoop-$HADOOP_VERSION $HADOOP_PREFIX
# add configuration files
ADD core-site.xml $HADOOP_CONF_DIR/core-site.xml
ADD hdfs-site.xml $HADOOP_CONF_DIR/hdfs-site.xml
# formats the namenode in the background, then runs namenode and datanode together
CMD hdfs namenode -format -nonInteractive & hdfs namenode && hdfs datanode

View File

@ -1,24 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- NOTE(review): deleted by this commit (@ -1,24 -0,0) — static config of the
     old monolithic image, replaced by env-driven config in base/entrypoint.sh. -->
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://namenode:8020</value>
</property>
</configuration>

View File

@ -1,40 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- NOTE(review): deleted by this commit (@ -1,40 -0,0) — static config of the
     old monolithic image, replaced by env-driven config in base/entrypoint.sh. -->
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/hdfs-data/datanode</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/hdfs-data/namenode</value>
</property>
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>

View File

@ -1,5 +0,0 @@
# NOTE(review): deleted by this commit (@ -1,5 -0,0) — old untagged-base
# datanode image, replaced by datanode/Dockerfile above.
FROM bde2020/hadoop-base
MAINTAINER Yiannis Mouchakis <gmouchakis@iit.demokritos.gr>
CMD hdfs datanode

View File

@ -1,7 +0,0 @@
# NOTE(review): deleted by this commit (@ -1,7 -0,0) — old untagged-base
# namenode image, replaced by namenode/Dockerfile above.
FROM bde2020/hadoop-base
MAINTAINER Yiannis Mouchakis <gmouchakis@iit.demokritos.gr>
EXPOSE 50070 8020
CMD hdfs namenode -format -nonInteractive & hdfs namenode

24
hadoop.env Normal file
View File

@ -0,0 +1,24 @@
# Shared environment for all cluster containers; translated into the
# *-site.xml files by base/entrypoint.sh (___ -> '-', __ -> '_', _ -> '.').

# Uncomment to route Hadoop metrics to a Ganglia host (see entrypoint.sh).
#GANGLIA_HOST=ganglia.hadoop

# core-site.xml
CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*

# hdfs-site.xml
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false

# yarn-site.xml
YARN_CONF_yarn_log___aggregation___enable=true
YARN_CONF_yarn_resourcemanager_recovery_enabled=true
YARN_CONF_yarn_resourcemanager_store_class=org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore
YARN_CONF_yarn_resourcemanager_fs_state___store_uri=/rmstate
YARN_CONF_yarn_nodemanager_remote___app___log___dir=/app-logs
YARN_CONF_yarn_log_server_url=http://historyserver.hadoop:8188/applicationhistory/logs/
YARN_CONF_yarn_timeline___service_enabled=true
YARN_CONF_yarn_timeline___service_generic___application___history_enabled=true
YARN_CONF_yarn_resourcemanager_system___metrics___publisher_enabled=true
YARN_CONF_yarn_resourcemanager_hostname=resourcemanager
YARN_CONF_yarn_timeline___service_hostname=historyserver.hadoop

11
historyserver/Dockerfile Normal file
View File

@ -0,0 +1,11 @@
FROM bde2020/hadoop-base:1.0.0

# MAINTAINER is deprecated — use a label instead.
LABEL maintainer="Ivan Ermilov <ivan.s.ermilov@gmail.com>"

# LevelDB timeline store location; consumed by the base image entrypoint.
ENV YARN_CONF_yarn_timeline___service_leveldb___timeline___store_path=/hadoop/yarn/timeline

# Create the directory before declaring it a volume.
RUN mkdir -p /hadoop/yarn/timeline
VOLUME /hadoop/yarn/timeline

# COPY is preferred over ADD for plain local files.
COPY run.sh /run.sh
RUN chmod a+x /run.sh

CMD ["/run.sh"]

3
historyserver/run.sh Normal file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Start the YARN history/timeline server; exec so it runs as PID 1 and
# receives container stop signals directly.
exec $HADOOP_PREFIX/bin/yarn --config "$HADOOP_CONF_DIR" historyserver

11
namenode/Dockerfile Normal file
View File

@ -0,0 +1,11 @@
FROM bde2020/hadoop-base:1.0.0

# MAINTAINER is deprecated — use a label instead.
LABEL maintainer="Ivan Ermilov <ivan.s.ermilov@gmail.com>"

# Filesystem metadata location; consumed by the base image entrypoint and
# checked/formatted by run.sh.
ENV HDFS_CONF_dfs_namenode_name_dir=file:///hadoop/dfs/name

# Create the directory before declaring it a volume.
RUN mkdir -p /hadoop/dfs/name
VOLUME /hadoop/dfs/name

# COPY is preferred over ADD for plain local files.
COPY run.sh /run.sh
RUN chmod a+x /run.sh

CMD ["/run.sh"]

19
namenode/run.sh Normal file
View File

@ -0,0 +1,19 @@
#!/bin/bash
# Format (first run only) and start the HDFS namenode.

# Strip the file:// scheme to obtain a plain filesystem path.
namedir=`echo $HDFS_CONF_dfs_namenode_name_dir | perl -pe 's#file://##'`

# Quoted test: an empty value must fail the check rather than pass it.
if [ ! -d "$namedir" ]; then
  echo "Namenode name directory not found: $namedir"
  exit 2
fi

if [ -z "$CLUSTER_NAME" ]; then
  echo "Cluster name not specified"
  exit 2
fi

# Only format an EMPTY name directory — re-formatting an existing one
# would destroy the filesystem metadata.
if [ -z "$(ls -A "$namedir")" ]; then
  echo "Formatting namenode name directory: $namedir"
  $HADOOP_PREFIX/bin/hdfs --config "$HADOOP_CONF_DIR" namenode -format "$CLUSTER_NAME"
fi

# exec so the namenode becomes PID 1 and receives container stop signals.
exec $HADOOP_PREFIX/bin/hdfs --config "$HADOOP_CONF_DIR" namenode

7
nodemanager/Dockerfile Normal file
View File

@ -0,0 +1,7 @@
FROM bde2020/hadoop-base:1.0.0

# MAINTAINER is deprecated — use a label instead.
LABEL maintainer="Ivan Ermilov <ivan.s.ermilov@gmail.com>"

# COPY is preferred over ADD for plain local files.
COPY run.sh /run.sh
RUN chmod a+x /run.sh

CMD ["/run.sh"]

3
nodemanager/run.sh Normal file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Start the YARN nodemanager; exec so it runs as PID 1 and receives
# container stop signals directly.
exec $HADOOP_PREFIX/bin/yarn --config "$HADOOP_CONF_DIR" nodemanager

View File

@ -0,0 +1,7 @@
FROM bde2020/hadoop-base:1.0.0

# MAINTAINER is deprecated — use a label instead.
LABEL maintainer="Ivan Ermilov <ivan.s.ermilov@gmail.com>"

# COPY is preferred over ADD for plain local files.
COPY run.sh /run.sh
RUN chmod a+x /run.sh

CMD ["/run.sh"]

3
resourcemanager/run.sh Normal file
View File

@ -0,0 +1,3 @@
#!/bin/bash
# Start the YARN resourcemanager; exec so it runs as PID 1 and receives
# container stop signals directly.
exec $HADOOP_PREFIX/bin/yarn --config "$HADOOP_CONF_DIR" resourcemanager