Hadoop Cluster Setup

Installation is done on CentOS 7, using Hadoop 3.2.1.

Environment Preparation

IP              Hostname   Roles
192.168.9.214   node-1     ResourceManager, NameNode, DataNode, NodeManager
192.168.9.215   node-2     SecondaryNameNode, DataNode, NodeManager
192.168.9.216   node-3     DataNode, NodeManager

Set the hostnames

# On each node, use that node's own name:
hostnamectl set-hostname node-1    # node-2 / node-3 on the other machines

Configure HOSTS

192.168.9.214 node-1
192.168.9.215 node-2
192.168.9.216 node-3
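
These entries go into /etc/hosts on every node; one way to append them (assuming they are not already present):

cat >> /etc/hosts << "EOF"
192.168.9.214 node-1
192.168.9.215 node-2
192.168.9.216 node-3
EOF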

Configure passwordless SSH login

# On node-1:
ssh-keygen -t rsa
ssh-copy-id node-2
ssh-copy-id node-3
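
Because the start scripts also log into node-1 itself over SSH, copy the key to node-1 as well, then verify that each login works without a password:

ssh-copy-id node-1
ssh node-2 hostname
ssh node-3 hostname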

Disable the firewall and SELinux

systemctl stop firewalld
systemctl disable firewalld

systemctl stop iptables
systemctl disable iptables

setenforce 0
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config

reboot
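
After the reboot it is worth confirming that both are really off; a quick check:

firewall-cmd --state   # should report "not running"
getenforce             # should report "Disabled"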

Install the JDK

tar xvf jdk-8u221-linux-x64.tar.gz
mv jdk1.8.0_221 /usr/local/

cat >> /etc/profile << "EOF"
export JAVA_HOME=/usr/local/jdk1.8.0_221
export PATH=$JAVA_HOME/bin:$PATH
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
EOF

source /etc/profile
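
Verify that the JDK is now on the PATH:

java -version    # should report version 1.8.0_221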

Install and Configure Hadoop

Unpack and set environment variables

tar xvf hadoop-3.2.1.tar.gz
mv hadoop-3.2.1 /usr/local/hadoop

cat >> /etc/profile << "EOF"
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_COMMON_LIB_NATIVE_DIR"
EOF
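
Reload the profile (the original steps do this for the JDK, and it is needed here too) and check that the hadoop command resolves:

source /etc/profile
hadoop version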

Modify the configuration

All of the configuration files live under /usr/local/hadoop/etc/hadoop.

hadoop-env.sh

Add the JDK path:

export JAVA_HOME=/usr/local/jdk1.8.0_221
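
Since the daemons are started as root later on (see the jps output below), the Hadoop 3.x start/stop scripts also need the run-as users declared, otherwise start-all.sh aborts with an error like "Attempting to operate on hdfs namenode as root ... there is no HDFS_NAMENODE_USER defined". A sketch that appends them to hadoop-env.sh:

cat >> /usr/local/hadoop/etc/hadoop/hadoop-env.sh << "EOF"
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
EOF
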
core-site.xml
<configuration>
    <!-- Directory for files Hadoop generates at runtime; pick a directory you manage, default is /tmp/hadoop-${user.name} -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/data/hadoop/data/hddata</value>
        <description>A base for other temporary directories.</description>
    </property>
    <!-- The file system Hadoop uses: the address of the HDFS master, the NameNode -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node-1:9000</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
</configuration>
# Create this directory on the other machines as well
mkdir -p /data/hadoop/data/hddata
hdfs-site.xml
<configuration>
    <!-- HDFS replication factor, default 3 -->
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>node-2:50090</value>
    </property>
    <!-- dfs.name.dir / dfs.data.dir are the deprecated old names for these two properties -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/data/hadoop/hdfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/data/hadoop/hdfs/data</value>
    </property>
</configuration>
# Create these directories on the other machines as well
mkdir -p /data/hadoop/hdfs/name
mkdir -p /data/hadoop/hdfs/data
mapred-site.xml
<configuration>
    <!-- Framework MapReduce runs on: yarn (default is local) -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- The Hadoop 1.x setting mapred.job.tracker (e.g. http://node-1:9001) is obsolete and ignored when running on YARN -->
</configuration>
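
If MapReduce jobs later fail with an error like "Could not find or load main class org.apache.hadoop.mapreduce.v2.app.MRAppMaster", Hadoop 3.x also needs HADOOP_MAPRED_HOME exported to the job containers; a commonly used addition inside the <configuration> element (the path assumes the install location above):

<property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
</property>
<property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
</property>
<property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=/usr/local/hadoop</value>
</property>
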
yarn-site.xml
<configuration>

    <!-- Site specific YARN configuration properties -->
    <!-- Address of the YARN master (the ResourceManager) -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>node-1</value>
    </property>
    <!-- Auxiliary service run on the NodeManager; mapreduce_shuffle must be set for MapReduce jobs to run (not set by default) -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
workers

In Hadoop 3.x this file replaces the slaves file used in Hadoop 2.x; it lists the machines on which the start scripts launch the worker daemons (DataNode and NodeManager). Delete the existing content and add the machines:

node-1
node-2
node-3

Sync the files to the other servers

rsync -avzP /usr/local/hadoop root@node-2:/usr/local/
rsync -avzP /usr/local/hadoop root@node-3:/usr/local/

scp /etc/profile root@node-2:/etc
scp /etc/profile root@node-3:/etc
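
A quick way to confirm the copies worked on the other nodes (new login shells pick up /etc/profile automatically):

ssh node-2 "source /etc/profile && hadoop version"
ssh node-3 "source /etc/profile && hadoop version"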

Start the Cluster

Format HDFS

Formatting is performed only once, before the very first start: it carries out the initial setup of the file system metadata, since HDFS does not exist yet at that point. Once the cluster has been started, do not format again (a reformat generates a new cluster ID that no longer matches the existing DataNodes).

# Run on node-1 (the NameNode):
hdfs namenode -format

Start the services

cd /usr/local/hadoop
./sbin/start-all.sh
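
start-all.sh is only a wrapper; the HDFS and YARN daemons can also be started (and stopped) separately:

./sbin/start-dfs.sh
./sbin/start-yarn.sh

./sbin/stop-yarn.sh
./sbin/stop-dfs.sh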

Check the services

[root@node-1 hadoop]# jps
12996 NodeManager
12231 NameNode
12409 DataNode
24748 Jps
12782 ResourceManager

[root@node-2 ~]# jps
18001 DataNode
21990 Jps
18106 SecondaryNameNode
18186 NodeManager


[root@node-3 ~]# jps
18001 DataNode
21990 Jps
18186 NodeManager
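
The web UIs are another quick check; with Hadoop 3.x defaults the NameNode UI listens on port 9870 (it was 50070 in 2.x) and the ResourceManager on 8088:

http://192.168.9.214:9870
http://192.168.9.214:8088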

Test

hdfs dfs -ls /
hdfs dfs -mkdir /wisp
hdfs dfs -ls /
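
To exercise YARN and MapReduce as well, the examples jar shipped with the distribution can be used; a small pi job (the jar path assumes the default 3.2.1 layout):

hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.1.jar pi 2 10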

Notes