前置环境
Linux环境Zookeeper集群安装(详细教程)-CSDN博客
Hadoop HA高可用集群3.3.6搭建(详细教程)-CSDN博客
MySQL8.0.40离线安装(详细教程)_mysql 8.0.40 ftp-CSDN博客
Hadoop3.3.6官网下载链接地址
部署规划
服务器节点 | MetaStore | HiveServer2 |
bigdata01 | √ | |
bigdata02 | √ | √ |
bigdata03 | | √ |
1.先在bigdata01解压安装(先配置一个节点再分发)
# 解压到/opt目录下
tar -zxvf /opt/apk/apache-hive-4.0.1-bin.tar.gz -C /opt
# 创建软链接
ln -s /opt/apache-hive-4.0.1-bin /opt/apps/hive
# 添加环境变量
vi /etc/profile.d/my_env.sh
### 在my_env.sh中添加以下内容
# HIVE_HOME
export HIVE_HOME=/opt/apps/hive
export PATH=$PATH:$HIVE_HOME/bin
# 同步配置环境到其他节点,并生效
xsync -i "bigdata02 bigdata03" /etc/profile.d/my_env.sh
xcall source /etc/profile
2.上传MySQL8.0驱动jar包
MySQL8.0驱动下载链接: 百度网盘 请输入提取码 提取码: chxd 复制这段内容后打开百度网盘手机App,操作更方便哦 --来自百度网盘超级会员v6的分享
# 将下载的jar包添加到目录$HIVE_HOME/lib下
[root@bigdata01 apk]# cp mysql-connector-java-8.0.16.jar $HIVE_HOME/lib
# 查看hadoop下的guava版本
[root@bigdata01 lib]# ll /opt/apps/hadoop/share/hadoop/common/lib |grep guava
-rw-r--r--. 1 hadoop hadoop 2747878 Jan 4 14:55 guava-27.0-jre.jar
-rw-r--r--. 1 hadoop hadoop 3362359 Jan 4 14:55 hadoop-shaded-guava-1.1.1.jar
-rw-r--r--. 1 hadoop hadoop 2199 Jan 4 14:55 listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar
# 查看hive下的guava版本,发现不一致
[root@bigdata01 lib]# ll /opt/apps/hive/lib |grep guava
-rw-r--r--. 1 root root 2575022 Sep 25 15:52 guava-22.0.jar
# 删除hive的guava,复制hadoop的到hive
[root@bigdata01 lib]# cp /opt/apps/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /opt/apps/hive/lib/
[root@bigdata01 lib]# mv /opt/apps/hive/lib/guava-22.0.jar /opt/apps/hive/lib/guava-22.0.jar_bak
3.进入bigdata04(数据库服务器)
[root@bigdata04 ~]# mysql -uroot -p
mysql> create user 'hive'@'%' identified by 'Lucky@#998**';
Query OK, 0 rows affected (0.02 sec)
mysql> GRANT ALL PRIVILEGES ON hive.* TO 'hive'@'%'; -- 授权hive用户hive数据库所有权限
Query OK, 0 rows affected (0.00 sec)
mysql> flush privileges; -- 刷新权限生效
Query OK, 0 rows affected (0.00 sec)
mysql> quit;
Bye
4.配置日志组件(bigdata01)
# 创建日志目录
mkdir -p /opt/apps/hive/logs
# 拷贝默认文件
cp /opt/apps/hive/conf/hive-log4j2.properties.template /opt/apps/hive/conf/hive-log4j2.properties
# 修改日志配置文件
vi /opt/apps/hive/conf/hive-log4j2.properties
# 找到日志的位置,修改如下:
property.hive.log.dir = /opt/apps/hive/logs
5.回到bigdata01配置hive(先配置一个节点再分发)
# Hive配置文件里要用到HDFS的一些路径,需要先手动创建
hdfs dfs -mkdir -p /usr/hive/{warehouse,tmp,log}
hdfs dfs -chmod -R g+w /usr/hive
进入Hive的配置目录: cd /opt/apps/hive/conf/
5.1配置hive环境变量
配置hive-env.sh
export HADOOP_HOME=/opt/apps/hadoop
export HIVE_CONF_DIR=/opt/apps/hive/conf
export HIVE_AUX_JARS_PATH=/opt/apps/hive/lib
export JAVA_HOME=/opt/apps/java
配置hive-site.xml
<configuration>
<!-- ################# MetaStore相关配置 ################# -->
<!-- 数据库地址,名称 -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://bigdata04:3306/hive?createDatabaseIfNotExist=true&amp;serverTimezone=GMT%2B8&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
</property>
<!-- 数据库连接用户 -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive</value>
</property>
<!-- 数据库连接密码 -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>Lucky@#998**</value>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
</property>
<!-- 显示数据库名称 -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<!--显示表的列名 -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- MetaStore 高可用 -->
<property>
<name>hive.metastore.uris</name>
<value>thrift://bigdata01:9083,thrift://bigdata02:9083</value>
</property>
<!-- hdfs 上 Hive元数据存放的位置 -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/usr/hive/warehouse</value>
</property>
<!-- Hive作业的HDFS根目录位置 -->
<property>
<name>hive.exec.scratchdir</name>
<value>/usr/hive/tmp</value>
</property>
<!-- Hive作业的HDFS根目录创建权限 -->
<property>
<name>hive.scratch.dir.permission</name>
<value>775</value>
</property>
<!-- ####################### HiveServer2相关配置 ######################## -->
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>true</value>
</property>
<property>
<name>hive.server2.zookeeper.namespace</name>
<value>hiveserver2_zk</value>
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value>bigdata01:2181,bigdata02:2181,bigdata03:2181</value>
</property>
<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
</property>
<property>
<name>hive.server2.thrift.bind.host</name>
<value>bigdata01</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10001</value>
</property>
</configuration>
5.2 需修改hadoop 配置(安装hadoop时已配置,可跳过)
在hadoop安装目录下的core-site.xml中,需要开启hadoop代理用户配置
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.users</name>
<value>*</value>
</property>
5.3初始化元数据库
配置完hive-site.xml,需要先初始化hive元数据库,选择在一台节点初始化即可
schematool -initSchema -dbType mysql -verbose
5.4 启动metastore(规划为bigdata01、bigdata02部署)
# 先把hive整体同步到bigdata02、bigdata03
[root@bigdata01 apps]# xsync -i "bigdata02 bigdata03" /opt/apps/hive
[root@bigdata01 apps]# xsync -i "bigdata02 bigdata03" /opt/apache-hive-4.0.1-bin/
# 修改bigdata02,bigdata03中的hiveserver2服务的配置,再启动
[root@bigdata02 conf]# vi /opt/apps/hive/conf/hive-site.xml
<!-- 修改值为bigdata02,其余节点以此类推 -->
<!-- 指定metastore连接地址 -->
<property>
<name>hive.server2.thrift.bind.host</name>
<value>bigdata02</value>
</property>
# 希望哪台服务器配置元数据库服务,就在那台执行hive --service metastore >> $HIVE_HOME/logs/metastore.log 2>&1 &
# 可查看端口9083是否占用判断启动成功,xcall -i "bigdata01 bigdata02" "ss -nptl |grep 9083"
# 以下是bigdata01 bigdata02都执行启动
[root@bigdata01 apps]# xcall -i "bigdata01 bigdata02" "hive --service metastore >> $HIVE_HOME/logs/metastore.log 2>&1 &"
5.5 启动hiveserver2服务 (规划为bigdata02、bigdata03部署)
分别在bigdata02 、bigdata03启动
# 可查看端口10001是否占用判断启动成功,xcall -i "bigdata02 bigdata03" "ss -nptl |grep 10001"
[root@bigdata01 apps]# xcall -i "bigdata02 bigdata03" "hive --service hiveserver2 >> $HIVE_HOME/logs/hiveserver2.log 2>&1 &"
5.6访问zk
[root@bigdata02 conf]# zkCli.sh -server bigdata01:2181
[zk: localhost:2181(CONNECTED) 1] ls /
[hadoop-ha, hiveserver2_zk, killQueries, rmstore, yarn-leader-election, zookeeper]
[zk: localhost:2181(CONNECTED) 2] ls /h
hadoop-ha hiveserver2_zk
[zk: localhost:2181(CONNECTED) 2] ls /hiveserver2_zk
[serverUri=bigdata01:10001;version=4.0.1;sequence=0000000003, serverUri=bigdata02:10001;version=4.0.1;sequence=0000000002]
6.使用hive客户端
hive4.0已经弃用默认的Hive Cli
使用命令hive或者beeline
6.1客户端连接
[root@bigdata03 conf]# hive
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/apache-hive-4.0.1-bin/lib/log4j-slf4j-impl-2.18.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop-3.3.6/share/hadoop/common/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/opt/apache-hive-4.0.1-bin/lib/log4j-slf4j-impl-2.18.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/opt/hadoop-3.3.6/share/hadoop/common/lib/slf4j-reload4j-1.7.36.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Beeline version 4.0.1 by Apache Hive
beeline>
连接方式一:
!connect jdbc:hive2://bigdata01:10001
连接方式二:
!connect jdbc:hive2://bigdata01:2181,bigdata02:2181,bigdata03:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2_zk root
6.2 访问hiveserver2的web页面
部署了hiveserver2的服务器均可访问
http://bigdata02:10002/ 或者 http://bigdata03:10002/
7.关闭hive集群
hive本身没有直接关停服务的命令,可以通过端口进程的方式进行关闭。两个RunJar分别为metastore、hiveserver2的进程,可以通过“kill -9 进程号”进行关停
附录
附录一、hive集群一键启停脚本
连接方式一:
!connect jdbc:hive2://bigdata01:10001
连接方式二:
!connect jdbc:hive2://bigdata01:2181,bigdata02:2181,bigdata03:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2_zk root
退出命令 !quit 或者 !exit
#!/bin/bash
# chmod a+x /usr/local/bin/hiveCluster
#
# One-key start/stop/status script for the Hive HA cluster.
#   metastore   : bigdata01, bigdata02  (listens on port 9083)
#   hiveserver2 : bigdata02, bigdata03  (listens on port 10001)
#
# Usage: hiveCluster start|stop|status

if [ $# -lt 1 ]; then
  echo "No Args Input!"
  exit 1
fi

readonly METASTORE_HOSTS=(bigdata01 bigdata02)
readonly HS2_HOSTS=(bigdata02 bigdata03)
readonly METASTORE_PORT=9083
readonly HS2_PORT=10001

#######################################
# Start a hive service on a remote host, detached in the background.
# Arguments: $1 host, $2 service name (metastore|hiveserver2)
# Note: \$HIVE_HOME is escaped so it expands on the REMOTE shell
# (the local shell running this script may not have it set), and
# nohup keeps the service alive after the ssh session exits.
#######################################
start_remote() {
  local host=$1 service=$2
  echo "Starting ${service} on [${host}]"
  ssh "$host" "nohup hive --service ${service} >> \$HIVE_HOME/logs/${service}.log 2>&1 &"
}

#######################################
# Stop whatever process listens on a port on a remote host (SIGTERM).
# Arguments: $1 host, $2 service name (for the message), $3 port
#######################################
stop_remote() {
  local host=$1 service=$2 port=$3
  echo "Stopping ${service} on [${host}]"
  ssh "$host" "ss -nptl | grep ${port} | awk '{print \$6}' | awk -F'pid=' '{print \$2}' | awk -F',' '{print \$1}' | xargs -r kill"
}

#######################################
# Report whether a service is listening on a port on a remote host.
# Arguments: $1 host, $2 service name (for the message), $3 port
#######################################
check_remote() {
  local host=$1 service=$2 port=$3
  echo "Checking ${service} on [${host}]"
  if ssh "$host" "ss -tuln | grep -q ':${port}'"; then
    echo "${service} on ${host} is running."
  else
    echo "${service} on ${host} is not running."
  fi
}

case $1 in
"start")
  echo -e "\n================= 启动 hive 高可用集群 ================="
  echo " ------------------- 启动 metastore 服务集群 --------------------"
  for h in "${METASTORE_HOSTS[@]}"; do
    start_remote "$h" metastore
  done
  # Give the metastores a head start before hiveserver2 connects to them.
  sleep 4
  echo " ------------------- 启动 hiveserver2 服务集群 --------------------"
  for h in "${HS2_HOSTS[@]}"; do
    start_remote "$h" hiveserver2
  done
  sleep 2
  echo -e "\n"
  ;;
"stop")
  echo -e "\n================= 关闭 hive 高可用集群 ================="
  # Stop hiveserver2 first so nothing is left talking to a dead metastore.
  echo " ------------------- 关闭 hiveserver2 服务集群 --------------------"
  for h in "${HS2_HOSTS[@]}"; do
    stop_remote "$h" hiveserver2 "$HS2_PORT"
  done
  # 等待4秒
  sleep 4
  echo " ------------------- 关闭 metastore 服务集群 --------------------"
  for h in "${METASTORE_HOSTS[@]}"; do
    stop_remote "$h" metastore "$METASTORE_PORT"
  done
  echo -e "\n"
  ;;
"status")
  echo -e "\n================= 检查 hive 高可用集群状态 ================="
  echo " ------------------- 检查 metastore 服务状态 --------------------"
  for h in "${METASTORE_HOSTS[@]}"; do
    check_remote "$h" metastore "$METASTORE_PORT"
  done
  echo " ------------------- 检查 hiveserver2 服务状态 --------------------"
  for h in "${HS2_HOSTS[@]}"; do
    check_remote "$h" hiveserver2 "$HS2_PORT"
  done
  echo -e "\n"
  ;;
*)
  echo "Input Args Error!"
  ;;
esac