Hive Cluster Configuration

  • Extract Hive [run on storageServer1]
cd /data/upload_dir
# extract into /data, then rename the directory
tar -zxvf apache-hive-3.1.2-bin.tar.gz -C ../
mv /data/apache-hive-3.1.2-bin /data/hive-3.1.2
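
The systemd units later in this guide run Hive as the app user, so it is convenient to hand ownership of the whole installation to that user up front; a minimal sketch, assuming the app user and group used throughout this guide:

# give the app user ownership of the full installation
chown -R app:app /data/hive-3.1.2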

  • Configure the Hive environment [run on storageServer1]
# switch to the root user first
vim /etc/profile

#HIVE_HOME
export HIVE_HOME=/data/hive-3.1.2
export PATH=$PATH:$HIVE_HOME/bin

source /etc/profile

# upload the PostgreSQL JDBC driver to /data/hive-3.1.2/lib, then fix ownership
chown -R app:app /data/hive-3.1.2/lib

# rename Hive's bundled SLF4J binding to avoid a duplicate-binding conflict with Hadoop's
mv $HIVE_HOME/lib/log4j-slf4j-impl-2.18.0.jar $HIVE_HOME/lib/log4j-slf4j-impl-2.18.0.bak
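
A quick sanity check that the PATH entry and the SLF4J fix took effect; run as the app user:

# hive --version should print 3.1.2 without a duplicate SLF4J binding warning
which hive
hive --version
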
  • Edit the Hive configuration files [run on storageServer1]

cd /data/hive-3.1.2/conf
cp hive-env.sh.template hive-env.sh
vim hive-env.sh

# define the Hadoop and Hive paths
export HADOOP_HOME=/data/hadoop-3.3.0
export HIVE_CONF_DIR=/data/hive-3.1.2/conf

cp hive-default.xml.template hive-site.xml
vim hive-site.xml
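
Note: the hive-default.xml.template shipped with 3.1.2 is known to contain an illegal character entity (&#8; inside the hive.txn.xlock.iow description) that makes the copied file unparseable. If Hive later fails with a WstxParsingException, strip it:

# remove the illegal &#8; entity left in the 3.1.2 template
sed -i 's/&#8;//g' hive-site.xml
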
  • hive-site.xml contents [run on storageServer1]
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?><!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--><configuration>
<!-- Override temp-file paths -->
<property>
<name>system:java.io.tmpdir</name>
<value>/data/hive/tmp</value>
</property>
<property>
<name>system:user.name</name>
<value>${user.name}</value>
</property>
<property>
<name>hive.log.file</name>
<value>/data/hive/logs/hive.log</value>
</property>

<!-- HDFS root directory for Hive jobs -->
<property>
<name>hive.exec.scratchdir</name>
<value>/data/hive/tmp</value>
</property>
<!-- Write permissions for the Hive scratch root -->
<property>
<name>hive.scratch.dir.permission</name>
<value>775</value>
</property>
<!-- HDFS location for Hive table data -->
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/data/hive/warehouse</value>
</property>

<!-- Metastore database JDBC URL (host and database name) -->
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:postgresql://postnd01:5432/hivemeta_db</value>
</property>
<!-- JDBC driver class -->
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>org.postgresql.Driver</value>
</property>
<!-- Database user name -->
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>app</value>
</property>
<!-- Database user password -->
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>yourpassword</value>
</property>

<property>
<name>hive.server2.datasource.provider</name>
<value>org.apache.hive.jdbc.HikariCPDataSourceProvider</value>
</property>
<property>
<name>hive.server2.datasource.hikari.connection-test-query</name>
<value>SELECT 1</value>
</property>
<property>
<name>hive.server2.datasource.hikari.validation-timeout</name>
<value>5000</value>
</property>
<property>
<name>hive.server2.datasource.hikari.connection-timeout</name>
<value>23000</value> <!-- default is 30 seconds -->
</property>
<property>
<name>hive.server2.datasource.hikari.maximum-pool-size</name>
<value>10</value>
</property>
<!-- HiveServer2-related settings below -->
<property>
<name>hive.server2.thrift.bind.host</name>
<value>storageServer1</value>
<description>HiveServer2 address that ZooKeeper returns to Beeline clients; set this to the local hostname on every HiveServer2 HA node</description>
</property>

<property>
<name>hive.metastore.uris</name>
<value>thrift://storageServer1:9083,thrift://storageServer2:9083,thrift://storageServer3:9083,thrift://storageServer4:9083</value>
</property>
<property>
<name>hive.metastore.event.db.notification.api.auth</name>
<value>false</value>
</property> 

<property>
<name>hive.server2.active.passive.ha.enable</name>
<value>true</value>
</property>
<property>
<name>hive.server2.support.dynamic.service.discovery</name>
<value>true</value>
</property>

<property>
<name>hive.server2.zookeeper.namespace</name>
<value>hiveserver2_ha</value>
</property>

<property>
<name>hive.zookeeper.quorum</name>
<value>queryServer1,queryServer2,queryServer3</value>
</property>
<property>
<name>hive.zookeeper.client.port</name>
<value>2181</value>
</property>
<property>
<name>hive.server2.thrift.port</name>
<value>10000</value>
</property>
<!-- Default Beeline connection settings (beeline.hs2.*) -->
<property>
<!-- HiveServer2 user name -->
<name>beeline.hs2.connection.user</name>
<value>hive3</value>
</property>

<property>
<!-- HiveServer2 password -->
<name>beeline.hs2.connection.password</name>
<value>yourpassword</value>
</property>

<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>

<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<property>
<name>hive.metastore.fshandler.threads</name>
<value>20</value>
</property>
<property>
<name>hive.server2.enable.doAs</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.integral.jdo.pushdown</name>
<value>true</value>
</property>
</configuration>
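
Hive usually creates these directories on first use, but pre-creating them avoids permission surprises. A sketch, assuming HDFS is up and the commands run as the app user (paths taken from the properties above):

# local temp and log directories (system:java.io.tmpdir, hive.log.file)
mkdir -p /data/hive/tmp /data/hive/logs
# HDFS scratch and warehouse directories (hive.exec.scratchdir, hive.metastore.warehouse.dir)
hdfs dfs -mkdir -p /data/hive/tmp /data/hive/warehouse
hdfs dfs -chmod 775 /data/hive/tmp /data/hive/warehouse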

  • Initialize the metastore database with Hive's schematool [run on storageServer1]
$HIVE_HOME/bin/schematool -dbType postgres -initSchema
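
schematool assumes the hivemeta_db database from the JDBC URL above already exists on postnd01. A hedged sketch to create it, assuming a postgres superuser and that the app role already exists:

# on postnd01, as a PostgreSQL superuser
psql -U postgres -c "CREATE DATABASE hivemeta_db OWNER app;"
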
  • Distribute the configured Hive directory to the other servers [run on storageServer1]; afterwards fix the bind address on each node as shown below
cd /data
scp -r hive-3.1.2 storageServer2:`pwd`/
scp -r hive-3.1.2 storageServer3:`pwd`/
scp -r hive-3.1.2 storageServer4:`pwd`/
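
As noted in the hive-site.xml description, hive.server2.thrift.bind.host must point at the local hostname on every HiveServer2 node. A sketch that patches each copy, assuming the <name>/<value> pair stays on consecutive lines as shown above:

for host in storageServer2 storageServer3 storageServer4; do
  ssh "$host" "sed -i '/hive.server2.thrift.bind.host/{n;s#<value>[^<]*</value>#<value>$host</value>#}' /data/hive-3.1.2/conf/hive-site.xml"
done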

systemd unit files [run on all servers]

vim /usr/lib/systemd/system/hive-metastore.service

[Unit]
Description=Hive Metastore Service
After=network.target

[Service]
Type=simple
User=app
Group=app
Environment=HADOOP_HOME=/data/hadoop-3.3.0
Environment=HIVE_HOME=/data/hive-3.1.2
WorkingDirectory=/data/hive-3.1.2
ExecStart=/data/hive-3.1.2/bin/hive --service metastore
SuccessExitStatus=143
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target

vim /usr/lib/systemd/system/hive-server2.service

[Unit]
Description=HiveServer2 Service
After=network.target

[Service]
Type=simple
User=app
Group=app
Environment=HADOOP_HOME=/data/hadoop-3.3.0
Environment=HIVE_HOME=/data/hive-3.1.2
WorkingDirectory=/data/hive-3.1.2
ExecStart=/data/hive-3.1.2/bin/hive --service hiveserver2
SuccessExitStatus=143
Restart=on-failure
RestartSec=5

[Install]
WantedBy=multi-user.target

  • Enable the services to start at boot via systemctl
systemctl daemon-reload
systemctl enable hive-metastore.service
systemctl enable hive-server2.service
systemctl list-unit-files | grep hive

  • Run the following so the app user can manage the Hive services with systemctl
echo "app ALL = (root) NOPASSWD:/usr/bin/systemctl start Hived,/usr/bin/systemctl restart Hived,/usr/bin/systemctl stop Hived,/usr/bin/systemctl reload Hived,/usr/bin/systemctl status Hived" | sudo tee /etc/sudoers.d/app