postgresql+repmgr+keepalived (16.0)
ip6
postgresql
5432:用于接收客户端连接请求
ip7
postgresql
5432:用于接收客户端连接请求
前置准备工作(ip6/ip7 两台服务器执行相同操作) 1 2 3 4 5 6 7 8 9 10 11 mkdir -p /data adduser app passwd app # 设置密码如下 yourpassword chown -R app:app /data vim /etc/sudoers app ALL = (root) NOPASSWD:/usr/bin/systemctl start postgresql.service,/usr/bin/systemctl restart postgresql.service,/usr/bin/systemctl stop postgresql.service,/usr/bin/systemctl reload postgresql.service,/usr/bin/systemctl status postgresql.service
1 2 3 4 vim / eto/ hosts ip6 storageServer3 ip7 storageServer4
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 ssh- keygen - t rsa - b 4096 cd ~ / .ssh cat id_rsa.pub # 在第一台服务器上创建authorized keys vim authorized_keys # 同一组别内不能赋予写权限,否则免密失败 chmod 600 ~ / .ssh/ authorized_keys # 把其他服务器的密钥id_rsa.pub加入authorized_keys # 分发到其他服务器上 scp / home/ app/ .ssh/ authorized_keys pguser@{目标IP}:/ home/ app/ .ssh/ authorized_keys # 用ssh测试服务器之间连通性 ssh app@{目标IP}
postgresql 1 2 3 4 5 6 7 8 9 yum install -y gcc gcc-c++&& yum install -y libicu-dev && yum install -y readline-devel&& yum install -y zlib-devel ./configure --prefix=/data/postgresql16 --without-icu make && make install
1 2 3 4 5 6 7 cd /usr/lib/systemd/system/ vim postgresql.service //配置自启 systemctl deamon-reload systemctl enable postgresql.service
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 [Unit] Description=postgresql.service After=network.target [Service] Type=forking User=app Group=app WorkingDirectory=/data/postgresql16 ExecStart=/data/postgresql16/bin/pg_ctl start -D /data/postgresql16/data ExecReload=/data/postgresql16/bin/pg_ctl restart -D /data/postgresql16/data ExecStop=/data/postgresql16/bin/pg_ctl stop -D /data/postgresql16/data Restart=always RestartSec=3 [Install] WantedBy=multi-user.target
1 2 3 4 5 6 # 从节点不需要注册 initdb -Upostgres -W -D /data/postgresql16/data -k # 密码如下 yourpassword
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 # 监听地址,'0.0.0.0' 表示监听所有可用的 IP 地址; listen_addresses = '0.0.0.0' # 最大连接数,100 为默认值,修改需要重新启动 PostgreSQL; max_connections = 100 # UNIX 套接字目录,修改需要重新启动 PostgreSQL; unix_socket_directories = '/tmp' # 共享缓冲区大小,至少为 128MB; shared_buffers = 128MB # 动态共享内存类型,默认通常是 posix; dynamic_shared_memory_type = posix # WAL 级别,可选值为 'minimal', 'replica', 或 'logical'; wal_level = 'hot_standby' # 同时进行非关键更新的完整页写入; wal_log_hints = on # 最大 WAL 文件大小为 1GB; max_wal_size = 1GB # 最小 WAL 文件大小为 80MB; min_wal_size = 80MB # 启用归档模式,可选值为 off, on, 或 always; archive_mode = on # 用于归档 WAL 文件的命令,'/bin/true' 表示禁用归档; archive_command = 'cp %p /data/postgresql16/archive/%f' # 最大的 WAL 发送者进程数; max_wal_senders = 10 # 热备模式,on 表示允许在恢复期间执行查询,off 则禁止; hot_standby = on # 日志时区,'Asia/Shanghai' 表示上海时区; log_timezone = 'Asia/Shanghai' # 日期样式,'iso, mdy' 表示国际标准日期格式; datestyle = 'iso, mdy' # 时区,'Asia/Shanghai' 表示上海时区; timezone = 'Asia/Shanghai' # 系统错误消息的区域设置; lc_messages = 'en_US.UTF-8' # 货币格式的区域设置; lc_monetary = 'en_US.UTF-8' # 数字格式的区域设置; lc_numeric = 'en_US.UTF-8' # 时间格式的区域设置; lc_time = 'en_US.UTF-8' # 默认文本搜索配置; default_text_search_config = 'pg_catalog.english' # 预加载库,'repmgr' 表示预加载 repmgr 模块,修改需要重新启动 PostgreSQL; shared_preload_libraries = 'repmgr'
1 2 3 4 5 6 7 8 9 10 11 12 # Allow replication connections from localhost, by a user with the # replication privilege. local replication all trust host replication all 127.0.0.1/32 trust host replication all ::1/128 trust host all all ip6/24 trust host all all ip7/24 trust host replication repmgr ip7/24 trust host replication repmgr ip7/24 trust host all all 0.0.0.0/0 md5
repmgr
1 2 3 4 tar -zxvf repmgr-5.4.1.tar.gz -C /data mv repmgr-5.4.1/ repmgr541 chown -R postgres:postgres /data/repmgr541/
1 2 3 4 5 6 7 8 9 10 11 12 13 14 yum install libcurl-devel yum install json-c-devel # 如果安装失败,上传json安装包后执行以下操作 rpm -ivh json-c-devel-0.11-4.el7_0.x86_64.rpm yum install flex export PG_CONFIG=/data/postgresql16/bin/pg_config ./configure make && make install # 编译成功后,执行以下命令可以发现两个repmgr文件 ll /data/postgresql16/bin | grep repmgr
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 cd /data/postgresql16 vim repmgr.conf #repmgr.conf主节点ip6内容 node_id=1 node_name=node1 conninfo='host=storageserver3 user=repmgr dbname=repmgr connect_timeout=2' data_directory='/data/postgresql16/data' config_directory='/data/postgresql16/repmgr.conf' log_level='INFO' log_facility='STDERR' log_file='/data/postgresql16/repmgr.log' promote_command='/data/postgresql16/bin/repmgr standby promote -f /data/postgresql16/repmgr.conf' follow_command='/data/postgresql16/bin/repmgr standby follow -f /data/postgresql16/repmgr.conf -W --upstream-node-id=%n' failover='automatic' service_start_command = 'sudo systemctl start postgresql.service' service_stop_command = 'sudo systemctl stop postgresql.service' service_restart_command = 'sudo systemctl restart postgresql.service' service_reload_command = 'sudo systemctl reload postgresql.service' repmgrd_service_start_command = '/data/postgresql16/bin/repmgrd -f /data/postgresql16/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize' repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`' #repmgr.conf从节点ip7内容 node_id=2 node_name=node2 conninfo='host=storageserver4 user=repmgr dbname=repmgr connect_timeout=2' data_directory='/data/postgresql16/data' config_directory='/data/postgresql16/repmgr.conf' log_level='INFO' log_facility='STDERR' log_file='/data/postgresql16/repmgr.log' promote_command='/data/postgresql16/bin/repmgr standby promote -f /data/postgresql16/repmgr.conf' follow_command='/data/postgresql16/bin/repmgr standby follow -f /data/postgresql16/repmgr.conf -W --upstream-node-id=%n' failover='automatic' service_start_command = 'sudo systemctl start postgresql.service' service_stop_command = 'sudo systemctl stop postgresql.service' service_restart_command = 'sudo systemctl restart postgresql.service' service_reload_command = 'sudo systemctl reload postgresql.service' repmgrd_service_start_command = '/data/postgresql16/bin/repmgrd -f /data/postgresql16/repmgr.conf --pid-file /tmp/repmgrd.pid --daemonize' repmgrd_service_stop_command = 'kill `cat /tmp/repmgrd.pid`'
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 [Unit] Description=repmgrd.service After=network.target postgresql.service [Service] Type=forking User=postgres Group=postgres WorkingDirectory=/data/postgresql16 ExecStart=/data/postgresql16/bin/repmgr -f /data/postgresql16/repmgr.conf daemon start ExecStop=/data/postgresql16/bin/repmgr -f /data/postgresql16/repmgr.conf daemon stop ExecReload=/data/postgresql16/bin/repmgr -f /data/postgresql16/repmgr.conf daemon stop Restart=always RestartSec=3 [Install] WantedBy=multi-user.target
在主服务器ip6的PostgreSQL上创建一个专用超级用户repmgr和一个专用的数据库repmgr
1 2 3 4 5 /data/postgresql16/bin/createuser -s repmgr /data/postgresql16/bin/createdb repmgr -O repmgr /data/postgresql16/bin/psql -p5432 -h127.0.0.1 -Upostgres -c 'ALTER USER repmgr SET search_path TO repmgr, "$user", public;'
1 2 3 scp ./json-c-devel-0.11-4.el7_0.x86_64.rpm app@storageserver4:/data/upload_dir/ /data/postgresql16/bin/psql 'host=storageserver3 user=postgres dbname=postgres connect_timeout=2'
1 2 3 4 5 6 7 8 9 10 /data/postgresql16/bin/repmgr -f /data/postgresql16/repmgr.conf primary register --force #查看集群状态 /data/postgresql16/bin/repmgr -f /data/postgresql16/repmgr.conf cluster show #查看repmgr元数据表 /data/postgresql16/bin/psql -p5432 -h127.0.0.1 -Urepmgr \x SELECT * FROM repmgr.nodes;
1 2 3 4 5 6 7 8 9 # 清空从节点数据 rm -rf /data/postgresql16/data/* # 从节点上进行测试能否克隆 /data/postgresql16/bin/repmgr -h storageserver3 -U repmgr -d repmgr -f /data/postgresql16/repmgr.conf standby clone --dry-run # 满足条件则执行克隆 cd /data/postgresql16/bin /data/postgresql16/bin/repmgr -h storageserver3 -U repmgr -d repmgr -f /data/postgresql16/repmgr.conf standby clone
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 # 启动从节点PGSQL sudo systemctl start postgresql # 注册从节点 /data/postgresql16/bin/repmgr -f /data/postgresql16/repmgr.conf standby register --force # 启动从节点postgresql sudo systemctl start postgresql # 查看repmgr元数据表 /data/postgresql16/bin/psql -p5432 -h127.0.0.1 -Urepmgr \x SELECT * FROM pg_stat_wal_receiver; # 验证是否正常注册 【主节点】执行 /data/postgresql16/bin/psql -p5432 -h127.0.0.1 -Urepmgr \x SELECT * FROM pg_stat_replication; # 查看集群状态 /data/postgresql16/bin/repmgr -f /data/postgresql16/repmgr.conf cluster show
keepalived
1 2 3 ./configure --prefix=/data/keepalived make && make install
1 2 3 4 5 # keepalived.service在安装后会自动生成 vim /lib/systemd/system/keepalived.service # 只需要更改以下内容 ExecStart=/opt/app/keepalived/sbin/keepalived -f /data/keepalived/etc/keepalived/keepalived.conf $KEEPALIVED_OPTIONS
1 2 3 4 5 6 7 8 9 ip a #选择有ipv4和ipv6的物理端口,否则会报错如下 entering FAULT state (interface ens25f0 down) Keepalived_vrrp[35803]: (VI_1_BACKUP) entering FAULT state (no IPv4 address for interface) Keepalived_vrrp[35803]: (VI_1_BACKUP) entering FAULT state Keepalived_vrrp[35803]: Registering gratuitous ARP shared channel Keepalived_vrrp[35803]: (VI_1_BACKUP) removing VIPs.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 vim /data/keepalived/etc/keepalived/keepalived.conf global_defs { router_id ip6 #script_user root #enable_script_security } vrrp_script chk_http_port_1 { script "/data/postgresql16/pg_alive.sh" interval 10 weight 2 user app } vrrp_instance VI_1_MASTER { state MASTER interface bond4 # 编号相同 virtual_router_id 50 # 权重相同 priority 50 advert_int 1 authentication { auth_type PASS auth_pass 1111 } virtual_ipaddress { 15.3.69.137 } track_script { chk_http_port_1 } }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 vim /data/keepalived/etc/keepalived/keepalived.conf global_defs { router_id ip7 #script_user root #enable_script_security } vrrp_script chk_http_port_1 { script "/data/postgresql16/pg_alive.sh" interval 10 weight 2 user app } vrrp_instance VI_1_MASTER { state MASTER interface bond4 # 编号相同 virtual_router_id 50 # 权重相同 priority 50 advert_int 1 authentication { auth_type PASS auth_pass 1111 } virtual_ipaddress { 15.3.69.137 } track_script { chk_http_port_1 } }
1 2 3 4 5 psql -Upostgres create table sr_delay(id serial ,sr_date timestamp default now(),new_primary_ip varchar(50),standby_num int); insert into sr_delay(id,sr_date) values(1,now());
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 vim /data/postgresql16/pg_alive.sh #!/bin/bash # FILENAME pg_alive.sh # define DB parameters PGDATABASE=postgres PGPORT=5432 PGUSER=postgres PGHOME=/data/postgresql/bin LOGFILE=/data/postgresql/pg_alive.log PATH=$PATH:$PGHOME keeplognums=30000 # define sql scripts SQL_UPDATE_HEART="update sr_delay set sr_date = now() where id =1;" SQL_ROLE="SELECT pg_is_in_recovery from pg_is_in_recovery();" SQL_CHK_DBALIVED="SELECT 1;" # process step db_role=`echo $SQL_ROLE | psql -p $PGPORT -d $PGDATABASE -U $PGUSER -At -w` if [ $db_role == 't' ];then echo -e `date +"%F %T"` "Attention1:the current database is standby DB!" >> $LOGFILE exit 0 fi #备库不检查存活,主库更新状态 echo $SQL_CHK_DBALIVED | psql -p $PGPORT -d $PGDATABASE -U $PGUSER -At -w if [ $? -eq 0 ] ;then echo $SQL_UPDATE_HEART | psql -p $PGPORT -d $PGDATABASE -U $PGUSER -At -w echo -e `date +"%F %T"` "Success: update the master sr_delay successed!" >> $LOGFILE exit 0 else echo -e `date +"%F %T"` "Error:Is the server is running?" >> $LOGFILE echo -e `date +"%F %T"` "Error:Postgresql is tring stop..." >> $LOGFILE systemctl stop postgresql >> $LOGFILE 2>&1 sleep 5 echo -e `date +"%F %T"` "Error:keepalived is tring stop..." >> $LOGFILE #由keepavlived自行切换 #service keepalived stop >>$LOGFILE 2>&1 exit 1 fi #日志保留 keeplognums 行 if [ ! -f ${MONITOR_LOG} ] ;then touch ${MONITOR_LOG};fi lognums=`cat ${MONITOR_LOG} |wc -l` catnum=$((${lognums} -${keeplognums})) if [[ $lognums -gt ${keeplognums} ]] ; then sed -i "1,${catnum}d" ${MONITOR_LOG}; fi
备份: 每台 ip6 和 ip6 的 /data/ 下存放 以下2个脚本,并且有执行的权限
每日全量配置数据库数据
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 # !/bin/bash # 设置环境变量 source /etc/profile source ~/.bash_profile # 配置 PostgreSQL 连接信息 user=postgres password="yourpassword" host=localhost port=5432 # 默认 PostgreSQL 端口 pg_bin=/data/postgresql16/bin # pg_dump 所在路径 date=$(date +%Y%m%d%H%M) # 创建备份目录 backup_dir="/data/postgresql_databak/$host/" if [ ! -d "$backup_dir" ]; then mkdir -p "$backup_dir" fi # 获取所有数据库名称,排除系统数据库和空行 for dbname in $(psql -U $user -h $host -p $port -lqt | awk '{print $1}' | grep -Ev '^(template[01]|postgres|\|)$') do # 执行备份 $ pg_bin/pg_dump -U $user -h $host -p $port --format=c --compress=9 --file="$backup_dir /$dbname -$date .dump" $dbname if [ $? -eq 0 ]; then # 记录日志 log="$backup_dir/${host}_backup.log" echo "${date} ${dbname} ---backup ok!" | tee -a "$log" else echo "${date} ${dbname} ---backup fail!" | tee -a "$log" fi done # 清理超过30天的备份记录 find "$backup_dir" -type f -mtime +30 -exec rm -rf {} \; >/dev/null 2>&1
开启定时备份
1 2 3 4 crontab -e # 新增 0 1 * * * sh /data/postgresql_backup.sh > /dev/null 2>&1
还原 还原指定库脚本: 如有需要还原的话
注意:
修改要还原的库的文件
删除要还原的库,再执行。 之所以删除库没在脚本里是怕误操作。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 # !/bin/bash # 设置环境变量 source /etc/profile source ~/.bash_profile # 配置 PostgreSQL 连接信息 user=postgres password="yourpassword" host=localhost port=5432 # 默认 PostgreSQL 端口 pg_bin=/data/postgresql16/bin # 或者是你的 pg_dump 所在路径 backup_file="/data/postgresql_databak/$host/nacos-202408291111.dump" # 创建数据库(如果不存在) $ dbname=nacos if ! psql -U $user -h $host -p $port -lqt | cut -d \| -f 1 | grep -qw $dbname; then createdb -U $user -h $host -p $port $dbname fi # 还原备份文件 $ pg_bin/pg_restore -U $user -h $host -p $port -d $dbname $backup_file # 检查还原结果 if [ $? -eq 0 ]; then echo "Restore successful." else echo "Restore failed." fi