04-HA_keepalived
1. 高可用理论
- high availability(高可用,缩写HA)
- 单点故障(single point of failure,缩写SPOF)
- LVS单点故障
- RS一台挂掉。一部分用户会请求异常,lvs还存有这个RS的负载记录
1. 单点故障思路(一变多)
- 主备
- 主从:主从的主机也单点,主机一般也主备
- 主主。vip唯一,要引入动态dns
主通知备,备监控主
1. 主备通讯(方向性)
- 备机定时请求主机(增加主机负担)
- 主机定时广播
2. 备转主(效率性)
- 各个备机加权重
2. keepalived
- 人响应,最不靠谱的!企业追求自动化~!把人解耦出去~!用程序替代!
- 实现HA:代替人自动运维,解决单点故障
作用:
- 监控自己的服务
- master广播,slave监听。master挂了,推选一个新master
- 配置vip,添加ipvs,有配置文件
- 对后端server做健康检查
1. RS挂了怎么确定?
- ping
- ping基于ICMP,工作在OSI第三层(网络层),只能确定网络是否通。连TCP握手都确定不了,更谈不上确定应用层的服务是否正常
- 访问一下
- 底层:验证应用层http协议,发请求,判断返回200
2. keepalived架构
3. keepalived实操
1. 初始化LVS
- 清除node1之前配置
ipvsadm -C
ifconfig ens33:8 down
# 1. 清空ipvsadm
[root@localhost ~]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.10.100:80 rr
-> 192.168.10.12:80 Route 1 0 0
-> 192.168.10.13:80 Route 1 0 0
[root@localhost ~]# ipvsadm -C
[root@localhost ~]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
[root@localhost ~]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.10.11 netmask 255.255.255.0 broadcast 192.168.10.255
inet6 fe80::bc2a:e603:a0b9:417d prefixlen 64 scopeid 0x20<link>
inet6 fe80::e9b4:7ddd:b4a9:ecf5 prefixlen 64 scopeid 0x20<link>
inet6 fe80::75c8:b33c:7d85:2caf prefixlen 64 scopeid 0x20<link>
ether 00:0c:29:b2:ac:57 txqueuelen 1000 (Ethernet)
RX packets 2389 bytes 244316 (238.5 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 1029 bytes 169400 (165.4 KiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
ens33:8: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.10.100 netmask 255.255.255.0 broadcast 192.168.10.255
ether 00:0c:29:b2:ac:57 txqueuelen 1000 (Ethernet)
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
loop txqueuelen 1000 (Local Loopback)
RX packets 68 bytes 5912 (5.7 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 68 bytes 5912 (5.7 KiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
# 2. 删除vip所在的网卡别名(上面ifconfig输出中的ens33:8)
[root@localhost ~]# ifconfig ens33:8 down
[root@localhost ~]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.10.11 netmask 255.255.255.0 broadcast 192.168.10.255
inet6 fe80::bc2a:e603:a0b9:417d prefixlen 64 scopeid 0x20<link>
inet6 fe80::e9b4:7ddd:b4a9:ecf5 prefixlen 64 scopeid 0x20<link>
inet6 fe80::75c8:b33c:7d85:2caf prefixlen 64 scopeid 0x20<link>
ether 00:0c:29:b2:ac:57 txqueuelen 1000 (Ethernet)
RX packets 2462 bytes 250781 (244.9 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 1078 bytes 175892 (171.7 KiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
loop txqueuelen 1000 (Local Loopback)
RX packets 68 bytes 5912 (5.7 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 68 bytes 5912 (5.7 KiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
2. 安装keepalived
- node1、node4
yum install keepalived ipvsadm -y
[root@localhost ~]# cd /etc/keepalived/
[root@localhost keepalived]# ll
总用量 4
-rw-r--r--. 1 root root 3598 10月 1 2020 keepalived.conf
# 备份配置文件
[root@localhost keepalived]# cp keepalived.conf keepalived.conf.bak
! Configuration File for keepalived
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 192.168.200.1
smtp_connect_timeout 30
router_id LVS_DEVEL
vrrp_skip_check_adv_addr
vrrp_strict
vrrp_garp_interval 0
vrrp_gna_interval 0
}
vrrp_instance VI_1 {
state MASTER
interface eth0
virtual_router_id 51
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.200.16
192.168.200.17
192.168.200.18
}
}
virtual_server 192.168.200.100 443 {
delay_loop 6
lb_algo rr
lb_kind NAT
persistence_timeout 50
protocol TCP
real_server 192.168.201.100 443 {
weight 1
SSL_GET {
url {
path /
digest ff20ad2481f97b1754ef3e12ecd3a9cc
}
url {
path /mrtg/
digest 9b3a0c85a887a256d6939da88aabd8cd
}
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
virtual_server 10.10.10.2 1358 {
delay_loop 6
lb_algo rr
lb_kind NAT
persistence_timeout 50
protocol TCP
sorry_server 192.168.200.200 1358
real_server 192.168.200.2 1358 {
weight 1
HTTP_GET {
url {
path /testurl/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334d
}
url {
path /testurl2/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334d
}
url {
path /testurl3/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334d
}
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.200.3 1358 {
weight 1
HTTP_GET {
url {
path /testurl/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334c
}
url {
path /testurl2/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334c
}
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
virtual_server 10.10.10.3 1358 {
delay_loop 3
lb_algo rr
lb_kind NAT
persistence_timeout 50
protocol TCP
real_server 192.168.200.4 1358 {
weight 1
HTTP_GET {
url {
path /testurl/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334d
}
url {
path /testurl2/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334d
}
url {
path /testurl3/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334d
}
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.200.5 1358 {
weight 1
HTTP_GET {
url {
path /testurl/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334d
}
url {
path /testurl2/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334d
}
url {
path /testurl3/test.jsp
digest 640205b7b0fc66c1ea91c463fac6334d
}
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
3. global_defs
# 删除下面这一行。vrrp_strict表示严格遵守VRRP协议,开启后会导致虚拟ip无法访问
vrrp_strict
4. vrrp
- vrrp:虚拟路由冗余协议
vrrp_instance VI_1 {
state MASTER # 主标识。node04 上改为 BACKUP
interface ens33 # 一个服务器有多张网卡,这里指定使用的网卡
virtual_router_id 51 # 多套keepalived的唯一标识
priority 100 # 权重。node04 上改为 50
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.10.100/24 dev ens33 label ens33:3
}
}
5. virtual_server
- persistence_timeout:会话保持时间。连接已经断开(四次挥手完成)后,在该时间内LVS仍把同一客户端的请求指向同一台RS,避免在其他RS上为同一用户重新开辟资源
virtual_server 192.168.10.100 80 { # vip
delay_loop 6
lb_algo rr
lb_kind DR # NAT => DR
nat_mask 255.255.255.0 # 增加
persistence_timeout 0 # 测试时设为0
protocol TCP
real_server 192.168.10.12 80 {
weight 1
HTTP_GET {
url {
path /
status_code 200
}
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
real_server 192.168.10.13 80 {
weight 1
HTTP_GET {
url {
path /
status_code 200
}
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
}
6. real_server
- 健康检查。HTTP_GET用于http,SSL_GET用于https。其他检查方式查man帮助文档(man 5 keepalived.conf)
real_server 192.168.10.12 80 {
weight 1
HTTP_GET {
url {
path /
status_code 200
}
connect_timeout 3
nb_get_retry 3
delay_before_retry 3
}
}
# vim中复制real_server配置块:
:.,$-1y    # 复制从光标所在行到倒数第二行
p          # 粘贴
7. LVS_slave
# node1 复制到 node4
# `pwd`和前面的路径是一样的
scp ./keepalived.conf root@node04:`pwd`
- 修改vrrp
vrrp_instance VI_1 {
state BACKUP # 主标识。node04 BACKUP
interface ens33
virtual_router_id 51
priority 50 # 权重
advert_int 1
authentication {
auth_type PASS
auth_pass 1111
}
virtual_ipaddress {
192.168.10.100/24 dev ens33 label ens33:3
}
}
4. 验证
1. 静态配置查看
1. LVS_master
- vip已经创建
[root@localhost keepalived]# systemctl status keepalived
● keepalived.service - LVS and VRRP High Availability Monitor
Loaded: loaded (/usr/lib/systemd/system/keepalived.service; disabled; vendor preset: disabled)
Active: active (running) since 三 2023-06-07 08:54:04 CST; 2min 5s ago
Process: 10265 ExecStart=/usr/sbin/keepalived $KEEPALIVED_OPTIONS (code=exited, status=0/SUCCESS)
Main PID: 10266 (keepalived)
CGroup: /system.slice/keepalived.service
├─10266 /usr/sbin/keepalived -D
├─10267 /usr/sbin/keepalived -D
└─10268 /usr/sbin/keepalived -D
6月 07 08:54:06 localhost.localdomain Keepalived_vrrp[10268]: Sending gratuitous ARP on ens33 for 192.168.10.100
6月 07 08:54:06 localhost.localdomain Keepalived_vrrp[10268]: Sending gratuitous ARP on ens33 for 192.168.10.100
6月 07 08:54:06 localhost.localdomain Keepalived_vrrp[10268]: Sending gratuitous ARP on ens33 for 192.168.10.100
6月 07 08:54:06 localhost.localdomain Keepalived_vrrp[10268]: Sending gratuitous ARP on ens33 for 192.168.10.100
6月 07 08:54:11 localhost.localdomain Keepalived_vrrp[10268]: Sending gratuitous ARP on ens33 for 192.168.10.100
6月 07 08:54:11 localhost.localdomain Keepalived_vrrp[10268]: VRRP_Instance(VI_1) Sending/queueing gratuitous ARPs on ens33 for 192.168.10.100
6月 07 08:54:11 localhost.localdomain Keepalived_vrrp[10268]: Sending gratuitous ARP on ens33 for 192.168.10.100
6月 07 08:54:11 localhost.localdomain Keepalived_vrrp[10268]: Sending gratuitous ARP on ens33 for 192.168.10.100
6月 07 08:54:11 localhost.localdomain Keepalived_vrrp[10268]: Sending gratuitous ARP on ens33 for 192.168.10.100
6月 07 08:54:11 localhost.localdomain Keepalived_vrrp[10268]: Sending gratuitous ARP on ens33 for 192.168.10.100
[root@localhost keepalived]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.10.11 netmask 255.255.255.0 broadcast 192.168.10.255
inet6 fe80::bc2a:e603:a0b9:417d prefixlen 64 scopeid 0x20<link>
inet6 fe80::e9b4:7ddd:b4a9:ecf5 prefixlen 64 scopeid 0x20<link>
inet6 fe80::75c8:b33c:7d85:2caf prefixlen 64 scopeid 0x20<link>
ether 00:0c:29:b2:ac:57 txqueuelen 1000 (Ethernet)
RX packets 73736 bytes 8519540 (8.1 MiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 105555 bytes 12148075 (11.5 MiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
# 自动增加网卡ip
ens33:3: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.10.100 netmask 255.255.255.0 broadcast 0.0.0.0
ether 00:0c:29:b2:ac:57 txqueuelen 1000 (Ethernet)
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
loop txqueuelen 1000 (Local Loopback)
RX packets 68 bytes 5912 (5.7 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 68 bytes 5912 (5.7 KiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
- 内核模块的配置
[root@localhost keepalived]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.10.100:80 rr
-> 192.168.10.12:80 Route 1 0 0
-> 192.168.10.13:80 Route 1 0 0
2. LVS_slave
- 备机:vip不配置
[root@localhost keepalived]# ifconfig
ens33: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 192.168.10.14 netmask 255.255.255.0 broadcast 192.168.10.255
inet6 fe80::bc2a:e603:a0b9:417d prefixlen 64 scopeid 0x20<link>
inet6 fe80::e9b4:7ddd:b4a9:ecf5 prefixlen 64 scopeid 0x20<link>
inet6 fe80::75c8:b33c:7d85:2caf prefixlen 64 scopeid 0x20<link>
ether 00:0c:29:8f:b0:58 txqueuelen 1000 (Ethernet)
RX packets 56249 bytes 17662585 (16.8 MiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 10836 bytes 1071954 (1.0 MiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
lo: flags=73<UP,LOOPBACK,RUNNING> mtu 65536
inet 127.0.0.1 netmask 255.0.0.0
inet6 ::1 prefixlen 128 scopeid 0x10<host>
loop txqueuelen 1000 (Local Loopback)
RX packets 72 bytes 6256 (6.1 KiB)
RX errors 0 dropped 0 overruns 0 frame 0
TX packets 72 bytes 6256 (6.1 KiB)
TX errors 0 dropped 0 overruns 0 carrier 0 collisions 0
- 备机:内核已经配置
[root@localhost keepalived]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.10.100:80 rr
-> 192.168.10.12:80 Route 1 0 0
-> 192.168.10.13:80 Route 1 0 0
2. HA
1. LVS_master挂掉
# 主机物理网卡down掉。vip漂移到node4
ifconfig ens33 down
# xshell已经不能连,直接VMware连
ifconfig ens33 up
- keepalived默认抢占:主机修复后抢回主机位,备机回到备机位。但就一般的主备模式而言,主修复了不一定会抢回主机位
- 参考成本复杂度。某些主备切换是要停机,拷贝同步数据
2. RS挂掉
- RS挂掉,主、备机ipvsadm踢掉挂掉的RS。RS恢复,ipvsadm重新添加回来
[root@localhost ~]# service httpd stop
Redirecting to /bin/systemctl stop httpd.service
[root@localhost ~]# ipvsadm -ln
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Scheduler Flags
-> RemoteAddress:Port Forward Weight ActiveConn InActConn
TCP 192.168.10.100:80 rr
-> 192.168.10.13:80 Route 1 1 1
5. keepalived异常退出
- 引入zookeeper集群,来保证HA
- java要开线程,linux是开进程
# 主进程下有两个子进程:一个负责VRRP主备通告(上文日志中的Keepalived_vrrp[10268]),另一个负责对RS做健康检查
[root@localhost ~]# ps -ef | grep keep
root 10266 1 0 08:54 ? 00:00:00 /usr/sbin/keepalived -D # 主进程
root 10267 10266 0 08:54 ? 00:00:00 /usr/sbin/keepalived -D # 子进程
root 10268 10266 0 08:54 ? 00:00:00 /usr/sbin/keepalived -D
root 10433 10411 0 09:10 pts/1 00:00:00 grep --color=auto keep
# 1. 强制退出,为异常退出。资源都没有回收
# vip没有收回,内核配置也没有收回
kill -9 10266
kill -9 10267
kill -9 10268
# 2. 备机收不到主机通告后升为主并创建vip,与原主机上未回收的vip冲突,造成丢包
systemctl restart keepalived
6. man
- linux帮助程序man。可以看程序命令,也可以看配置文件
man 5 keepalived.conf
# 查找
/virtual_ipaddress