Redis Sentinel集群双机房容灾实施步骤
概要目标
在双机房部署的情况下,当任一个机房完全无法提供服务时,保证Redis仍能继续对外提供服务。
架构设计
A、B两机房,其中A机房有一Master一Slave和两个Sentinel,B机房只有2个Slave,如下图。
初始规划
A机房
192.168.71.213 S+哨兵
192.168.71.214 M+哨兵
B机房
192.168.70.214 S
192.168.70.215 S
目录创建
--redis软件目录
mkdir -p /home/redis
--pidfile文件存放目录
mkdir -p /home/redis/redisrun/
将redis解压至 /home/redis
集群配置
【Master】
选择71.214作为Master
[root@node-71 redis]# vi /home/redis/redis.conf
#后台启动
daemonize yes
pidfile "/home/redis/redisrun/redis_6379.pid"
port 6379
timeout 0
tcp-keepalive 0
loglevel notice
logfile "/home/redis/redis.log"
databases 16
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename "dump.rdb"
dir "/home/redis/redisdb"
#如果做故障切换,不论主从节点都要填写密码且要保持一致
masterauth "123456"
slave-serve-stale-data yes
slave-read-only yes
repl-disable-tcp-nodelay no
slave-priority 98
#当前redis密码
requirepass "123456"
appendonly yes
# appendfsync always
appendfsync everysec
# appendfsync no
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
lua-time-limit 5000
slowlog-log-slower-than 10000
slowlog-max-len 128
notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-entries 512
list-max-ziplist-value 64
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
activerehashing yes
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
hz 10
aof-rewrite-incremental-fsync yes
# Generated by CONFIG REWRITE
【Slave】
选择其余3个节点作为Slave
[root@node-71 redis]# vi /home/redis/redis.conf
daemonize yes
pidfile "/home/redis/redisrun/redis_6379.pid"
port 6379
timeout 0
tcp-keepalive 0
loglevel notice
logfile "/home/redis/redis.log"
databases 16
save 900 1
save 300 10
save 60 10000
stop-writes-on-bgsave-error yes
rdbcompression yes
rdbchecksum yes
dbfilename "dump.rdb"
dir "/home/redis/redisdb"
#主节点密码
masterauth "123456"
slave-serve-stale-data yes
slave-read-only yes
repl-disable-tcp-nodelay no
slave-priority 98
requirepass "123456"
appendonly yes
# appendfsync always
appendfsync everysec
# appendfsync no
no-appendfsync-on-rewrite no
auto-aof-rewrite-percentage 100
auto-aof-rewrite-min-size 64mb
lua-time-limit 5000
slowlog-log-slower-than 10000
slowlog-max-len 128
notify-keyspace-events ""
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-entries 512
list-max-ziplist-value 64
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
activerehashing yes
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
hz 10
aof-rewrite-incremental-fsync yes
# Generated by CONFIG REWRITE
#配置主节点信息
slaveof 192.168.71.214 6379
--检查修正
daemonize yes
pidfile "/home/redis/redisrun//redis_6379.pid"
logfile "/home/redis/redis.log"
【sentinel.conf】
选择A机房2节点作为sentinel
vi /home/redis/sentinel.conf
port 26379
#末尾的1是quorum,即判定redis主节点客观下线所需的sentinel数量;但真正执行切换还需要多数sentinel投票授权,所以实际上要两个sentinel节点都检测到主节点故障才会进行切换,只剩单个sentinel节点时无效(自己测试发现的)
#如果3s内mymaster无响应,则认为mymaster宕机了
#如果10秒后,mymaster仍没活过来,则启动failover
sentinel monitor mymaster 192.168.71.214 6379 1
sentinel down-after-milliseconds mymaster 3000
sentinel failover-timeout mymaster 10000
daemonize yes
#指定工作目录
dir "/home/redis/sentinel-work"
protected-mode no
logfile "/home/redis/sentinellog/sentinel.log"
#redis主节点密码
sentinel auth-pass mymaster 123456
# Generated by CONFIG REWRITE
--检查修正
sentinel monitor mymaster 192.168.71.214 6379 1
dir "/home/redis/sentinel-work"
logfile "/home/redis/sentinellog/sentinel.log"
启动检查
【启动集群与日志监控】
每个节点都执行
cd /home/redis/src/
./redis-server /home/redis/redis.conf
tail -f /home/redis/redis.log
只在sentinel节点执行
cd /home/redis/src/
./redis-sentinel /home/redis/sentinel.conf
tail -f /home/redis/sentinellog/sentinel.log
【Master检查】
cd /home/redis/src/
[root@localhost src]# ./redis-cli -h 192.168.71.214 -p 6379 -a 123456
192.168.71.214:6379> info Replication
# Replication
role:master
connected_slaves:3
slave0:ip=192.168.71.213,port=6379,state=online,offset=1107595,lag=1
slave1:ip=192.168.70.214,port=6379,state=online,offset=1107742,lag=0
slave2:ip=192.168.70.215,port=6379,state=online,offset=1107889,lag=0
master_repl_offset:1107889
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:59314
repl_backlog_histlen:1048576
192.168.71.214:6379> set test zgy
OK
192.168.71.214:6379> get test
"zgy"
192.168.71.214:6379>
【Slave检查,只读】
192.168.71.214:6379> get test
"zgy"
192.168.71.214:6379> set test zgy2
(error) READONLY You can't write against a read only slave.
192.168.71.214:6379> info Replication
# Replication
role:slave
master_host:192.168.70.214
master_port:6379
master_link_status:up
master_last_io_seconds_ago:1
master_sync_in_progress:0
slave_repl_offset:42385
slave_priority:100
slave_read_only:1
connected_slaves:0
master_repl_offset:0
repl_backlog_active:0
repl_backlog_size:1048576
repl_backlog_first_byte_offset:0
repl_backlog_histlen:0
192.168.71.214:6379>
断网断电测试
断网
通过开启防火墙来模拟
service iptables status
--service iptables start
--70网段2节点的防火墙配置
[root@localhost redis]# cat /etc/sysconfig/iptables
# Firewall configuration written by system-config-firewall
# Manual customization of this file is not recommended.
*filter
:INPUT ACCEPT [0:0]
:FORWARD ACCEPT [0:0]
:OUTPUT ACCEPT [0:0]
#屏蔽A机房2个节点
-I INPUT -s 192.168.71.213 -j DROP
-I INPUT -s 192.168.71.214 -j DROP
-A INPUT -m state --state ESTABLISHED,RELATED -j ACCEPT
-A INPUT -p icmp -j ACCEPT
-A INPUT -i lo -j ACCEPT
-A INPUT -m state --state NEW -m tcp -p tcp --dport 22 -j ACCEPT
-A INPUT -j REJECT --reject-with icmp-host-prohibited
-A FORWARD -j REJECT --reject-with icmp-host-prohibited
COMMIT
断网
B机房断网前
--前
192.168.71.214:6379> info Replication
# Replication
role:master
connected_slaves:3
slave0:ip=192.168.71.213,port=6379,state=online,offset=12825868,lag=1
slave1:ip=192.168.70.214,port=6379,state=online,offset=12825868,lag=1
slave2:ip=192.168.70.215,port=6379,state=online,offset=12826015,lag=0
master_repl_offset:12826162
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:11777587
repl_backlog_histlen:1048576
192.168.71.214:6379>
--后
--明显找不到70网段的那2个节点啦
192.168.71.214:6379> info Replication
# Replication
role:master
connected_slaves:1
slave0:ip=192.168.71.213,port=6379,state=online,offset=12909588,lag=1
master_repl_offset:12909588
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:11861013
repl_backlog_histlen:1048576
192.168.71.214:6379>
而Master还能继续对外提供服务
A机房断网前、后
前
192.168.71.214:6379> info Replication
# Replication
role:master
connected_slaves:3
slave0:ip=192.168.71.213,port=6379,state=online,offset=12942691,lag=1
slave1:ip=192.168.70.214,port=6379,state=online,offset=12942691,lag=1
slave2:ip=192.168.70.215,port=6379,state=online,offset=12942838,lag=0
master_repl_offset:12942838
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:11894263
repl_backlog_histlen:1048576
后,出现2个Master??
192.168.71.214:6379> info Replication
# Replication
role:master
connected_slaves:0
master_repl_offset:12957363
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:11908788
repl_backlog_histlen:1048576
192.168.71.214:6379>
192.168.71.213:6379> info replication
# Replication
role:master
connected_slaves:0
master_repl_offset:12943881
repl_backlog_active:0
repl_backlog_size:1048576
repl_backlog_first_byte_offset:0
repl_backlog_histlen:0
192.168.71.213:6379>
断电
通过kill redis进程来模拟
ps -ef|grep redis
断电前
192.168.71.213:6379> info replication
# Replication
role:master
connected_slaves:3
slave0:ip=192.168.70.215,port=6379,state=online,offset=13091227,lag=0
slave1:ip=192.168.70.214,port=6379,state=online,offset=13091227,lag=0
slave2:ip=192.168.71.214,port=6379,state=online,offset=13091080,lag=1
master_repl_offset:13091227
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:13087442
repl_backlog_histlen:3786
192.168.71.214:6379> info Replication
# Replication
role:master
connected_slaves:1
slave0:ip=192.168.71.213,port=6379,state=online,offset=13096642,lag=1
master_repl_offset:13096642
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:13092272
repl_backlog_histlen:4371
192.168.71.214:6379>
断电后
192.168.70.214:6379> info Replication
# Replication
role:slave
master_host:192.168.71.214
master_port:6379
master_link_status:down
master_last_io_seconds_ago:-1
master_sync_in_progress:0
slave_repl_offset:13159324
master_link_down_since_seconds:18
slave_priority:100
slave_read_only:1
connected_slaves:0
master_repl_offset:0
repl_backlog_active:0
repl_backlog_size:1048576
repl_backlog_first_byte_offset:0
repl_backlog_histlen:0
192.168.70.215:6379> info Replication
# Replication
role:slave
master_host:192.168.71.214
master_port:6379
master_link_status:down
master_last_io_seconds_ago:-1
master_sync_in_progress:0
slave_repl_offset:13159324
master_link_down_since_seconds:28
slave_priority:100
slave_read_only:1
connected_slaves:0
master_repl_offset:0
repl_backlog_active:0
repl_backlog_size:1048576
repl_backlog_first_byte_offset:0
repl_backlog_histlen:0
70网段的两个节点仍然都是Slave(没有节点被提升为Master),无法正常对外提供服务了。。。
此时,需要修改其中一个节点的配置来向外提供服务
先Kill掉redis进程,再修改其中一个节点(如70.215)的redis参数,将slaveof指向另一个节点(70.214);并检查另外一台(70.214)的配置,删除其中的slaveof这一项,使其作为Master;最后重启这2个节点,即可对外正常提供服务
vi /home/redis/redis.conf
slaveof 192.168.70.214 6379
[root@localhost src]# ./redis-cli -h 192.168.70.214 -p 6379 -a 123456
192.168.70.214:6379> info Replication
# Replication
role:master
connected_slaves:1
slave0:ip=192.168.70.215,port=6379,state=online,offset=15,lag=1
master_repl_offset:15
repl_backlog_active:1
repl_backlog_size:1048576
repl_backlog_first_byte_offset:2
repl_backlog_histlen:14
192.168.70.214:6379>
【还原初始】
修改71.214之外各节点的参数
vi /home/redis/redis.conf
slaveof 192.168.71.214 6379
vi /home/redis/sentinel.conf
sentinel monitor mymaster 192.168.71.214 6379 1
并删除文件末尾"# Generated by CONFIG REWRITE"之后自动生成的那几行
数据校验
Master执行更新数据会同步Slave
注意事项
见每步后面
Redis Sentinel集群双机房容灾实施步骤的更多相关文章
- redis主从集群搭建及容灾部署(哨兵sentinel)
Redis也用了一段时间了,记录一下相关集群搭建及配置详解,方便后续使用查阅. 提纲 Redis安装 整体架构 Redis主从结构搭建 Redis容灾部署(哨兵sentinel) Redis常见问题 ...
- 主从集群搭建及容灾部署redis
redis主从集群搭建及容灾部署(哨兵sentinel) Redis也用了一段时间了,记录一下相关集群搭建及配置详解,方便后续使用查阅. 提纲 l Redis安装 l 整体架构 l Redis主 ...
- redis sentinel 集群监控 配置
环境: ip 172.16.1.31 26379 redis sentinel ip 172.16.1.30 6379 主 1 ip 172.16.1.31 6380 从 1 ip ...
- redis sentinel集群的搭建
背景说明: 这里采用1主2从的redis集群,3个sentinel搭建高可用redis集群. 一,关于搭建redis-sentinel高可用之前,我们必须要了解redis主从搭建redis-senti ...
- Redis Sentinel 集群安装 step by step
一. 准备材料 服务器 IP address 操作系统 位数 Redis 版本 CNT06CAH05 192.168.3.47 CentOS 6.5 x64 Redis-3.2.6 sentine ...
- Redis Sentinel集群配置中的一些细节
今天在配置Redis集群,用作Tomcat集群的缓存共享.关于Redis集群的配置网上有很多文章,这里只是记录一下我在配置过程中遇到的一些小的细节问题. 1. 关于Protected Mode的问题 ...
- redis sentinel 集群配置-主从切换
1.配置redis master,redis slave(配置具体操作见上文http://www.cnblogs.com/wangchaozhi/p/5140469.html). redis mast ...
- helm安装redis+Sentinel集群搭建
一.redis集群特点 数据 在多个Redis节点之间自动分片 sentinel特点: 它的主要功能有以下几点 不时地监控redis是否按照预期良好地运行; 如果发现某个redis节点运行出现状况,能 ...
- elasticsearch集群扩容和容灾
elasticsearch专栏:https://www.cnblogs.com/hello-shf/category/1550315.html 一.集群健康 Elasticsearch 的集群监控信息 ...
随机推荐
- 基于java的ES开发
3.1 环境配置 Jdk 1.8及以上 Elasticsearch.client 5.5.2(与服务器版本一致) Log4j 2.7及以下 maven工程必要的jar包依赖 <project x ...
- 关于内核转储(core dump)的设置方法
原作者:http://blog.csdn.net/wj_j2ee/article/details/7161586 1. 内核转储作用 (1) 内核转储的最大好处是能够保存问题发生时的状态. (2) 只 ...
- eclipse中javadoc给项目生成api文档
步骤 1.打开java代码,编写JavaDoc 注释,只有按照java的规范编写注释,才能很好的生成API文档,javadoc注释与普通注释的区别为多一个*(星号).普通代码注释为/*XXX*/,而j ...
- HTTP认证方式详解
HTTP请求报头: Authorization HTTP响应报头: WWW-Authenticate HTTP认证 基于 质询 /回应( challenge/response)的认证模式. ◆ ...
- Tiny4412之外部中断
一:外部中断 在之前我们学习按键驱动的时候,我们检测按键有没有按下是通过轮循的方式(也就是我们说的死循环),这样虽然可以检测实现按键,但太浪费系统资源了,不论我们按键中断有没有发生,cpu都要一直进行 ...
- RestTemplate的设置及使用
概述 RestTemplate是spring内置的http请求封装,在使用spring的情况下,http请求直接使用RestTemplate是不错的选择. Rest服务端 使用RestTemplate ...
- 为什么选择Spring Boot作为微服务的入门级微框架
摘要:1. Spring Boot是什么,解决哪些问题 1) Spring Boot使编码变简单 2) Spring Boot使配置变简单 3) Spring Boot使部署变简单 4) Spring ...
- Vue路由学习心得
GoodBoy and GoodGirl~进来了就看完点个赞再离开,写了这么多也不容易的~ 一.介绍 1.概念:路由其实就是指向的意思,当我们点击home按钮时,页面中就要显示home的内容,点击l ...
- jquery开关按钮效果
.circular1{ width: 50px; height: 30px; border-radius: 16px; background-color: #ccc; transition: .3s; ...
- Python爬虫进阶六之多进程的用法
前言 在上一节中介绍了thread多线程库.python中的多线程其实并不是真正的多线程,并不能做到充分利用多核CPU资源. 如果想要充分利用,在python中大部分情况需要使用多进程,那么这个包就叫 ...