mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-23 00:28:26 -05:00
Sentinel: more aggressive failover start desynchronization.
Sentinel needs to avoid split brain conditions due to multiple sentinels trying to get voted at the exact same time. So far some desynchronization was provided by fluctuating server.hz, that is, the frequency of the timer function call. However, the desynchronization provided in this way was not enough when using many Sentinel instances, especially when a large quorum value is used in order to force a greater degree of agreement (more than N/2+1). It was verified that it was likely to trigger a split brain condition, forcing the system to try again after a timeout. Usually the system will succeed after a few retries, but this is not optimal. This commit desynchronizes instances more effectively, by adding a random offset (bounded by SENTINEL_MAX_DESYNC) to the failover start time, to make it likely that the first attempt will be successful.
This commit is contained in:
parent
08da025f56
commit
47750998a6
@ -84,6 +84,7 @@ typedef struct sentinelAddr {
|
||||
#define SENTINEL_DEFAULT_FAILOVER_TIMEOUT (60*3*1000)
|
||||
#define SENTINEL_MAX_PENDING_COMMANDS 100
|
||||
#define SENTINEL_ELECTION_TIMEOUT 10000
|
||||
#define SENTINEL_MAX_DESYNC 1000
|
||||
|
||||
/* Failover machine different states. */
|
||||
#define SENTINEL_FAILOVER_STATE_NONE 0 /* No failover in progress. */
|
||||
@ -2943,7 +2944,7 @@ char *sentinelVoteLeader(sentinelRedisInstance *master, uint64_t req_epoch, char
|
||||
* time to now, in order to force a delay before we can start a
|
||||
* failover for the same master. */
|
||||
if (strcasecmp(master->leader,server.runid))
|
||||
master->failover_start_time = mstime();
|
||||
master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
|
||||
}
|
||||
|
||||
*leader_epoch = master->leader_epoch;
|
||||
@ -3088,7 +3089,7 @@ void sentinelStartFailover(sentinelRedisInstance *master) {
|
||||
sentinelEvent(REDIS_WARNING,"+new-epoch",master,"%llu",
|
||||
(unsigned long long) sentinel.current_epoch);
|
||||
sentinelEvent(REDIS_WARNING,"+try-failover",master,"%@");
|
||||
master->failover_start_time = mstime();
|
||||
master->failover_start_time = mstime()+rand()%SENTINEL_MAX_DESYNC;
|
||||
master->failover_state_change_time = mstime();
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user