mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-22 16:18:28 -05:00
Sentinel: more robust failover detection as observer.
Sentinel observers detect a failover by checking whether a slave attached to the monitored master changes its replication role from slave to master. However, while this change may in theory only happen after a SLAVEOF NO ONE command, in practice it is very easy to reboot a slave instance with a wrong configuration that turns it into a master, especially if it was a past master before a successful failover. This commit changes the detection policy so that if an instance goes from slave to master, but at the same time the runid has changed, we sense a reboot, and in that case we don't detect a failover at all. This commit also introduces the "reboot" sentinel event, that is logged at "warning" level (so this will trigger an admin notification). The commit also fixes a problem in the disconnect handler that assumed that the instance object always existed, which is not the case. Now we no longer assume that redisAsyncFree() will call the disconnection handler before returning.
This commit is contained in:
parent
1bbdf1709f
commit
d876d6feac
@ -598,10 +598,12 @@ void releaseSentinelRedisInstance(sentinelRedisInstance *ri) {
|
||||
/* Release hiredis connections. Note that redisAsyncFree() will call
|
||||
* the disconnection callback. */
|
||||
if (ri->cc) {
|
||||
ri->cc->data = NULL;
|
||||
redisAsyncFree(ri->cc);
|
||||
ri->cc = NULL;
|
||||
}
|
||||
if (ri->pc) {
|
||||
ri->pc->data = NULL;
|
||||
redisAsyncFree(ri->pc);
|
||||
ri->pc = NULL;
|
||||
}
|
||||
@ -865,8 +867,11 @@ char *sentinelHandleConfiguration(char **argv, int argc) {
|
||||
* for async connections. */
|
||||
void sentinelDisconnectInstanceFromContext(const redisAsyncContext *c) {
|
||||
sentinelRedisInstance *ri = c->data;
|
||||
int pubsub = (ri->pc == c);
|
||||
int pubsub;
|
||||
|
||||
if (ri == NULL) return; /* The instance no longer exists. */
|
||||
|
||||
pubsub = (ri->pc == c);
|
||||
sentinelEvent(REDIS_DEBUG, pubsub ? "-pubsub-link" : "-cmd-link", ri,
|
||||
"%@ #%s", c->errstr);
|
||||
if (pubsub)
|
||||
@ -961,7 +966,8 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
|
||||
sds *lines;
|
||||
int numlines, j;
|
||||
int role = 0;
|
||||
|
||||
int runid_changed = 0; /* true if runid changed. */
|
||||
int first_runid = 0; /* true if this is the first runid we receive. */
|
||||
|
||||
/* The following fields must be reset to a given value in the case they
|
||||
* are not found at all in the INFO output. */
|
||||
@ -977,10 +983,14 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
|
||||
if (sdslen(l) >= 47 && !memcmp(l,"run_id:",7)) {
|
||||
if (ri->runid == NULL) {
|
||||
ri->runid = sdsnewlen(l+7,40);
|
||||
first_runid = 1;
|
||||
} else {
|
||||
/* TODO: check if run_id has changed. This means the
|
||||
* instance has been restarted, we want to set a flag
|
||||
* and notify this event. */
|
||||
if (strncmp(ri->runid,l+7,40) != 0) {
|
||||
runid_changed = 1;
|
||||
sentinelEvent(REDIS_NOTICE,"+reboot",ri,"%@");
|
||||
sdsfree(ri->runid);
|
||||
ri->runid = sdsnewlen(l+7,40);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1048,7 +1058,22 @@ void sentinelRefreshInstanceInfo(sentinelRedisInstance *ri, const char *info) {
|
||||
|
||||
/* Act if a slave turned into a master. */
|
||||
if ((ri->flags & SRI_SLAVE) && role == SRI_MASTER) {
|
||||
if (ri->flags & SRI_PROMOTED) {
|
||||
if (!(ri->master->flags & SRI_FAILOVER_IN_PROGRESS) &&
|
||||
(runid_changed || first_runid))
|
||||
{
|
||||
int retval;
|
||||
|
||||
/* If a slave turned into a master, but at the same time the
|
||||
* runid has changed, or it is simply the first time we see an
|
||||
* INFO output from this instance, this is a reboot with a wrong
|
||||
* configuration.
|
||||
*
|
||||
* Log the event and remove the slave. */
|
||||
sentinelEvent(REDIS_WARNING,"-slave-restart-as-master",ri,"%@ #removing it from the attached slaves");
|
||||
retval = dictDelete(ri->master->slaves,ri->name);
|
||||
redisAssert(retval == REDIS_OK);
|
||||
return;
|
||||
} else if (ri->flags & SRI_PROMOTED) {
|
||||
/* If this is a promoted slave we can change state to the
|
||||
* failover state machine. */
|
||||
if (ri->master &&
|
||||
|
Loading…
Reference in New Issue
Block a user