mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-23 00:28:26 -05:00
Cluster: PFAIL -> FAIL transition allowed for slaves.
First change: a node no longer needs to be a master in order to detect a failure; however, a majority of masters signaling PFAIL or FAIL is still needed. This change is important because it allows slaves rejoining the cluster after a partition to sense the FAIL condition, so that eventually all the nodes agree on failures.
This commit is contained in:
parent
925ea9f858
commit
3c9bb8751a
@ -594,25 +594,36 @@ void clusterRenameNode(clusterNode *node, char *newname) {
|
|||||||
/* This function checks if a given node should be marked as FAIL.
|
/* This function checks if a given node should be marked as FAIL.
|
||||||
* It happens if the following conditions are met:
|
* It happens if the following conditions are met:
|
||||||
*
|
*
|
||||||
* 1) We are a master node. Only master nodes can mark a node as failing.
|
* 1) We received enough failure reports from other master nodes via gossip.
|
||||||
* 2) We received enough failure reports from other nodes via gossip.
|
* Enough means that the majority of the masters signaled the node is
|
||||||
* Enough means that the majority of the masters believe the node is
|
* down recently.
|
||||||
* down.
|
* 2) We believe this node is in PFAIL state.
|
||||||
* 3) We believe this node is in PFAIL state.
|
|
||||||
*
|
*
|
||||||
* If a failure is detected we also inform the whole cluster about this
|
* If a failure is detected we also inform the whole cluster about this
|
||||||
* event trying to force every other node to set the FAIL flag for the node.
|
* event trying to force every other node to set the FAIL flag for the node.
|
||||||
|
*
|
||||||
|
* Note that the form of agreement used here is weak, as we collect the majority
|
||||||
|
* of masters state during some time, and even if we force agreement by
|
||||||
|
* propagating the FAIL message, because of partitions we may not reach every
|
||||||
|
* node. However:
|
||||||
|
*
|
||||||
|
* 1) Either we reach the majority and eventually the FAIL state will propagate
|
||||||
|
* to all the cluster.
|
||||||
|
* 2) Or there is no majority so no slave promotion will be authorized and the
|
||||||
|
* FAIL flag will be cleared after some time.
|
||||||
*/
|
*/
|
||||||
void markNodeAsFailingIfNeeded(clusterNode *node) {
|
void markNodeAsFailingIfNeeded(clusterNode *node) {
|
||||||
int failures;
|
int failures;
|
||||||
int needed_quorum = (server.cluster->size / 2) + 1;
|
int needed_quorum = (server.cluster->size / 2) + 1;
|
||||||
|
|
||||||
if (!(server.cluster->myself->flags & REDIS_NODE_MASTER)) return;
|
|
||||||
if (!(node->flags & REDIS_NODE_PFAIL)) return; /* We can reach it. */
|
if (!(node->flags & REDIS_NODE_PFAIL)) return; /* We can reach it. */
|
||||||
if (node->flags & REDIS_NODE_FAIL) return; /* Already FAILing. */
|
if (node->flags & REDIS_NODE_FAIL) return; /* Already FAILing. */
|
||||||
|
|
||||||
failures = 1 + clusterNodeFailureReportsCount(node); /* +1 is for myself. */
|
failures = clusterNodeFailureReportsCount(node);
|
||||||
if (failures < needed_quorum) return;
|
/* Also count myself as a voter if I'm a master. */
|
||||||
|
if (server.cluster->myself->flags & REDIS_NODE_MASTER)
|
||||||
|
failures += 1;
|
||||||
|
if (failures < needed_quorum) return; /* No weak agreement from masters. */
|
||||||
|
|
||||||
redisLog(REDIS_NOTICE,
|
redisLog(REDIS_NOTICE,
|
||||||
"Marking node %.40s as failing (quorum reached).", node->name);
|
"Marking node %.40s as failing (quorum reached).", node->name);
|
||||||
@ -622,8 +633,10 @@ void markNodeAsFailingIfNeeded(clusterNode *node) {
|
|||||||
node->flags |= REDIS_NODE_FAIL;
|
node->flags |= REDIS_NODE_FAIL;
|
||||||
node->fail_time = time(NULL);
|
node->fail_time = time(NULL);
|
||||||
|
|
||||||
/* Broadcast the failing node name to everybody */
|
/* Broadcast the failing node name to everybody, forcing all the other
|
||||||
clusterSendFail(node->name);
|
* reachable nodes to flag the node as FAIL. */
|
||||||
|
if (server.cluster->myself->flags & REDIS_NODE_MASTER)
|
||||||
|
clusterSendFail(node->name);
|
||||||
clusterUpdateState();
|
clusterUpdateState();
|
||||||
clusterSaveConfigOrDie();
|
clusterSaveConfigOrDie();
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user