Turn into replica on SETSLOT (#10489)

* Fix race condition where node loses its last slot and turns into replica

When a node has lost its last slot and finds out from the SETSLOT command
before the cluster bus PONG from the new owner arrives. In this case, the
node didn't turn itself into a replica of the new slot owner.

This commit adds the same logic to the SETSLOT command as already exists
for the cluster bus PONG processing.

* Revert "Fix new / failing cluster slot migration test (#10482)"

This reverts commit 0b21ef8d49.

In this test, the old slot owner finds out that it has lost its last
slot in a nondeterministic way. Either the cluster bus PONG from the
new slot owner and sometimes in a SETSLOT command from redis-cli. In
both cases, the result should be the same and the old owner should
turn itself into a replica of the new slot owner.
This commit is contained in:
Viktor Söderqvist 2022-04-02 23:58:07 +02:00 committed by GitHub
parent b8eb2a7340
commit b53c7f2c0b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 40 additions and 3 deletions

View File

@ -1899,6 +1899,17 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE|
CLUSTER_TODO_FSYNC_CONFIG);
} else if (myself->slaveof && myself->slaveof->slaveof) {
/* Safeguard against sub-replicas. A replica's master can turn itself
* into a replica if its last slot is removed. If no other node takes
* over the slot, there is nothing else to trigger replica migration. */
serverLog(LL_WARNING,
"I'm a sub-replica! Reconfiguring myself as a replica of grandmaster %.40s",
myself->slaveof->slaveof->name);
clusterSetMaster(myself->slaveof->slaveof);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG|
CLUSTER_TODO_UPDATE_STATE|
CLUSTER_TODO_FSYNC_CONFIG);
} else if (dirty_slots_count) {
/* If we are here, we received an update message which removed
* ownership for certain slots we still have keys about, but still
@ -5403,9 +5414,26 @@ NULL
server.cluster->migrating_slots_to[slot])
server.cluster->migrating_slots_to[slot] = NULL;
int slot_was_mine = server.cluster->slots[slot] == myself;
clusterDelSlot(slot);
clusterAddSlot(n,slot);
/* If we are a master left without slots, we should turn into a
* replica of the new master. */
if (slot_was_mine &&
n != myself &&
myself->numslots == 0 &&
server.cluster_allow_replica_migration)
{
serverLog(LL_WARNING,
"Configuration change detected. Reconfiguring myself "
"as a replica of %.40s", n->name);
clusterSetMaster(n);
clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG |
CLUSTER_TODO_UPDATE_STATE |
CLUSTER_TODO_FSYNC_CONFIG);
}
/* If this node was importing this slot, assigning the slot to
* itself also clears the importing status. */
if (n == myself &&

View File

@ -45,11 +45,12 @@ test "Resharding all the master #0 slots away from it" {
}
test "Master #0 should lose its replicas" {
test "Master #0 who lost all slots should turn into a replica without replicas" {
wait_for_condition 1000 50 {
[llength [lindex [R 0 role] 2]] == 0
[RI 0 role] == "slave" && [RI 0 connected_slaves] == 0
} else {
fail "Master #0 still has replicas"
puts [R 0 info replication]
fail "Master #0 didn't turn itself into a replica"
}
}

View File

@ -300,6 +300,14 @@ test {Migrate the last slot away from a node using redis-cli} {
# Check that the key foo has been migrated back to the original owner.
catch { $newnode_r get foo } e
assert_equal "MOVED $slot $owner_host:$owner_port" $e
# Check that the empty node has turned itself into a replica of the new
# owner and that the new owner knows that.
wait_for_condition 1000 50 {
[string match "*slave*" [$owner_r CLUSTER REPLICAS $owner_id]]
} else {
fail "Empty node didn't turn itself into a replica."
}
}
}