From 42f5da5d2d373b1a016d725b2b7111288949a28e Mon Sep 17 00:00:00 2001 From: Qu Chen Date: Thu, 21 May 2020 18:39:38 -0700 Subject: [PATCH] Disconnect chained replicas when the replica performs PSYNC with the master always to avoid replication offset mismatch between master and chained replicas. --- src/replication.c | 13 ++++++-- tests/integration/psync2-pingoff.tcl | 50 ++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/replication.c b/src/replication.c index b10635f25..7eb161f7d 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2023,12 +2023,19 @@ int slaveTryPartialResynchronization(connection *conn, int read_reply) { * new one. */ memcpy(server.replid,new,sizeof(server.replid)); memcpy(server.cached_master->replid,new,sizeof(server.replid)); - - /* Disconnect all the sub-slaves: they need to be notified. */ - disconnectSlaves(); } } + /* Disconnect all the sub-replicas: they need to be notified always because + * in case the master has last replicated some non-meaningful commands + * (e.g. PINGs), the primary replica will request the PSYNC offset for the + * last known meaningful command. This means the master will again replicate + * the non-meaningful commands. If the sub-replicas still remains connected, + * they will receive those commands a second time and increment the master + * replication offset to be higher than the master's offset forever. + */ + disconnectSlaves(); + /* Setup the replication to continue. */ sdsfree(reply); replicationResurrectCachedMaster(conn); diff --git a/tests/integration/psync2-pingoff.tcl b/tests/integration/psync2-pingoff.tcl index 420747d21..2c2303141 100644 --- a/tests/integration/psync2-pingoff.tcl +++ b/tests/integration/psync2-pingoff.tcl @@ -60,3 +60,53 @@ start_server {} { } } }} + + +start_server {tags {"psync2"}} { +start_server {} { +start_server {} { + + for {set j 0} {$j < 3} {incr j} { + set R($j) [srv [expr 0-$j] client] + set R_host($j) [srv [expr 0-$j] host] + set R_port($j) [srv [expr 0-$j] port] + $R($j) CONFIG SET repl-ping-replica-period 1 + } + + test "Chained replicas disconnect when replica re-connect with the same master" { + # Add a second replica as a chained replica of the current replica + $R(1) replicaof $R_host(0) $R_port(0) + $R(2) replicaof $R_host(1) $R_port(1) + wait_for_condition 50 1000 { + [status $R(2) master_link_status] == "up" + } else { + fail "Chained replica not replicating from its master" + } + + # Do a write on the master, and wait for 3 seconds for the master to + # send some PINGs to its replica + $R(0) INCR counter2 + after 2000 + set sync_partial_master [status $R(0) sync_partial_ok] + set sync_partial_replica [status $R(1) sync_partial_ok] + $R(0) CONFIG SET repl-ping-replica-period 100 + + # Disconnect the master's direct replica + $R(0) client kill type replica + wait_for_condition 50 1000 { + [status $R(1) master_link_status] == "up" && + [status $R(2) master_link_status] == "up" && + [status $R(0) sync_partial_ok] == $sync_partial_master + 1 && + [status $R(1) sync_partial_ok] == $sync_partial_replica + 1 + } else { + fail "Disconnected replica failed to PSYNC with master" + } + + # Verify that the replica and its replica's meaningful and real + # offsets match with the master + assert_equal [status $R(0) master_repl_offset] [status $R(1) master_repl_offset] + assert_equal [status $R(0) master_repl_offset] [status $R(2) master_repl_offset] + assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(1) master_repl_meaningful_offset] + assert_equal [status $R(0) master_repl_meaningful_offset] [status $R(2) master_repl_meaningful_offset] + } +}}}