2020-05-25 18:37:05 +02:00
|
|
|
|
|
|
|
# Debugging helper: print the replication state of the five test instances,
# followed by their replication-related log lines merged into one timeline.
#
# Takes no arguments. It reads the following variables from the caller's
# scope: master_id, and the arrays R (client handles), R_log (log file
# paths) and R_port (ports), each indexed 0..4.
#
# The caller's variables are linked with upvar and everything else is kept
# local to this proc, so calling it from inside a caller loop that uses "j"
# (or any other common name) does not overwrite the caller's variables.
proc show_cluster_status {} {
    upvar 1 master_id master_id R R R_log R_log R_port R_port

    # The following is the regexp we use to match the log line
    # time info. Logs are in the following form:
    #
    # 11296:M 25 May 2020 17:37:14.652 # Server initialized
    set log_regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*}
    set repl_regexp {(master|repl|sync|backlog|meaningful|offset)}

    puts "Master ID is $master_id"
    for {set j 0} {$j < 5} {incr j} {
        puts "$j: sync_full: [status $R($j) sync_full]"
        puts "$j: id1 : [status $R($j) master_replid]:[status $R($j) master_repl_offset]"
        puts "$j: id2 : [status $R($j) master_replid2]:[status $R($j) second_repl_offset]"
        puts "$j: backlog : firstbyte=[status $R($j) repl_backlog_first_byte_offset] len=[status $R($j) repl_backlog_histlen]"
        puts "$j: x var is : [$R($j) GET x]"
        puts "---"
    }

    # Show the replication logs of every instance, interleaving
    # them by the log date.
    #
    # First: load the lines as lists for each instance.
    array set log {}
    for {set j 0} {$j < 5} {incr j} {
        # Always create the element, so that an instance without any
        # matching line yields an empty list instead of a missing array
        # element when it is inspected below.
        set log($j) {}
        set fd [open $R_log($j)]
        while {[gets $fd l] >= 0} {
            if {[regexp $log_regexp $l] &&
                [regexp -nocase $repl_regexp $l]} {
                lappend log($j) $l
            }
        }
        close $fd
    }

    # To interleave the lines, at every step consume the element of
    # the list with the lowest time and remove it. Do it until
    # all the lists are empty.
    #
    # regexp {^[0-9]+:[A-Z] [0-9]+ [A-z]+ [0-9]+ ([0-9:.]+) .*} $l - logdate
    while 1 {
        # Find the log with smallest time.
        set empty 0
        set best 0
        set bestdate {}
        for {set j 0} {$j < 5} {incr j} {
            if {[llength $log($j)] == 0} {
                incr empty
                continue
            }
            regexp $log_regexp [lindex $log($j) 0] - date
            if {$bestdate eq {}} {
                set best $j
                set bestdate $date
            } else {
                # Time strings are compared as plain strings.
                if {[string compare $bestdate $date] > 0} {
                    set best $j
                    set bestdate $date
                }
            }
        }
        if {$empty == 5} break ; # Our exit condition: no more logs

        # Emit the one with the smallest time (that is the first
        # event in the time line).
        puts "\[$best port $R_port($best)\] [lindex $log($best) 0]"
        set log($best) [lrange $log($best) 1 end]
    }
}
|
|
|
|
|
2021-06-09 15:13:24 +03:00
|
|
|
start_server {tags {"psync2 external:skip"}} {
|
2016-11-28 10:13:24 +01:00
|
|
|
start_server {} {
|
|
|
|
start_server {} {
|
|
|
|
start_server {} {
|
|
|
|
start_server {} {
|
|
|
|
set master_id 0 ; # Current master
|
|
|
|
set start_time [clock seconds] ; # Test start time
|
|
|
|
set counter_value 0 ; # Current value of the Redis counter "x"
|
|
|
|
|
|
|
|
# Config
|
2016-11-29 10:22:40 +01:00
|
|
|
set debug_msg 0 ; # Enable additional debug messages
|
|
|
|
|
2017-11-30 18:37:07 +01:00
|
|
|
set no_exit 0 ; # Do not exit at end of the test
|
2016-11-29 10:22:40 +01:00
|
|
|
|
2020-05-16 18:03:28 +02:00
|
|
|
set duration 40 ; # Total test seconds
|
2016-11-28 10:13:24 +01:00
|
|
|
|
|
|
|
set genload 1 ; # Load master with writes at every cycle
|
|
|
|
|
|
|
|
set genload_time 5000 ; # Writes duration time in ms
|
|
|
|
|
|
|
|
set disconnect 1 ; # Break replication link between random
|
|
|
|
# master and slave instances while the
|
|
|
|
# master is loaded with writes.
|
|
|
|
|
|
|
|
set disconnect_period 1000 ; # Disconnect repl link every N ms.
|
|
|
|
|
2016-11-29 10:22:40 +01:00
|
|
|
for {set j 0} {$j < 5} {incr j} {
|
|
|
|
set R($j) [srv [expr 0-$j] client]
|
|
|
|
set R_host($j) [srv [expr 0-$j] host]
|
|
|
|
set R_port($j) [srv [expr 0-$j] port]
|
2021-06-30 14:18:10 +08:00
|
|
|
set R_id_from_port($R_port($j)) $j ;# To get a replica index by port
|
2020-05-25 18:37:05 +02:00
|
|
|
set R_log($j) [srv [expr 0-$j] stdout]
|
2016-11-29 10:22:40 +01:00
|
|
|
if {$debug_msg} {puts "Log file: [srv [expr 0-$j] stdout]"}
|
|
|
|
}
|
|
|
|
|
Make sure execute SLAVEOF command in the right order in psync2 test. (#9316)
The psync2 test has failed several times recently.
In #9159 we only solved half of the problem.
i.e. reordering of the replica that's already connected to
the newly promoted master.
Consider this scenario:
0 slaveof 2
1 slaveof 2
3 slaveof 2
4 slaveof 1
0 slaveof no one, became a new master got a new replid
2 slaveof 0, partial resync and got the new replid
3 reconnect 2, inherit the new replid
3 slaveof 4, use the new replid and got a full resync
And another scenario:
1 slaveof 3
2 slaveof 4
3 slaveof 0
4 slaveof 0
4 slaveof no one, became a new master got a new replid
2 reconnect 4, inherit the new replid
2 slaveof 1, use the new replid and got a full resync
So maybe we should reattach replicas in the right order.
i.e. In the above example, if it would have reattached 1, 3 and 0 to
the new chain formed by 4 before trying to attach 2 to 1, it would succeed.
This commit break the SLAVEOF loop into two loops. (ideas from oran)
First loop that uses random to decide who replicates from who.
Second loop that does the actual SLAVEOF command.
In the second loop, we make sure to execute it in the right order,
and after each SLAVEOF, wait for it to be connected before we proceed.
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-08-05 16:26:09 +08:00
|
|
|
set cycle 0
|
2016-11-28 10:13:24 +01:00
|
|
|
while {([clock seconds]-$start_time) < $duration} {
|
2018-07-31 17:28:30 +02:00
|
|
|
incr cycle
|
Make sure execute SLAVEOF command in the right order in psync2 test. (#9316)
The psync2 test has failed several times recently.
In #9159 we only solved half of the problem.
i.e. reordering of the replica that's already connected to
the newly promoted master.
Consider this scenario:
0 slaveof 2
1 slaveof 2
3 slaveof 2
4 slaveof 1
0 slaveof no one, became a new master got a new replid
2 slaveof 0, partial resync and got the new replid
3 reconnect 2, inherit the new replid
3 slaveof 4, use the new replid and got a full resync
And another scenario:
1 slaveof 3
2 slaveof 4
3 slaveof 0
4 slaveof 0
4 slaveof no one, became a new master got a new replid
2 reconnect 4, inherit the new replid
2 slaveof 1, use the new replid and got a full resync
So maybe we should reattach replicas in the right order.
i.e. In the above example, if it would have reattached 1, 3 and 0 to
the new chain formed by 4 before trying to attach 2 to 1, it would succeed.
This commit break the SLAVEOF loop into two loops. (ideas from oran)
First loop that uses random to decide who replicates from who.
Second loop that does the actual SLAVEOF command.
In the second loop, we make sure to execute it in the right order,
and after each SLAVEOF, wait for it to be connected before we proceed.
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-08-05 16:26:09 +08:00
|
|
|
test "PSYNC2: --- CYCLE $cycle ---" {}
|
2016-11-28 10:13:24 +01:00
|
|
|
|
|
|
|
# Create a random replication layout.
|
|
|
|
# Start with switching master (this simulates a failover).
|
|
|
|
|
|
|
|
# 1) Select the new master.
|
|
|
|
set master_id [randomInt 5]
|
|
|
|
set used [list $master_id]
|
|
|
|
test "PSYNC2: \[NEW LAYOUT\] Set #$master_id as master" {
|
|
|
|
$R($master_id) slaveof no one
|
2021-06-10 20:39:33 +08:00
|
|
|
$R($master_id) config set repl-ping-replica-period 1 ;# increase the chance that random ping will cause issues
|
2016-11-28 10:13:24 +01:00
|
|
|
if {$counter_value == 0} {
|
|
|
|
$R($master_id) set x $counter_value
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-06-30 14:18:10 +08:00
|
|
|
# Build a lookup with the root master of each replica (head of the chain).
|
|
|
|
array set root_master {}
|
|
|
|
for {set j 0} {$j < 5} {incr j} {
|
|
|
|
set r $j
|
|
|
|
while {1} {
|
|
|
|
set r_master_port [status $R($r) master_port]
|
|
|
|
if {$r_master_port == ""} {
|
|
|
|
set root_master($j) $r
|
|
|
|
break
|
|
|
|
}
|
|
|
|
set r_master_id $R_id_from_port($r_master_port)
|
|
|
|
set r $r_master_id
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# Wait for the newly detached master-replica chain (new master and existing replicas that were
|
|
|
|
# already connected to it, to get updated on the new replication id.
|
|
|
|
# This is needed to avoid a race that can result in a full sync when a replica that already
|
|
|
|
# got an updated repl id, tries to psync from one that's not yet aware of it.
|
|
|
|
wait_for_condition 50 1000 {
|
|
|
|
([status $R(0) master_replid] == [status $R($root_master(0)) master_replid]) &&
|
|
|
|
([status $R(1) master_replid] == [status $R($root_master(1)) master_replid]) &&
|
|
|
|
([status $R(2) master_replid] == [status $R($root_master(2)) master_replid]) &&
|
|
|
|
([status $R(3) master_replid] == [status $R($root_master(3)) master_replid]) &&
|
|
|
|
([status $R(4) master_replid] == [status $R($root_master(4)) master_replid])
|
|
|
|
} else {
|
|
|
|
show_cluster_status
|
|
|
|
fail "Replica did not inherit the new replid."
|
|
|
|
}
|
|
|
|
|
Make sure execute SLAVEOF command in the right order in psync2 test. (#9316)
The psync2 test has failed several times recently.
In #9159 we only solved half of the problem.
i.e. reordering of the replica that's already connected to
the newly promoted master.
Consider this scenario:
0 slaveof 2
1 slaveof 2
3 slaveof 2
4 slaveof 1
0 slaveof no one, became a new master got a new replid
2 slaveof 0, partial resync and got the new replid
3 reconnect 2, inherit the new replid
3 slaveof 4, use the new replid and got a full resync
And another scenario:
1 slaveof 3
2 slaveof 4
3 slaveof 0
4 slaveof 0
4 slaveof no one, became a new master got a new replid
2 reconnect 4, inherit the new replid
2 slaveof 1, use the new replid and got a full resync
So maybe we should reattach replicas in the right order.
i.e. In the above example, if it would have reattached 1, 3 and 0 to
the new chain formed by 4 before trying to attach 2 to 1, it would succeed.
This commit break the SLAVEOF loop into two loops. (ideas from oran)
First loop that uses random to decide who replicates from who.
Second loop that does the actual SLAVEOF command.
In the second loop, we make sure to execute it in the right order,
and after each SLAVEOF, wait for it to be connected before we proceed.
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-08-05 16:26:09 +08:00
|
|
|
# Build a lookup with the direct connection master of each replica.
|
|
|
|
# First loop that uses random to decide who replicates from who.
|
|
|
|
array set slave_to_master {}
|
2016-11-28 10:13:24 +01:00
|
|
|
while {[llength $used] != 5} {
|
|
|
|
while 1 {
|
|
|
|
set slave_id [randomInt 5]
|
|
|
|
if {[lsearch -exact $used $slave_id] == -1} break
|
|
|
|
}
|
|
|
|
set rand [randomInt [llength $used]]
|
|
|
|
set mid [lindex $used $rand]
|
Make sure execute SLAVEOF command in the right order in psync2 test. (#9316)
The psync2 test has failed several times recently.
In #9159 we only solved half of the problem.
i.e. reordering of the replica that's already connected to
the newly promoted master.
Consider this scenario:
0 slaveof 2
1 slaveof 2
3 slaveof 2
4 slaveof 1
0 slaveof no one, became a new master got a new replid
2 slaveof 0, partial resync and got the new replid
3 reconnect 2, inherit the new replid
3 slaveof 4, use the new replid and got a full resync
And another scenario:
1 slaveof 3
2 slaveof 4
3 slaveof 0
4 slaveof 0
4 slaveof no one, became a new master got a new replid
2 reconnect 4, inherit the new replid
2 slaveof 1, use the new replid and got a full resync
So maybe we should reattach replicas in the right order.
i.e. In the above example, if it would have reattached 1, 3 and 0 to
the new chain formed by 4 before trying to attach 2 to 1, it would succeed.
This commit break the SLAVEOF loop into two loops. (ideas from oran)
First loop that uses random to decide who replicates from who.
Second loop that does the actual SLAVEOF command.
In the second loop, we make sure to execute it in the right order,
and after each SLAVEOF, wait for it to be connected before we proceed.
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-08-05 16:26:09 +08:00
|
|
|
set slave_to_master($slave_id) $mid
|
|
|
|
lappend used $slave_id
|
|
|
|
}
|
2016-11-28 10:13:24 +01:00
|
|
|
|
Make sure execute SLAVEOF command in the right order in psync2 test. (#9316)
The psync2 test has failed several times recently.
In #9159 we only solved half of the problem.
i.e. reordering of the replica that's already connected to
the newly promoted master.
Consider this scenario:
0 slaveof 2
1 slaveof 2
3 slaveof 2
4 slaveof 1
0 slaveof no one, became a new master got a new replid
2 slaveof 0, partial resync and got the new replid
3 reconnect 2, inherit the new replid
3 slaveof 4, use the new replid and got a full resync
And another scenario:
1 slaveof 3
2 slaveof 4
3 slaveof 0
4 slaveof 0
4 slaveof no one, became a new master got a new replid
2 reconnect 4, inherit the new replid
2 slaveof 1, use the new replid and got a full resync
So maybe we should reattach replicas in the right order.
i.e. In the above example, if it would have reattached 1, 3 and 0 to
the new chain formed by 4 before trying to attach 2 to 1, it would succeed.
This commit break the SLAVEOF loop into two loops. (ideas from oran)
First loop that uses random to decide who replicates from who.
Second loop that does the actual SLAVEOF command.
In the second loop, we make sure to execute it in the right order,
and after each SLAVEOF, wait for it to be connected before we proceed.
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-08-05 16:26:09 +08:00
|
|
|
# 2) Attach all the slaves to a random instance
|
|
|
|
# Second loop that does the actual SLAVEOF command and make sure execute it in the right order.
|
|
|
|
# Drain slave_to_master: each pass of the outer loop attaches one replica
# that is safe to attach right now, waits until its link is up, records its
# new root master and removes it from the pending set.
while {[array size slave_to_master] > 0} {
    foreach slave_id [array names slave_to_master] {
        set mid $slave_to_master($slave_id)

        # We only attach the replica to a random instance that already in the old/new chain.
        if {$root_master($mid) == $root_master($master_id)} {
            # Find a replica that can be attached to the new chain already attached to the new master.
            # My new master is in the new chain.
        } elseif {$root_master($mid) == $root_master($slave_id)} {
            # My new master and I are in the old chain.
        } else {
            # In cycle 1, we do not care about the order.
            if {$cycle != 1} {
                # skipping this replica for now to avoid attaching in a bad order
                # this is done to avoid an unexpected full sync, when we take a
                # replica that already reconnected to the new chain and got a new replid
                # and is then set to connect to a master that's still not aware of that new replid
                continue
            }
        }

        # The replication target is the instance picked for this replica
        # ($mid), not the top-level master: this is what the test title
        # below announces and what the root_master update relies on.
        set master_host $R_host($mid)
        set master_port $R_port($mid)

        test "PSYNC2: Set #$slave_id to replicate from #$mid" {
            $R($slave_id) slaveof $master_host $master_port
        }

        # Wait for replica to be connected before we proceed.
        wait_for_condition 50 1000 {
            [status $R($slave_id) master_link_status] == "up"
        } else {
            show_cluster_status
            fail "Replica not reconnecting."
        }

        set root_master($slave_id) $root_master($mid)
        unset slave_to_master($slave_id)
        # Restart the scan: the array was modified and the chain
        # membership of the remaining replicas may have changed.
        break
    }
}
|
|
|
|
|
2020-04-28 09:18:01 +03:00
|
|
|
# Wait for replicas to sync. so next loop won't get -LOADING error
|
|
|
|
wait_for_condition 50 1000 {
|
|
|
|
[status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
|
|
|
|
[status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
|
|
|
|
[status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
|
|
|
|
[status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
|
|
|
|
} else {
|
2020-05-25 18:37:05 +02:00
|
|
|
show_cluster_status
|
2020-04-28 09:18:01 +03:00
|
|
|
fail "Replica not reconnecting"
|
|
|
|
}
|
|
|
|
|
2016-11-28 10:13:24 +01:00
|
|
|
# 3) Increment the counter and wait for all the instances
|
|
|
|
# to converge.
|
|
|
|
test "PSYNC2: cluster is consistent after failover" {
|
|
|
|
$R($master_id) incr x; incr counter_value
|
|
|
|
for {set j 0} {$j < 5} {incr j} {
|
|
|
|
wait_for_condition 50 1000 {
|
|
|
|
[$R($j) get x] == $counter_value
|
|
|
|
} else {
|
2020-05-25 18:37:05 +02:00
|
|
|
show_cluster_status
|
2016-11-28 10:13:24 +01:00
|
|
|
fail "Instance #$j x variable is inconsistent"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# 4) Generate load while breaking the connection of random
|
|
|
|
# slave-master pairs.
|
|
|
|
test "PSYNC2: generate load while killing replication links" {
    # For $genload_time milliseconds: optionally increment the counter on
    # the master at every iteration and, once per $disconnect_period
    # milliseconds, issue CLIENT KILL TYPE master on a random instance.
    set t [clock milliseconds]
    set next_break [expr {$t+$disconnect_period}]
    while {[clock milliseconds]-$t < $genload_time} {
        if {$genload} {
            $R($master_id) incr x; incr counter_value
        }
        # Trigger as soon as the deadline is reached or passed. An exact
        # equality check would only fire if the clock happened to be
        # sampled on that very millisecond, and never again afterwards.
        if {[clock milliseconds] >= $next_break} {
            set next_break \
                [expr {[clock milliseconds]+$disconnect_period}]
            set slave_id [randomInt 5]
            if {$disconnect} {
                $R($slave_id) client kill type master
                if {$debug_msg} {
                    puts "+++ Breaking link for replica #$slave_id"
                }
            }
        }
    }
}
|
|
|
|
|
|
|
|
# 5) Increment the counter and wait for all the instances
|
|
|
|
set x [$R($master_id) get x]
|
|
|
|
test "PSYNC2: cluster is consistent after load (x = $x)" {
|
|
|
|
for {set j 0} {$j < 5} {incr j} {
|
|
|
|
wait_for_condition 50 1000 {
|
|
|
|
[$R($j) get x] == $counter_value
|
|
|
|
} else {
|
2020-05-25 18:37:05 +02:00
|
|
|
show_cluster_status
|
2016-11-28 10:13:24 +01:00
|
|
|
fail "Instance #$j x variable is inconsistent"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-16 18:03:28 +02:00
|
|
|
# wait for all the slaves to be in sync.
|
|
|
|
set masteroff [status $R($master_id) master_repl_offset]
|
2020-03-05 16:55:14 +02:00
|
|
|
wait_for_condition 500 100 {
|
2020-05-16 18:03:28 +02:00
|
|
|
[status $R(0) master_repl_offset] >= $masteroff &&
|
|
|
|
[status $R(1) master_repl_offset] >= $masteroff &&
|
|
|
|
[status $R(2) master_repl_offset] >= $masteroff &&
|
|
|
|
[status $R(3) master_repl_offset] >= $masteroff &&
|
|
|
|
[status $R(4) master_repl_offset] >= $masteroff
|
2020-03-05 16:55:14 +02:00
|
|
|
} else {
|
2020-05-25 18:37:05 +02:00
|
|
|
show_cluster_status
|
2020-05-16 18:03:28 +02:00
|
|
|
fail "Replicas offsets didn't catch up with the master after too long time."
|
2020-03-05 16:55:14 +02:00
|
|
|
}
|
|
|
|
|
2016-11-29 10:22:40 +01:00
|
|
|
if {$debug_msg} {
|
2020-05-25 18:37:05 +02:00
|
|
|
show_cluster_status
|
2016-11-29 10:22:40 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
test "PSYNC2: total sum of full synchronizations is exactly 4" {
|
|
|
|
set sum 0
|
|
|
|
for {set j 0} {$j < 5} {incr j} {
|
|
|
|
incr sum [status $R($j) sync_full]
|
|
|
|
}
|
2020-05-25 18:37:05 +02:00
|
|
|
if {$sum != 4} {
|
|
|
|
show_cluster_status
|
|
|
|
assert {$sum == 4}
|
|
|
|
}
|
2016-11-28 10:13:24 +01:00
|
|
|
}
|
2018-07-31 17:28:30 +02:00
|
|
|
|
2020-05-16 18:03:28 +02:00
|
|
|
# In absence of pings, are the instances really able to have
|
|
|
|
# the exact same offset?
|
|
|
|
$R($master_id) config set repl-ping-replica-period 3600
|
2022-11-30 22:03:23 +02:00
|
|
|
for {set j 0} {$j < 5} {incr j} {
|
|
|
|
if {$j == $master_id} continue
|
|
|
|
$R($j) config set repl-timeout 10000
|
|
|
|
}
|
2020-05-16 18:03:28 +02:00
|
|
|
wait_for_condition 500 100 {
|
|
|
|
[status $R($master_id) master_repl_offset] == [status $R(0) master_repl_offset] &&
|
|
|
|
[status $R($master_id) master_repl_offset] == [status $R(1) master_repl_offset] &&
|
|
|
|
[status $R($master_id) master_repl_offset] == [status $R(2) master_repl_offset] &&
|
|
|
|
[status $R($master_id) master_repl_offset] == [status $R(3) master_repl_offset] &&
|
|
|
|
[status $R($master_id) master_repl_offset] == [status $R(4) master_repl_offset]
|
|
|
|
} else {
|
2020-05-25 18:37:05 +02:00
|
|
|
show_cluster_status
|
2020-05-16 18:03:28 +02:00
|
|
|
fail "Replicas and master offsets were unable to match *exactly*."
|
|
|
|
}
|
|
|
|
|
2018-07-31 17:28:30 +02:00
|
|
|
# Limit anyway the maximum number of cycles. This is useful when the
|
|
|
|
# test is skipped via --only option of the test suite. In that case
|
|
|
|
# we don't want to see many seconds of this test being just skipped.
|
|
|
|
if {$cycle > 50} break
|
2016-11-28 10:13:24 +01:00
|
|
|
}
|
|
|
|
|
2016-11-29 11:15:12 +01:00
|
|
|
test "PSYNC2: Bring the master back again for next test" {
|
|
|
|
$R($master_id) slaveof no one
|
|
|
|
set master_host $R_host($master_id)
|
|
|
|
set master_port $R_port($master_id)
|
|
|
|
for {set j 0} {$j < 5} {incr j} {
|
|
|
|
if {$j == $master_id} continue
|
|
|
|
$R($j) slaveof $master_host $master_port
|
|
|
|
}
|
|
|
|
|
Keep track of meaningful replication offset in replicas too
Now both master and replicas keep track of the last replication offset
that contains meaningful data (ignoring the tailing pings), and both
trim that tail from the replication backlog, and the offset with which
they try to use for psync.
the implication is that if someone missed some pings, or even have
excessive pings that the promoted replica has, it'll still be able to
psync (avoid full sync).
the downside (which was already committed) is that replicas running old
code may fail to psync, since the promoted replica trims pings form it's
backlog.
This commit adds a test that reproduces several cases of promotions and
demotions with stale and non-stale pings
Background:
The meaningful offset on the master was added recently to solve a problem where
the master is left all alone, injecting PINGs into it's backlog when no one is
listening and then gets demoted and tries to replicate from a replica that didn't
have any of the PINGs (or at least not the last ones).
however, consider this case:
master A has two replicas (B and C) replicating directly from it.
there's no traffic at all, and also no network issues, just many pings in the
tail of the backlog. now B gets promoted, A becomes a replica of B, and C
remains a replica of A. when A gets demoted, it trims the pings from its
backlog, and successfully replicate from B. however, C is still aware of
these PINGs, when it'll disconnect and re-connect to A, it'll ask for something
that's not in the backlog anymore (since A trimmed the tail of it's backlog),
and be forced to do a full sync (something it didn't have to do before the
meaningful offset fix).
Besides that, the psync2 test was always failing randomly here and there, it
turns out the reason were PINGs. Investigating it shows the following scenario:
cycle 1: redis #1 is master, and all the rest are direct replicas of #1
cycle 2: redis #2 is promoted to master, #1 is a replica of #2 and #3 is replica of #1
now we see that when #1 is demoted it prints:
17339:S 21 Apr 2020 11:16:38.523 * Using the meaningful offset 3929963 instead of 3929977 to exclude the final PINGs (14 bytes difference)
17339:S 21 Apr 2020 11:16:39.391 * Trying a partial resynchronization (request e2b3f8817735fdfe5fa4626766daa938b61419e5:3929964).
17339:S 21 Apr 2020 11:16:39.392 * Successful partial resynchronization with master.
and when #3 connects to the demoted #2, #2 says:
17339:S 21 Apr 2020 11:16:40.084 * Partial resynchronization not accepted: Requested offset for secondary ID was 3929978, but I can reply up to 3929964
so the issue here is that the meaningful offset feature saved the day for the
demoted master (since it needs to sync from a replica that didn't get the last
ping), but it didn't help one of the other replicas which did get the last ping.
2020-04-23 15:04:42 +03:00
|
|
|
# Wait for replicas to sync. it is not enough to just wait for connected_slaves==4
|
|
|
|
# since we might do the check before the master realized that they're disconnected
|
2016-11-29 11:15:12 +01:00
|
|
|
wait_for_condition 50 1000 {
|
Keep track of meaningful replication offset in replicas too
Now both master and replicas keep track of the last replication offset
that contains meaningful data (ignoring the tailing pings), and both
trim that tail from the replication backlog, and the offset with which
they try to use for psync.
the implication is that if someone missed some pings, or even have
excessive pings that the promoted replica has, it'll still be able to
psync (avoid full sync).
the downside (which was already committed) is that replicas running old
code may fail to psync, since the promoted replica trims pings form it's
backlog.
This commit adds a test that reproduces several cases of promotions and
demotions with stale and non-stale pings
Background:
The meaningful offset on the master was added recently to solve a problem where
the master is left all alone, injecting PINGs into it's backlog when no one is
listening and then gets demoted and tries to replicate from a replica that didn't
have any of the PINGs (or at least not the last ones).
however, consider this case:
master A has two replicas (B and C) replicating directly from it.
there's no traffic at all, and also no network issues, just many pings in the
tail of the backlog. now B gets promoted, A becomes a replica of B, and C
remains a replica of A. when A gets demoted, it trims the pings from its
backlog, and successfully replicate from B. however, C is still aware of
these PINGs, when it'll disconnect and re-connect to A, it'll ask for something
that's not in the backlog anymore (since A trimmed the tail of it's backlog),
and be forced to do a full sync (something it didn't have to do before the
meaningful offset fix).
Besides that, the psync2 test was always failing randomly here and there, it
turns out the reason were PINGs. Investigating it shows the following scenario:
cycle 1: redis #1 is master, and all the rest are direct replicas of #1
cycle 2: redis #2 is promoted to master, #1 is a replica of #2 and #3 is replica of #1
now we see that when #1 is demoted it prints:
17339:S 21 Apr 2020 11:16:38.523 * Using the meaningful offset 3929963 instead of 3929977 to exclude the final PINGs (14 bytes difference)
17339:S 21 Apr 2020 11:16:39.391 * Trying a partial resynchronization (request e2b3f8817735fdfe5fa4626766daa938b61419e5:3929964).
17339:S 21 Apr 2020 11:16:39.392 * Successful partial resynchronization with master.
and when #3 connects to the demoted #2, #2 says:
17339:S 21 Apr 2020 11:16:40.084 * Partial resynchronization not accepted: Requested offset for secondary ID was 3929978, but I can reply up to 3929964
so the issue here is that the meaningful offset feature saved the day for the
demoted master (since it needs to sync from a replica that didn't get the last
ping), but it didn't help one of the other replicas which did get the last ping.
2020-04-23 15:04:42 +03:00
|
|
|
[status $R($master_id) connected_slaves] == 4 &&
|
|
|
|
[status $R([expr {($master_id+1)%5}]) master_link_status] == "up" &&
|
|
|
|
[status $R([expr {($master_id+2)%5}]) master_link_status] == "up" &&
|
|
|
|
[status $R([expr {($master_id+3)%5}]) master_link_status] == "up" &&
|
|
|
|
[status $R([expr {($master_id+4)%5}]) master_link_status] == "up"
|
2016-11-29 11:15:12 +01:00
|
|
|
} else {
|
2020-05-25 18:37:05 +02:00
|
|
|
show_cluster_status
|
2018-09-11 11:03:28 +02:00
|
|
|
fail "Replica not reconnecting"
|
2016-11-29 11:15:12 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
test "PSYNC2: Partial resync after restart using RDB aux fields" {
|
|
|
|
# Pick a random slave
|
|
|
|
set slave_id [expr {($master_id+1)%5}]
|
|
|
|
set sync_count [status $R($master_id) sync_full]
|
2019-05-05 08:19:52 +03:00
|
|
|
set sync_partial [status $R($master_id) sync_partial_ok]
|
Keep track of meaningful replication offset in replicas too
Now both master and replicas keep track of the last replication offset
that contains meaningful data (ignoring the tailing pings), and both
trim that tail from the replication backlog, and the offset with which
they try to use for psync.
the implication is that if someone missed some pings, or even have
excessive pings that the promoted replica has, it'll still be able to
psync (avoid full sync).
the downside (which was already committed) is that replicas running old
code may fail to psync, since the promoted replica trims pings form it's
backlog.
This commit adds a test that reproduces several cases of promotions and
demotions with stale and non-stale pings
Background:
The meaningful offset on the master was added recently to solve a problem where
the master is left all alone, injecting PINGs into it's backlog when no one is
listening and then gets demoted and tries to replicate from a replica that didn't
have any of the PINGs (or at least not the last ones).
however, consider this case:
master A has two replicas (B and C) replicating directly from it.
there's no traffic at all, and also no network issues, just many pings in the
tail of the backlog. now B gets promoted, A becomes a replica of B, and C
remains a replica of A. when A gets demoted, it trims the pings from its
backlog, and successfully replicate from B. however, C is still aware of
these PINGs, when it'll disconnect and re-connect to A, it'll ask for something
that's not in the backlog anymore (since A trimmed the tail of it's backlog),
and be forced to do a full sync (something it didn't have to do before the
meaningful offset fix).
Besides that, the psync2 test was always failing randomly here and there, it
turns out the reason were PINGs. Investigating it shows the following scenario:
cycle 1: redis #1 is master, and all the rest are direct replicas of #1
cycle 2: redis #2 is promoted to master, #1 is a replica of #2 and #3 is replica of #1
now we see that when #1 is demoted it prints:
17339:S 21 Apr 2020 11:16:38.523 * Using the meaningful offset 3929963 instead of 3929977 to exclude the final PINGs (14 bytes difference)
17339:S 21 Apr 2020 11:16:39.391 * Trying a partial resynchronization (request e2b3f8817735fdfe5fa4626766daa938b61419e5:3929964).
17339:S 21 Apr 2020 11:16:39.392 * Successful partial resynchronization with master.
and when #3 connects to the demoted #2, #2 says:
17339:S 21 Apr 2020 11:16:40.084 * Partial resynchronization not accepted: Requested offset for secondary ID was 3929978, but I can reply up to 3929964
so the issue here is that the meaningful offset feature saved the day for the
demoted master (since it needs to sync from a replica that didn't get the last
ping), but it didn't help one of the other replicas which did get the last ping.
2020-04-23 15:04:42 +03:00
|
|
|
set sync_partial_err [status $R($master_id) sync_partial_err]
|
2016-11-29 11:15:12 +01:00
|
|
|
catch {
|
2023-04-18 21:14:26 +08:00
|
|
|
# Make sure the server saves an RDB on shutdown
|
|
|
|
$R($slave_id) config set save "900 1"
|
2016-11-29 11:15:12 +01:00
|
|
|
$R($slave_id) config rewrite
|
2020-08-14 16:05:34 +03:00
|
|
|
restart_server [expr {0-$slave_id}] true false
|
tests/valgrind: don't use debug restart (#7404)
* tests/valgrind: don't use debug restart
DEBUG RESTART causes two issues:
1. it uses execve which replaces the original process and valgrind doesn't
have a chance to check for errors, so leaks go unreported.
2. valgrind report invalid calls to close() which we're unable to resolve.
So now the tests use the restart_server mechanism, which terminates
the old server and starts a new one: new PID, but same stdout and stderr.
Since the stderr can contain two or more valgrind reports, it is not enough
to just check for the absence of leaks; we also need to check for some known
errors. We do both, and fail if we either find an error or can't find a
report saying there are no leaks.
other changes:
- when killing a server that was already terminated we check for leaks too.
- adding DEBUG LEAK which was used to test it.
- adding --trace-children to valgrind, although no longer needed.
- since the stdout contains two or more runs, we need a slightly different way
of checking if the new process is up (explicitly looking for the new PID)
- move the code that handles --wait-server to happen earlier (before
watching the startup message in the log), and serve the restarted server too.
* squashme - CR fixes
2020-07-10 08:26:52 +03:00
|
|
|
set R($slave_id) [srv [expr {0-$slave_id}] client]
|
2016-11-29 11:15:12 +01:00
|
|
|
}
|
2019-05-05 08:19:52 +03:00
|
|
|
# note: just waiting for connected_slaves==4 has a race condition since
|
|
|
|
# we might do the check before the master realized that the slave disconnected
|
2016-11-29 11:15:12 +01:00
|
|
|
wait_for_condition 50 1000 {
|
2019-05-05 08:19:52 +03:00
|
|
|
[status $R($master_id) sync_partial_ok] == $sync_partial + 1
|
2016-11-29 11:15:12 +01:00
|
|
|
} else {
|
Keep track of meaningful replication offset in replicas too
Now both master and replicas keep track of the last replication offset
that contains meaningful data (ignoring the trailing pings), and both
trim that tail from the replication backlog and from the offset they
use for psync.
The implication is that if someone missed some pings, or even has
excessive pings that the promoted replica has, it'll still be able to
psync (avoid full sync).
The downside (which was already committed) is that replicas running old
code may fail to psync, since the promoted replica trims pings from its
backlog.
This commit adds a test that reproduces several cases of promotions and
demotions with stale and non-stale pings
Background:
The meaningful offset on the master was added recently to solve a problem where
the master is left all alone, injecting PINGs into its backlog when no one is
listening, and then gets demoted and tries to replicate from a replica that didn't
have any of the PINGs (or at least not the last ones).
however, consider this case:
Master A has two replicas (B and C) replicating directly from it.
There's no traffic at all, and also no network issues, just many pings in the
tail of the backlog. Now B gets promoted, A becomes a replica of B, and C
remains a replica of A. When A gets demoted, it trims the pings from its
backlog and successfully replicates from B. However, C is still aware of
these PINGs, so when it disconnects and reconnects to A, it'll ask for something
that's not in the backlog anymore (since A trimmed the tail of its backlog),
and be forced to do a full sync (something it didn't have to do before the
meaningful offset fix).
Besides that, the psync2 test was always failing randomly here and there; it
turns out the reason was PINGs. Investigating it shows the following scenario:
cycle 1: redis #1 is master, and all the rest are direct replicas of #1
cycle 2: redis #2 is promoted to master, #1 is a replica of #2 and #3 is replica of #1
now we see that when #1 is demoted it prints:
17339:S 21 Apr 2020 11:16:38.523 * Using the meaningful offset 3929963 instead of 3929977 to exclude the final PINGs (14 bytes difference)
17339:S 21 Apr 2020 11:16:39.391 * Trying a partial resynchronization (request e2b3f8817735fdfe5fa4626766daa938b61419e5:3929964).
17339:S 21 Apr 2020 11:16:39.392 * Successful partial resynchronization with master.
and when #3 connects to the demoted #1, #1 says:
17339:S 21 Apr 2020 11:16:40.084 * Partial resynchronization not accepted: Requested offset for secondary ID was 3929978, but I can reply up to 3929964
so the issue here is that the meaningful offset feature saved the day for the
demoted master (since it needs to sync from a replica that didn't get the last
ping), but it didn't help one of the other replicas which did get the last ping.
2020-04-23 15:04:42 +03:00
|
|
|
puts "prev sync_full: $sync_count"
|
|
|
|
puts "prev sync_partial_ok: $sync_partial"
|
|
|
|
puts "prev sync_partial_err: $sync_partial_err"
|
|
|
|
puts [$R($master_id) info stats]
|
2020-05-25 18:37:05 +02:00
|
|
|
show_cluster_status
|
Keep track of meaningful replication offset in replicas too
Now both master and replicas keep track of the last replication offset
that contains meaningful data (ignoring the trailing pings), and both
trim that tail from the replication backlog and from the offset they
use for psync.
The implication is that if someone missed some pings, or even has
excessive pings that the promoted replica has, it'll still be able to
psync (avoid full sync).
The downside (which was already committed) is that replicas running old
code may fail to psync, since the promoted replica trims pings from its
backlog.
This commit adds a test that reproduces several cases of promotions and
demotions with stale and non-stale pings
Background:
The meaningful offset on the master was added recently to solve a problem where
the master is left all alone, injecting PINGs into its backlog when no one is
listening, and then gets demoted and tries to replicate from a replica that didn't
have any of the PINGs (or at least not the last ones).
however, consider this case:
Master A has two replicas (B and C) replicating directly from it.
There's no traffic at all, and also no network issues, just many pings in the
tail of the backlog. Now B gets promoted, A becomes a replica of B, and C
remains a replica of A. When A gets demoted, it trims the pings from its
backlog and successfully replicates from B. However, C is still aware of
these PINGs, so when it disconnects and reconnects to A, it'll ask for something
that's not in the backlog anymore (since A trimmed the tail of its backlog),
and be forced to do a full sync (something it didn't have to do before the
meaningful offset fix).
Besides that, the psync2 test was always failing randomly here and there; it
turns out the reason was PINGs. Investigating it shows the following scenario:
cycle 1: redis #1 is master, and all the rest are direct replicas of #1
cycle 2: redis #2 is promoted to master, #1 is a replica of #2 and #3 is replica of #1
now we see that when #1 is demoted it prints:
17339:S 21 Apr 2020 11:16:38.523 * Using the meaningful offset 3929963 instead of 3929977 to exclude the final PINGs (14 bytes difference)
17339:S 21 Apr 2020 11:16:39.391 * Trying a partial resynchronization (request e2b3f8817735fdfe5fa4626766daa938b61419e5:3929964).
17339:S 21 Apr 2020 11:16:39.392 * Successful partial resynchronization with master.
and when #3 connects to the demoted #1, #1 says:
17339:S 21 Apr 2020 11:16:40.084 * Partial resynchronization not accepted: Requested offset for secondary ID was 3929978, but I can reply up to 3929964
so the issue here is that the meaningful offset feature saved the day for the
demoted master (since it needs to sync from a replica that didn't get the last
ping), but it didn't help one of the other replicas which did get the last ping.
2020-04-23 15:04:42 +03:00
|
|
|
fail "Replica didn't partial sync"
|
2016-11-29 11:15:12 +01:00
|
|
|
}
|
|
|
|
set new_sync_count [status $R($master_id) sync_full]
|
|
|
|
assert {$sync_count == $new_sync_count}
|
|
|
|
}
|
|
|
|
|
2016-11-29 10:22:40 +01:00
|
|
|
if {$no_exit} {
|
|
|
|
while 1 { puts -nonewline .; flush stdout; after 1000}
|
|
|
|
}
|
2016-11-28 10:13:24 +01:00
|
|
|
|
|
|
|
}}}}}
|