Clean up and stabilize cluster migration tests. (#8745)

This is work in progress, focusing on two main areas:
* Avoiding race conditions with cluster configuration propagation.
* Ignoring limitations with redis-cli --cluster fix which make it hard
  to distinguish real errors (e.g. failure to fix) from expected
  conditions in this test (e.g. nodes not agreeing on configuration).
This commit is contained in:
Yossi Gottlieb 2021-04-06 11:57:57 +03:00 committed by GitHub
parent 3b74b55084
commit 4724dd439e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 66 additions and 25 deletions

View File

@ -4,6 +4,10 @@
# This software is released under the BSD License. See the COPYING file for # This software is released under the BSD License. See the COPYING file for
# more information. # more information.
# Track cluster configuration as created by create_cluster below
set ::cluster_master_nodes 0
set ::cluster_replica_nodes 0
# Returns a parsed CLUSTER NODES output as a list of dictionaries. # Returns a parsed CLUSTER NODES output as a list of dictionaries.
proc get_cluster_nodes id { proc get_cluster_nodes id {
set lines [split [R $id cluster nodes] "\r\n"] set lines [split [R $id cluster nodes] "\r\n"]
@ -120,6 +124,9 @@ proc create_cluster {masters slaves} {
cluster_allocate_slaves $masters $slaves cluster_allocate_slaves $masters $slaves
} }
assert_cluster_state ok assert_cluster_state ok
set ::cluster_master_nodes $masters
set ::cluster_replica_nodes $slaves
} }
# Set the cluster node-timeout to all the reachable nodes. # Set the cluster node-timeout to all the reachable nodes.
@ -143,3 +150,28 @@ proc cluster_write_test {id} {
} }
$cluster close $cluster close
} }
# Compare the CLUSTER SLOTS reply of every node tracked in
# ::cluster_master_nodes + ::cluster_replica_nodes against the reply of
# node 0. Returns 1 when all replies match (or when there are no nodes
# to query) and 0 as soon as one node reports a different reply.
proc cluster_config_consistent {} {
    set node_count [expr {$::cluster_master_nodes + $::cluster_replica_nodes}]
    for {set idx 0} {$idx < $node_count} {incr idx} {
        set slots [R $idx cluster slots]
        if {$idx == 0} {
            # The first node's view is the reference for all the others.
            set reference $slots
            continue
        }
        if {$slots != $reference} {
            return 0
        }
    }
    return 1
}
# Wait for cluster configuration to propagate and be consistent across nodes.
#
# Takes no arguments. Hands the check "[cluster_config_consistent] eq 1" to
# wait_for_condition; the else branch calls fail with a descriptive message.
# NOTE(review): wait_for_condition is defined outside this file; 50 and 100
# are presumably the retry count and the delay between retries - confirm.
proc wait_for_cluster_propagation {} {
# cluster_config_consistent returns 1 once every node reports the same slots.
wait_for_condition 50 100 {
[cluster_config_consistent] eq 1
} else {
fail "cluster config did not reach a consistent state"
}
}

View File

@ -32,55 +32,58 @@ reset_cluster
$cluster set aga xyz $cluster set aga xyz
test "Half init migration in 'migrating' is fixable" { test "Half init migration in 'migrating' is fixable" {
$nodefrom(link) cluster setslot 609 migrating $nodeto(id) assert_equal {OK} [$nodefrom(link) cluster setslot 609 migrating $nodeto(id)]
fix_cluster $nodefrom(addr) fix_cluster $nodefrom(addr)
assert {[$cluster get aga] eq "xyz"} assert_equal "xyz" [$cluster get aga]
} }
test "Half init migration in 'importing' is fixable" { test "Half init migration in 'importing' is fixable" {
$nodeto(link) cluster setslot 609 importing $nodefrom(id) assert_equal {OK} [$nodeto(link) cluster setslot 609 importing $nodefrom(id)]
fix_cluster $nodefrom(addr) fix_cluster $nodefrom(addr)
assert {[$cluster get aga] eq "xyz"} assert_equal "xyz" [$cluster get aga]
} }
test "Init migration and move key" { test "Init migration and move key" {
$nodefrom(link) cluster setslot 609 migrating $nodeto(id) assert_equal {OK} [$nodefrom(link) cluster setslot 609 migrating $nodeto(id)]
$nodeto(link) cluster setslot 609 importing $nodefrom(id) assert_equal {OK} [$nodeto(link) cluster setslot 609 importing $nodefrom(id)]
$nodefrom(link) migrate $nodeto(host) $nodeto(port) aga 0 10000 assert_equal {OK} [$nodefrom(link) migrate $nodeto(host) $nodeto(port) aga 0 10000]
assert {[$cluster get aga] eq "xyz"} wait_for_cluster_propagation
assert_equal "xyz" [$cluster get aga]
fix_cluster $nodefrom(addr) fix_cluster $nodefrom(addr)
assert {[$cluster get aga] eq "xyz"} assert_equal "xyz" [$cluster get aga]
} }
reset_cluster reset_cluster
test "Move key again" { test "Move key again" {
$nodefrom(link) cluster setslot 609 migrating $nodeto(id) wait_for_cluster_propagation
$nodeto(link) cluster setslot 609 importing $nodefrom(id) assert_equal {OK} [$nodefrom(link) cluster setslot 609 migrating $nodeto(id)]
$nodefrom(link) migrate $nodeto(host) $nodeto(port) aga 0 10000 assert_equal {OK} [$nodeto(link) cluster setslot 609 importing $nodefrom(id)]
assert {[$cluster get aga] eq "xyz"} assert_equal {OK} [$nodefrom(link) migrate $nodeto(host) $nodeto(port) aga 0 10000]
wait_for_cluster_propagation
assert_equal "xyz" [$cluster get aga]
} }
test "Half-finish migration" { test "Half-finish migration" {
# half finish migration on 'migrating' node # half finish migration on 'migrating' node
$nodefrom(link) cluster setslot 609 node $nodeto(id) assert_equal {OK} [$nodefrom(link) cluster setslot 609 node $nodeto(id)]
fix_cluster $nodefrom(addr) fix_cluster $nodefrom(addr)
assert {[$cluster get aga] eq "xyz"} assert_equal "xyz" [$cluster get aga]
} }
reset_cluster reset_cluster
test "Move key back" { test "Move key back" {
# 'aga' key is in 609 slot # 'aga' key is in 609 slot
$nodefrom(link) cluster setslot 609 migrating $nodeto(id) assert_equal {OK} [$nodefrom(link) cluster setslot 609 migrating $nodeto(id)]
$nodeto(link) cluster setslot 609 importing $nodefrom(id) assert_equal {OK} [$nodeto(link) cluster setslot 609 importing $nodefrom(id)]
$nodefrom(link) migrate $nodeto(host) $nodeto(port) aga 0 10000 assert_equal {OK} [$nodefrom(link) migrate $nodeto(host) $nodeto(port) aga 0 10000]
assert {[$cluster get aga] eq "xyz"} assert_equal "xyz" [$cluster get aga]
} }
test "Half-finish importing" { test "Half-finish importing" {
# Now we half finish 'importing' node # Now we half finish 'importing' node
$nodeto(link) cluster setslot 609 node $nodeto(id) assert_equal {OK} [$nodeto(link) cluster setslot 609 node $nodeto(id)]
fix_cluster $nodefrom(addr) fix_cluster $nodefrom(addr)
assert {[$cluster get aga] eq "xyz"} assert_equal "xyz" [$cluster get aga]
} }

View File

@ -3,8 +3,12 @@
source "../tests/includes/init-tests.tcl" source "../tests/includes/init-tests.tcl"
source "../tests/includes/utils.tcl" source "../tests/includes/utils.tcl"
# TODO: This test currently runs without replicas, as failovers (which may
# happen on lower-end CI platforms) are still not handled properly by the
# cluster during slot migration (related to #6339).
test "Create a 10 nodes cluster" { test "Create a 10 nodes cluster" {
create_cluster 10 10 create_cluster 10 0
} }
test "Cluster is up" { test "Cluster is up" {
@ -40,6 +44,7 @@ test "Init migration of many slots" {
} }
test "Fix cluster" { test "Fix cluster" {
wait_for_cluster_propagation
fix_cluster $nodefrom(addr) fix_cluster $nodefrom(addr)
} }

View File

@ -5,14 +5,15 @@ proc fix_cluster {addr} {
exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster fix $addr << yes exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster fix $addr << yes
} result] } result]
if {$code != 0} { if {$code != 0} {
puts $result puts "redis-cli --cluster fix returns non-zero exit code, output below:\n$result"
} }
assert {$code == 0} # Note: redis-cli --cluster fix may return a non-zero exit code if nodes don't agree,
# but we can ignore that and rely on the check below.
assert_cluster_state ok assert_cluster_state ok
wait_for_condition 100 100 { wait_for_condition 100 100 {
[catch {exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster check $addr} result] == 0 [catch {exec ../../../src/redis-cli {*}[rediscli_tls_config "../../../tests"] --cluster check $addr} result] == 0
} else { } else {
puts $result puts "redis-cli --cluster check returns non-zero exit code, output below:\n$result"
fail "Cluster could not settle with configuration" fail "Cluster could not settle with configuration"
} }
} }