redict/tests/unit/moduleapi/cluster.tcl
Meir Shpilraien (Spielrein) f3855a0930
Add new RM_Call flags for script mode, no writes, and error replies. (#10372)
The PR extends RM_Call with 3 new capabilities using new flags that
are given to RM_Call as part of the `fmt` argument.
It aims to assist modules that are getting a list of commands to be
executed from the user (not hard coded as part of the module logic),
think of a module that implements a new scripting language...

* `S` - Run the command in a script mode, this means that it will raise an
  error if a command which are not allowed inside a script (flaged with the
  `deny-script` flag) is invoked (like SHUTDOWN). In addition, on script mode,
  write commands are not allowed if there is not enough good replicas (as
  configured with `min-replicas-to-write`) and/or a disk error happened.

* `W` - no writes mode, Redis will reject any command that is marked with `write`
  flag. Again can be useful to modules that implement a new scripting language
  and wants to prevent any write commands.

* `E` - Return errors as RedisModuleCallReply. Today the errors that happened
  before the command was invoked (like unknown commands or acl error) return
  a NULL reply and set errno. This might be missing important information about
  the failure and it is also impossible to just pass the error to the user using
  RM_ReplyWithCallReply. This new flag allows you to get a RedisModuleCallReply
  object with the relevant error message and treat it as if it was an error that was
  raised by the command invocation.

Tests were added to verify the new code paths.

In addition small refactoring was done to share some code between modules,
scripts, and `processCommand` function:
1. `getAclErrorMessage` was added to `acl.c` to unified to log message extraction
  from the acl result
2. `checkGoodReplicasStatus` was added to `replication.c` to check the status of
  good replicas. It is used on `scriptVerifyWriteCommandAllow`, `RM_Call`, and
  `processCommand`.
3. `writeCommandsGetDiskErrorMessage` was added to `server.c` to get the error
  message on persistence failure. Again it is used on `scriptVerifyWriteCommandAllow`,
  `RM_Call`, and `processCommand`.
2022-03-22 14:13:28 +02:00

218 lines
6.6 KiB
Tcl

# Primitive tests on cluster-enabled redis with modules using redis-cli
source tests/support/cli.tcl
proc cluster_info {r field} {
if {[regexp "^$field:(.*?)\r\n" [$r cluster info] _ value]} {
set _ $value
}
}
# Provide easy access to CLUSTER INFO properties. Same semantic as "proc s".
proc csi {args} {
set level 0
if {[string is integer [lindex $args 0]]} {
set level [lindex $args 0]
set args [lrange $args 1 end]
}
cluster_info [srv $level "client"] [lindex $args 0]
}
set testmodule [file normalize tests/modules/blockonkeys.so]
set testmodule_nokey [file normalize tests/modules/blockonbackground.so]
set testmodule_blockedclient [file normalize tests/modules/blockedclient.so]
# make sure the test infra won't use SELECT
set old_singledb $::singledb
set ::singledb 1
# cluster creation is complicated with TLS, and the current tests don't really need that coverage
tags {tls:skip external:skip cluster modules} {
# start three servers
set base_conf [list cluster-enabled yes cluster-node-timeout 1 loadmodule $testmodule]
start_server [list overrides $base_conf] {
start_server [list overrides $base_conf] {
start_server [list overrides $base_conf] {
set node1 [srv 0 client]
set node2 [srv -1 client]
set node3 [srv -2 client]
set node3_pid [srv -2 pid]
# the "overrides" mechanism can only support one "loadmodule" directive
$node1 module load $testmodule_nokey
$node2 module load $testmodule_nokey
$node3 module load $testmodule_nokey
$node1 module load $testmodule_blockedclient
$node2 module load $testmodule_blockedclient
$node3 module load $testmodule_blockedclient
test {Create 3 node cluster} {
exec src/redis-cli --cluster-yes --cluster create \
127.0.0.1:[srv 0 port] \
127.0.0.1:[srv -1 port] \
127.0.0.1:[srv -2 port]
wait_for_condition 1000 50 {
[csi 0 cluster_state] eq {ok} &&
[csi -1 cluster_state] eq {ok} &&
[csi -2 cluster_state] eq {ok}
} else {
fail "Cluster doesn't stabilize"
}
}
test "Run blocking command (blocked on key) on cluster node3" {
# key9184688 is mapped to slot 10923 (first slot of node 3)
set node3_rd [redis_deferring_client -2]
$node3_rd fsl.bpop key9184688 0
$node3_rd flush
wait_for_condition 50 100 {
[s -2 blocked_clients] eq {1}
} else {
fail "Client executing blocking command (blocked on key) not blocked"
}
}
test "Run blocking command (no keys) on cluster node2" {
set node2_rd [redis_deferring_client -1]
$node2_rd block.block 0
$node2_rd flush
wait_for_condition 50 100 {
[s -1 blocked_clients] eq {1}
} else {
fail "Client executing blocking command (no keys) not blocked"
}
}
test "Perform a Resharding" {
exec src/redis-cli --cluster-yes --cluster reshard 127.0.0.1:[srv -2 port] \
--cluster-to [$node1 cluster myid] \
--cluster-from [$node3 cluster myid] \
--cluster-slots 1
}
test "Verify command (no keys) is unaffected after resharding" {
# verify there are blocked clients on node2
assert_equal [s -1 blocked_clients] {1}
#release client
$node2 block.release 0
}
test "Verify command (blocked on key) got unblocked after resharding" {
# this (read) will wait for the node3 to realize the new topology
assert_error {*MOVED*} {$node3_rd read}
# verify there are no blocked clients
assert_equal [s 0 blocked_clients] {0}
assert_equal [s -1 blocked_clients] {0}
assert_equal [s -2 blocked_clients] {0}
}
test "Wait for cluster to be stable" {
wait_for_condition 1000 50 {
[catch {exec src/redis-cli --cluster \
check 127.0.0.1:[srv 0 port] \
}] == 0
} else {
fail "Cluster doesn't stabilize"
}
}
test "Sanity test push cmd after resharding" {
assert_error {*MOVED*} {$node3 fsl.push key9184688 1}
set node1_rd [redis_deferring_client 0]
$node1_rd fsl.bpop key9184688 0
$node1_rd flush
wait_for_condition 50 100 {
[s 0 blocked_clients] eq {1}
} else {
puts "Client not blocked"
puts "read from blocked client: [$node1_rd read]"
fail "Client not blocked"
}
$node1 fsl.push key9184688 2
assert_equal {2} [$node1_rd read]
}
$node1_rd close
$node2_rd close
$node3_rd close
test "Run blocking command (blocked on key) again on cluster node1" {
$node1 del key9184688
# key9184688 is mapped to slot 10923 which has been moved to node1
set node1_rd [redis_deferring_client 0]
$node1_rd fsl.bpop key9184688 0
$node1_rd flush
wait_for_condition 50 100 {
[s 0 blocked_clients] eq {1}
} else {
fail "Client executing blocking command (blocked on key) again not blocked"
}
}
test "Run blocking command (no keys) again on cluster node2" {
set node2_rd [redis_deferring_client -1]
$node2_rd block.block 0
$node2_rd flush
wait_for_condition 50 100 {
[s -1 blocked_clients] eq {1}
} else {
fail "Client executing blocking command (no keys) again not blocked"
}
}
test "Kill a cluster node and wait for fail state" {
# kill node3 in cluster
exec kill -SIGSTOP $node3_pid
wait_for_condition 1000 50 {
[csi 0 cluster_state] eq {fail} &&
[csi -1 cluster_state] eq {fail}
} else {
fail "Cluster doesn't fail"
}
}
test "Verify command (blocked on key) got unblocked after cluster failure" {
assert_error {*CLUSTERDOWN*} {$node1_rd read}
}
test "Verify command (no keys) got unblocked after cluster failure" {
assert_error {*CLUSTERDOWN*} {$node2_rd read}
# verify there are no blocked clients
assert_equal [s 0 blocked_clients] {0}
assert_equal [s -1 blocked_clients] {0}
}
test "Verify command RM_Call is rejected when cluster is down" {
assert_error "ERR Can not execute a command 'set' while the cluster is down" {$node1 do_rm_call set x 1}
}
exec kill -SIGCONT $node3_pid
$node1_rd close
$node2_rd close
# stop three servers
}
}
}
} ;# tags
set ::singledb $old_singledb