# Primitive tests on cluster-enabled redis with modules

source tests/support/cli.tcl

# cluster creation is complicated with TLS, and the current tests don't really need that coverage
tags {tls:skip external:skip cluster modules} {

set testmodule_nokey [file normalize tests/modules/blockonbackground.so]
set testmodule_blockedclient [file normalize tests/modules/blockedclient.so]
set testmodule [file normalize tests/modules/blockonkeys.so]

set modules [list loadmodule $testmodule loadmodule $testmodule_nokey loadmodule $testmodule_blockedclient]
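# Note (an assumption about the test harness, not asserted here): the config_lines list
# above appears to be appended verbatim to every node's config file by start_cluster, so
# all three primaries come up with the three test modules already loaded. A hypothetical
# fourth module would be added the same way, e.g.:
#   lappend modules loadmodule [file normalize tests/modules/example_extra.so]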
start_cluster 3 0 [list config_lines $modules] {

    set node1 [srv 0 client]
    set node2 [srv -1 client]
    set node3 [srv -2 client]
    set node3_pid [srv -2 pid]

    test "Run blocking command (blocked on key) on cluster node3" {
        # key9184688 is mapped to slot 10923 (first slot of node 3)
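        # (Illustrative only, not executed: the mapping can be double-checked with the
        #  standard CLUSTER KEYSLOT command, i.e. [$node3 cluster keyslot key9184688]
        #  is expected to return 10923.)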
        set node3_rd [redis_deferring_client -2]
        $node3_rd fsl.bpop key9184688 0
        $node3_rd flush

        wait_for_condition 50 100 {
            [s -2 blocked_clients] eq {1}
        } else {
            fail "Client executing blocking command (blocked on key) not blocked"
        }
    }

    test "Run blocking command (no keys) on cluster node2" {
        set node2_rd [redis_deferring_client -1]
        $node2_rd block.block 0
        $node2_rd flush

        wait_for_condition 50 100 {
            [s -1 blocked_clients] eq {1}
        } else {
            fail "Client executing blocking command (no keys) not blocked"
        }
    }

    test "Perform a Resharding" {
        exec src/redis-cli --cluster-yes --cluster reshard 127.0.0.1:[srv -2 port] \
            --cluster-to [$node1 cluster myid] \
            --cluster-from [$node3 cluster myid] \
            --cluster-slots 1
    }
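    # The reshard above moves a single slot (10923, per the comment in the first test) from
    # node3 to node1; redis-cli drives the migration and the other nodes pick up the new
    # owner over the cluster bus. The next tests check the fallout: the keyless blocked
    # client on node2 must stay blocked, while the client blocked on key9184688 on node3
    # must be woken up with a -MOVED redirection.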
    test "Verify command (no keys) is unaffected after resharding" {
        # verify there are blocked clients on node2
        assert_equal [s -1 blocked_clients] {1}

        # release client
        $node2 block.release 0
    }

    test "Verify command (blocked on key) got unblocked after resharding" {
        # this (read) will wait for node3 to realize the new topology
        assert_error {*MOVED*} {$node3_rd read}

        # verify there are no blocked clients
        assert_equal [s 0 blocked_clients] {0}
        assert_equal [s -1 blocked_clients] {0}
        assert_equal [s -2 blocked_clients] {0}
    }
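    # CI is a cluster test helper that reads a field from CLUSTER INFO on the n-th node,
    # and `redis-cli --cluster check` appears to exit non-zero while slots are still open
    # or the nodes disagree on ownership, so the loop below waits on both signals.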
    test "Wait for cluster to be stable" {
        wait_for_condition 1000 50 {
            [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv 0 port]}] == 0 &&
            [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -1 port]}] == 0 &&
            [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv -2 port]}] == 0 &&
            [CI 0 cluster_state] eq {ok} &&
            [CI 1 cluster_state] eq {ok} &&
            [CI 2 cluster_state] eq {ok}
        } else {
            fail "Cluster doesn't stabilize"
        }
    }
    test "Sanity test push cmd after resharding" {
        assert_error {*MOVED*} {$node3 fsl.push key9184688 1}

        set node1_rd [redis_deferring_client 0]
        $node1_rd fsl.bpop key9184688 0
        $node1_rd flush

        wait_for_condition 50 100 {
            [s 0 blocked_clients] eq {1}
        } else {
            puts "Client not blocked"
            puts "read from blocked client: [$node1_rd read]"
            fail "Client not blocked"
        }

        $node1 fsl.push key9184688 2
        assert_equal {2} [$node1_rd read]
    }

    $node1_rd close
    $node2_rd close
    $node3_rd close

    test "Run blocking command (blocked on key) again on cluster node1" {
        $node1 del key9184688
        # key9184688 is mapped to slot 10923 which has been moved to node1
        set node1_rd [redis_deferring_client 0]
        $node1_rd fsl.bpop key9184688 0
        $node1_rd flush

        wait_for_condition 50 100 {
            [s 0 blocked_clients] eq {1}
        } else {
            fail "Client executing blocking command (blocked on key) again not blocked"
        }
    }

    test "Run blocking command (no keys) again on cluster node2" {
        set node2_rd [redis_deferring_client -1]

        $node2_rd block.block 0
        $node2_rd flush

        wait_for_condition 50 100 {
            [s -1 blocked_clients] eq {1}
        } else {
            fail "Client executing blocking command (no keys) again not blocked"
        }
    }
test "Kill a cluster node and wait for fail state" {
|
|
|
|
# kill node3 in cluster
|
        pause_process $node3_pid
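        # pause_process is a test helper that stops the process (via SIGSTOP) and waits
        # until it is actually stopped. With node3 silent and no replicas to fail over to
        # (this cluster is 3 masters, 0 replicas), the surviving nodes are expected to
        # switch cluster_state to fail once the node timeout expires.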
        wait_for_condition 1000 50 {
            [CI 0 cluster_state] eq {fail} &&
            [CI 1 cluster_state] eq {fail}
        } else {
            fail "Cluster doesn't fail"
        }
    }

    test "Verify command (blocked on key) got unblocked after cluster failure" {
        assert_error {*CLUSTERDOWN*} {$node1_rd read}
    }

    test "Verify command (no keys) got unblocked after cluster failure" {
        assert_error {*CLUSTERDOWN*} {$node2_rd read}

        # verify there are no blocked clients
        assert_equal [s 0 blocked_clients] {0}
        assert_equal [s -1 blocked_clients] {0}
    }
    test "Verify command RM_Call is rejected when cluster is down" {
        assert_error "ERR Can not execute a command 'set' while the cluster is down" {$node1 do_rm_call set x 1}
    }
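    # do_rm_call is provided by one of the loaded test modules (blockedclient.so, as far as
    # the test module sources suggest) and forwards its arguments through RM_Call; the
    # assertion above shows the cluster-down check applies to module-initiated commands
    # just as it does to regular clients.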
    resume_process $node3_pid

    $node1_rd close
    $node2_rd close
}

set testmodule_keyspace_events [file normalize tests/modules/keyspace_events.so]
set testmodule_postnotifications "[file normalize tests/modules/postnotifications.so] with_key_events"
set modules [list loadmodule $testmodule_keyspace_events loadmodule $testmodule_postnotifications]
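# Rough sketch of what these two test modules do, based on how they are used below:
# keyspace_events.so counts key deletions it observes (keyspace.get_dels / keyspace.incr_dels),
# and postnotifications.so, loaded with the with_key_events argument, registers
# post-notification jobs that run after the triggering event (the lpush seen in the
# replication stream assertion).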
start_cluster 2 2 [list config_lines $modules] {

    set master1 [srv 0 client]
    set master2 [srv -1 client]
    set replica1 [srv -2 client]
    set replica2 [srv -3 client]

    test "Verify that key deletions and notification effects triggered by a cluster slot change are replicated inside multi exec" {
        $master2 set count_dels_{4oi} 1
        $master2 del count_dels_{4oi}
        assert_equal 1 [$master2 keyspace.get_dels]
        assert_equal 1 [$replica2 keyspace.get_dels]
        $master2 set count_dels_{4oi} 1

        set repl [attach_to_replication_stream_on_connection -3]

        $master1 cluster bumpepoch
        $master1 cluster setslot 16382 node [$master1 cluster myid]
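        # cluster bumpepoch gives master1 the highest config epoch, so its claim on slot
        # 16382 (taken over with cluster setslot ... node) should win once it is gossiped.
        # When master2 learns it no longer owns the slot holding count_dels_{4oi} it deletes
        # the key, which fires the module's deletion notification and its post-notification
        # job; the stream captured from replica2 below should show all of it inside a single
        # MULTI/EXEC block.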
        wait_for_cluster_propagation
        wait_for_condition 50 100 {
            [$master2 keyspace.get_dels] eq 2
        } else {
            fail "master did not delete the key"
        }
        wait_for_condition 50 100 {
            [$replica2 keyspace.get_dels] eq 2
        } else {
            fail "replica did not increase del counter"
        }

        # the {lpush before_deleted count_dels_{4oi}} is a post notification job registered when 'count_dels_{4oi}' was removed
        assert_replication_stream $repl {
            {multi}
            {del count_dels_{4oi}}
            {keyspace.incr_dels}
            {lpush before_deleted count_dels_{4oi}}
            {exec}
        }
        close_replication_stream $repl
    }
}
}

set testmodule [file normalize tests/modules/basics.so]
set modules [list loadmodule $testmodule]

start_cluster 3 0 [list config_lines $modules] {
    set node1 [srv 0 client]
    set node2 [srv -1 client]
    set node3 [srv -2 client]

    test "Verify RM_Call inside module load function on cluster mode" {
        assert_equal {PONG} [$node1 PING]
        assert_equal {PONG} [$node2 PING]
        assert_equal {PONG} [$node3 PING]
    }
}