redict/tests/unit/moduleapi/cluster.tcl

# Primitive tests on cluster-enabled redis with modules

source tests/support/cli.tcl

# cluster creation is complicated with TLS, and the current tests don't really need that coverage
tags {tls:skip external:skip cluster modules} {

# Absolute paths of the three test modules used by this file.
set testmodule [file normalize tests/modules/blockonkeys.so]
set testmodule_nokey [file normalize tests/modules/blockonbackground.so]
set testmodule_blockedclient [file normalize tests/modules/blockedclient.so]

# Flat list of `loadmodule <path>` pairs, passed to start_cluster as config_lines.
set modules [list \
    loadmodule $testmodule \
    loadmodule $testmodule_nokey \
    loadmodule $testmodule_blockedclient]
start_cluster 3 0 [list config_lines $modules] {

    # Client handles for the three cluster nodes, addressed through `srv`
    # with indexes 0, -1 and -2 respectively.
    set node1 [srv 0 client]
    set node2 [srv -1 client]
    set node3 [srv -2 client]
    # PID of node3; used further down to suspend and resume that process
    # with SIGSTOP / SIGCONT.
    set node3_pid [srv -2 pid]

    test "Run blocking command (blocked on key) on cluster node3" {
        # Issue fsl.bpop from a deferring client connected to node3.
        # key9184688 is mapped to slot 10923 (first slot of node 3).
        set node3_rd [redis_deferring_client -2]
        $node3_rd fsl.bpop key9184688 0
        $node3_rd flush

        # Poll node3's blocked_clients stat until it reads 1; otherwise fail.
        wait_for_condition 50 100 {[s -2 blocked_clients] eq {1}} else {
            fail "Client executing blocking command (blocked on key) not blocked"
        }
    }

    test "Run blocking command (no keys) on cluster node2" {
        # Issue block.block (a blocking command that takes no key) from a
        # deferring client connected to node2.
        set node2_rd [redis_deferring_client -1]
        $node2_rd block.block 0
        $node2_rd flush

        # Poll node2's blocked_clients stat until it reads 1; otherwise fail.
        wait_for_condition 50 100 {[s -1 blocked_clients] eq {1}} else {
            fail "Client executing blocking command (no keys) not blocked"
        }
    }


    test "Perform a Resharding" {
        # Resolve the entry-point address and both node ids first (in the same
        # order the command line lists them), then have redis-cli move a
        # single slot from node3 to node1.
        set reshard_entry 127.0.0.1:[srv -2 port]
        set reshard_to [$node1 cluster myid]
        set reshard_from [$node3 cluster myid]
        exec src/redis-cli --cluster-yes --cluster reshard $reshard_entry \
            --cluster-to $reshard_to \
            --cluster-from $reshard_from \
            --cluster-slots 1
    }

    test "Verify command (no keys) is unaffected after resharding" {
        # The slot moved from node3 to node1, so the client blocked on node2
        # must still be counted as blocked.
        assert_equal [s -1 blocked_clients] {1}

        # Release that client.
        $node2 block.release 0
    }

    test "Verify command (blocked on key) got unblocked after resharding" {
        # This read waits for node3 to realize the new topology; the reply
        # must be an error matching *MOVED*.
        assert_error {*MOVED*} {$node3_rd read}

        # None of the three nodes may report a blocked client any more.
        foreach srv_idx {0 -1 -2} {
            assert_equal [s $srv_idx blocked_clients] {0}
        }
    }

    test "Wait for cluster to be stable" {
        # Re-run `redis-cli --cluster check` against node1 until the command
        # succeeds, i.e. until catch reports 0.
        wait_for_condition 1000 50 {
            [catch {exec src/redis-cli --cluster check 127.0.0.1:[srv 0 port]}] == 0
        } else {
            fail "Cluster doesn't stabilize"
        }
    }

    test "Sanity test push cmd after resharding" {
        # A push sent to node3 must now be answered with a MOVED error.
        assert_error {*MOVED*} {$node3 fsl.push key9184688 1}

        # Block a fresh deferring client on the same key, this time on node1.
        set node1_rd [redis_deferring_client 0]
        $node1_rd fsl.bpop key9184688 0
        $node1_rd flush

        wait_for_condition 50 100 {[s 0 blocked_clients] eq {1}} else {
            # Diagnostics: print whatever the supposedly blocked client received.
            puts "Client not blocked"
            puts "read from blocked client: [$node1_rd read]"
            fail "Client not blocked"
        }

        # Pushing 2 through node1 must hand that value to the waiting client.
        $node1 fsl.push key9184688 2
        assert_equal {2} [$node1_rd read]
    }

    # Close the deferring clients opened by the tests above; node1_rd and
    # node2_rd are re-created by the tests that follow.
    $node1_rd close
    $node2_rd close
    $node3_rd close

    test "Run blocking command (blocked on key) again on cluster node1" {
        # Start from a clean key.
        $node1 del key9184688

        # key9184688 is mapped to slot 10923 which has been moved to node1,
        # so the blocking command is now issued against node1.
        set node1_rd [redis_deferring_client 0]
        $node1_rd fsl.bpop key9184688 0
        $node1_rd flush

        # Poll node1's blocked_clients stat until it reads 1; otherwise fail.
        wait_for_condition 50 100 {[s 0 blocked_clients] eq {1}} else {
            fail "Client executing blocking command (blocked on key) again not blocked"
        }
    }

    test "Run blocking command (no keys) again on cluster node2" {
        # Issue block.block once more from a new deferring client on node2.
        set node2_rd [redis_deferring_client -1]
        $node2_rd block.block 0
        $node2_rd flush

        # Poll node2's blocked_clients stat until it reads 1; otherwise fail.
        wait_for_condition 50 100 {[s -1 blocked_clients] eq {1}} else {
            fail "Client executing blocking command (no keys) again not blocked"
        }
    }

    test "Kill a cluster node and wait for fail state" {
        # node3 is not terminated: SIGSTOP suspends the process, and it is
        # resumed with SIGCONT once the failure-state tests are done.
        exec kill -SIGSTOP $node3_pid

        # Wait until both CI 0 and CI 1 report cluster_state "fail".
        wait_for_condition 1000 50 {
            [CI 0 cluster_state] eq {fail} && [CI 1 cluster_state] eq {fail}
        } else {
            fail "Cluster doesn't fail"
        }
    }

    test "Verify command (blocked on key) got unblocked after cluster failure" {
        # Reading the pending reply of the client that was blocked on node1
        # must raise an error matching *CLUSTERDOWN*.
        assert_error {*CLUSTERDOWN*} {$node1_rd read}
    }

    test "Verify command (no keys) got unblocked after cluster failure" {
        # The client blocked on node2 must also be answered with CLUSTERDOWN.
        assert_error {*CLUSTERDOWN*} {$node2_rd read}

        # Neither node1 nor node2 may report a blocked client any more.
        foreach srv_idx {0 -1} {
            assert_equal [s $srv_idx blocked_clients] {0}
        }
    }

    test "Verify command RM_Call is rejected when cluster is down" {
        # With the cluster still in the fail state, running `set x 1` through
        # do_rm_call on node1 must be refused with exactly this error.
        # NOTE(review): do_rm_call is presumably provided by one of the loaded
        # test modules - confirm which one.
        assert_error "ERR Can not execute a command 'set' while the cluster is down" {$node1 do_rm_call set x 1}
    }

    # Resume node3, which was suspended with SIGSTOP earlier, then close the
    # deferring clients opened for the second round of tests.
    exec kill -SIGCONT $node3_pid
    $node1_rd close
    $node2_rd close
}
}
Cluster test improvements (#10920) * Restructured testing to allow running cluster tests easily as part of the normal testing 2022-07-12 13:41:29 -04:00			`# Primitive tests on cluster-enabled redis with modules`
Release clients blocked on module commands in cluster resharding and down state (#9483) Prevent clients from being blocked forever in cluster when they block with their own module command and the hash slot is migrated to another master at the same time. These will get a redirection message when unblocked. Also, release clients blocked on module commands when cluster is down (same as other blocked clients) This commit adds basic tests for the main (non-cluster) redis test infra that test the cluster. This was done because the cluster test infra can't handle some common test features, but most importantly we only build the test modules with the non-cluster test suite. note that rather than really supporting cluster operations by the test infra, it was added (as dup code) in two files, one for module tests and one for non-modules tests, maybe in the future we'll refactor that. Co-authored-by: Oran Agra <oran@redislabs.com> 2021-10-19 04:50:37 -04:00
			`source tests/support/cli.tcl`

Fix new cluster tests issues (#9657) Following #9483, the daily CI exposed a few problems. * The cluster creation code (uses redis-cli) is complicated to test with TLS enabled. For now I'm just skipping them, since the tests we run there don't really need that kind of coverage. * Cluster port binding failures: note that `find_available_port` already looks for a free cluster port, but the code in `wait_server_started` couldn't detect the failure of binding (the text it greps for wasn't found in the log). 2021-10-20 08:40:28 -04:00			`# cluster creation is complicated with TLS, and the current tests don't really need that coverage`
			`tags {tls:skip external:skip cluster modules} {`

Cluster test improvements (#10920) * Restructured testing to allow running cluster tests easily as part of the normal testing 2022-07-12 13:41:29 -04:00			`set testmodule_nokey [file normalize tests/modules/blockonbackground.so]`
			`set testmodule_blockedclient [file normalize tests/modules/blockedclient.so]`
			`set testmodule [file normalize tests/modules/blockonkeys.so]`

			`set modules [list loadmodule $testmodule loadmodule $testmodule_nokey loadmodule $testmodule_blockedclient]`
			`start_cluster 3 0 [list config_lines $modules] {`
Release clients blocked on module commands in cluster resharding and down state (#9483) Prevent clients from being blocked forever in cluster when they block with their own module command and the hash slot is migrated to another master at the same time. These will get a redirection message when unblocked. Also, release clients blocked on module commands when cluster is down (same as other blocked clients) This commit adds basic tests for the main (non-cluster) redis test infra that test the cluster. This was done because the cluster test infra can't handle some common test features, but most importantly we only build the test modules with the non-cluster test suite. note that rather than really supporting cluster operations by the test infra, it was added (as dup code) in two files, one for module tests and one for non-modules tests, maybe in the future we'll refactor that. Co-authored-by: Oran Agra <oran@redislabs.com> 2021-10-19 04:50:37 -04:00
			`set node1 [srv 0 client]`
			`set node2 [srv -1 client]`
			`set node3 [srv -2 client]`
			`set node3_pid [srv -2 pid]`

			`test "Run blocking command (blocked on key) on cluster node3" {`
			`# key9184688 is mapped to slot 10923 (first slot of node 3)`
			`set node3_rd [redis_deferring_client -2]`
			`$node3_rd fsl.bpop key9184688 0`
			`$node3_rd flush`
			`wait_for_condition 50 100 {`
			`[s -2 blocked_clients] eq {1}`
			`} else {`
			`fail "Client executing blocking command (blocked on key) not blocked"`
			`}`
			`}`

			`test "Run blocking command (no keys) on cluster node2" {`
			`set node2_rd [redis_deferring_client -1]`
			`$node2_rd block.block 0`
			`$node2_rd flush`

			`wait_for_condition 50 100 {`
			`[s -1 blocked_clients] eq {1}`
			`} else {`
			`fail "Client executing blocking command (no keys) not blocked"`
			`}`
			`}`


			`test "Perform a Resharding" {`
			`exec src/redis-cli --cluster-yes --cluster reshard 127.0.0.1:[srv -2 port] \`
			`--cluster-to [$node1 cluster myid] \`
			`--cluster-from [$node3 cluster myid] \`
			`--cluster-slots 1`
			`}`

			`test "Verify command (no keys) is unaffected after resharding" {`
			`# verify there are blocked clients on node2`
			`assert_equal [s -1 blocked_clients] {1}`

			`#release client`
			`$node2 block.release 0`
			`}`

			`test "Verify command (blocked on key) got unblocked after resharding" {`
			`# this (read) will wait for the node3 to realize the new topology`
			`assert_error {*MOVED*} {$node3_rd read}`

			`# verify there are no blocked clients`
			`assert_equal [s 0 blocked_clients] {0}`
			`assert_equal [s -1 blocked_clients] {0}`
			`assert_equal [s -2 blocked_clients] {0}`
			`}`

			`test "Wait for cluster to be stable" {`
			`wait_for_condition 1000 50 {`
			`[catch {exec src/redis-cli --cluster \`
			`check 127.0.0.1:[srv 0 port] \`
			`}] == 0`
			`} else {`
			`fail "Cluster doesn't stabilize"`
			`}`
			`}`

			`test "Sanity test push cmd after resharding" {`
			`assert_error {*MOVED*} {$node3 fsl.push key9184688 1}`

			`set node1_rd [redis_deferring_client 0]`
			`$node1_rd fsl.bpop key9184688 0`
			`$node1_rd flush`

			`wait_for_condition 50 100 {`
			`[s 0 blocked_clients] eq {1}`
			`} else {`
			`puts "Client not blocked"`
			`puts "read from blocked client: [$node1_rd read]"`
			`fail "Client not blocked"`
			`}`

			`$node1 fsl.push key9184688 2`
			`assert_equal {2} [$node1_rd read]`
			`}`

			`$node1_rd close`
			`$node2_rd close`
			`$node3_rd close`

			`test "Run blocking command (blocked on key) again on cluster node1" {`
			`$node1 del key9184688`
			`# key9184688 is mapped to slot 10923 which has been moved to node1`
			`set node1_rd [redis_deferring_client 0]`
			`$node1_rd fsl.bpop key9184688 0`
			`$node1_rd flush`

			`wait_for_condition 50 100 {`
			`[s 0 blocked_clients] eq {1}`
			`} else {`
			`fail "Client executing blocking command (blocked on key) again not blocked"`
			`}`
			`}`

			`test "Run blocking command (no keys) again on cluster node2" {`
			`set node2_rd [redis_deferring_client -1]`

			`$node2_rd block.block 0`
			`$node2_rd flush`

			`wait_for_condition 50 100 {`
			`[s -1 blocked_clients] eq {1}`
			`} else {`
			`fail "Client executing blocking command (no keys) again not blocked"`
			`}`
			`}`

			`test "Kill a cluster node and wait for fail state" {`
			`# kill node3 in cluster`
			`exec kill -SIGSTOP $node3_pid`

			`wait_for_condition 1000 50 {`
Cluster test improvements (#10920) * Restructured testing to allow running cluster tests easily as part of the normal testing 2022-07-12 13:41:29 -04:00			`[CI 0 cluster_state] eq {fail} &&`
			`[CI 1 cluster_state] eq {fail}`
Release clients blocked on module commands in cluster resharding and down state (#9483) Prevent clients from being blocked forever in cluster when they block with their own module command and the hash slot is migrated to another master at the same time. These will get a redirection message when unblocked. Also, release clients blocked on module commands when cluster is down (same as other blocked clients) This commit adds basic tests for the main (non-cluster) redis test infra that test the cluster. This was done because the cluster test infra can't handle some common test features, but most importantly we only build the test modules with the non-cluster test suite. note that rather than really supporting cluster operations by the test infra, it was added (as dup code) in two files, one for module tests and one for non-modules tests, maybe in the future we'll refactor that. Co-authored-by: Oran Agra <oran@redislabs.com> 2021-10-19 04:50:37 -04:00			`} else {`
			`fail "Cluster doesn't fail"`
			`}`
			`}`

			`test "Verify command (blocked on key) got unblocked after cluster failure" {`
			`assert_error {*CLUSTERDOWN*} {$node1_rd read}`
			`}`

			`test "Verify command (no keys) got unblocked after cluster failure" {`
			`assert_error {*CLUSTERDOWN*} {$node2_rd read}`

			`# verify there are no blocked clients`
			`assert_equal [s 0 blocked_clients] {0}`
			`assert_equal [s -1 blocked_clients] {0}`
			`}`

Add new RM_Call flags for script mode, no writes, and error replies. (#10372) The PR extends RM_Call with 3 new capabilities using new flags that are given to RM_Call as part of the `fmt` argument. It aims to assist modules that are getting a list of commands to be executed from the user (not hard coded as part of the module logic), think of a module that implements a new scripting language... * `S` - Run the command in a script mode, this means that it will raise an error if a command which is not allowed inside a script (flagged with the `deny-script` flag) is invoked (like SHUTDOWN). In addition, in script mode, write commands are not allowed if there are not enough good replicas (as configured with `min-replicas-to-write`) and/or a disk error happened. * `W` - no writes mode, Redis will reject any command that is marked with the `write` flag. Again, this can be useful to modules that implement a new scripting language and want to prevent any write commands. * `E` - Return errors as RedisModuleCallReply. Today the errors that happened before the command was invoked (like unknown commands or ACL errors) return a NULL reply and set errno. This might be missing important information about the failure and it is also impossible to just pass the error to the user using RM_ReplyWithCallReply. This new flag allows you to get a RedisModuleCallReply object with the relevant error message and treat it as if it was an error that was raised by the command invocation. Tests were added to verify the new code paths. In addition, a small refactoring was done to share some code between modules, scripts, and the `processCommand` function: 1. `getAclErrorMessage` was added to `acl.c` to unify the log message extraction from the ACL result 2. `checkGoodReplicasStatus` was added to `replication.c` to check the status of good replicas. It is used in `scriptVerifyWriteCommandAllow`, `RM_Call`, and `processCommand`. 3. 
`writeCommandsGetDiskErrorMessage` was added to `server.c` to get the error message on persistence failure. Again, it is used in `scriptVerifyWriteCommandAllow`, `RM_Call`, and `processCommand`. 2022-03-22 08:13:28 -04:00			`test "Verify command RM_Call is rejected when cluster is down" {`
			`assert_error "ERR Can not execute a command 'set' while the cluster is down" {$node1 do_rm_call set x 1}`
			`}`

Release clients blocked on module commands in cluster resharding and down state (#9483) Prevent clients from being blocked forever in cluster when they block with their own module command and the hash slot is migrated to another master at the same time. These will get a redirection message when unblocked. Also, release clients blocked on module commands when cluster is down (same as other blocked clients) This commit adds basic tests for the main (non-cluster) redis test infra that test the cluster. This was done because the cluster test infra can't handle some common test features, but most importantly we only build the test modules with the non-cluster test suite. note that rather than really supporting cluster operations by the test infra, it was added (as dup code) in two files, one for module tests and one for non-modules tests, maybe in the future we'll refactor that. Co-authored-by: Oran Agra <oran@redislabs.com> 2021-10-19 04:50:37 -04:00			`exec kill -SIGCONT $node3_pid`
			`$node1_rd close`
			`$node2_rd close`
			`}`
			`}`