2015-10-15 11:23:13 +02:00
|
|
|
# Return 1 when the entire content of the file 'log' matches the glob-style
# 'pattern' (as interpreted by [string match]), otherwise return 0.
# The file is read into memory in one go and closed before matching.
proc log_file_matches {log pattern} {
    set fd [open $log r]
    set data [read $fd]
    close $fd
    return [string match $pattern $data]
}
|
|
|
|
|
2021-06-09 15:13:24 +03:00
|
|
|
# Check that a replica which is kept waiting during the initial
# synchronization with its master logs a timeout when the master stops
# talking to it.
start_server {tags {"repl network external:skip"}} {
    # Outer server: used as the replica in this scenario.
    set slave [srv 0 client]
    set slave_log [srv 0 stdout]
    set slave_host [srv 0 host]
    set slave_port [srv 0 port]

    start_server {} {
        # Inner server: used as the master.
        set master [srv 0 client]
        set master_host [srv 0 host]
        set master_port [srv 0 port]

        # Configure the master in order to hang waiting for the BGSAVE
        # operation, so that the slave remains in the handshake state.
        $master config set repl-diskless-sync yes
        $master config set repl-diskless-sync-delay 1000

        # Start the replication process...
        $slave slaveof $master_host $master_port

        test {Slave enters handshake} {
            wait_for_condition 50 1000 {
                [string match {*handshake*} [$slave role]]
            } else {
                fail "Replica does not enter handshake state"
            }
        }

        test {Slave enters wait_bgsave} {
            # This time the state is observed from the master's point of view.
            wait_for_condition 50 1000 {
                [string match {*state=wait_bgsave*} [$master info replication]]
            } else {
                fail "Replica does not enter wait_bgsave state"
            }
        }

        # Use a short replication timeout on the slave, so that if there
        # are no bugs the timeout is triggered in a reasonable amount
        # of time.
        $slave config set repl-timeout 5

        # But make the master unable to send
        # the periodic newlines to refresh the connection. The slave
        # should detect the timeout.
        $master debug sleep 10

        test {Slave is able to detect timeout during handshake} {
            # The timeout is detected by looking for the message in the
            # replica's log file.
            wait_for_condition 50 1000 {
                [log_file_matches $slave_log "*Timeout connecting to the MASTER*"]
            } else {
                fail "Replica is not able to detect timeout"
            }
        }
    }
}
|
|
|
|
|
2021-06-09 15:13:24 +03:00
|
|
|
# Replication tests between two instances: A (the outer server) and B (the
# inner server, which is also what the plain `r` command talks to here).
# A is first made a replica of B; the last test swaps the roles.
start_server {tags {"repl external:skip"}} {
    set A [srv 0 client]
    set A_host [srv 0 host]
    set A_port [srv 0 port]

    start_server {} {
        set B [srv 0 client]
        set B_host [srv 0 host]
        set B_port [srv 0 port]

        test {Set instance A as slave of B} {
            $A slaveof $B_host $B_port
            wait_for_condition 50 100 {
                [lindex [$A role] 0] eq {slave} &&
                [string match {*master_link_status:up*} [$A info replication]]
            } else {
                fail "Can't turn the instance into a replica"
            }
        }

        test {INCRBYFLOAT replication, should not remove expire} {
            r set test 1 EX 100
            r incrbyfloat test 0.1
            wait_for_ofs_sync $A $B
            assert_equal [$A debug digest] [$B debug digest]
        }

        test {GETSET replication} {
            $A config resetstat
            $A config set loglevel debug
            $B config set loglevel debug
            r set test foo
            assert_equal [r getset test bar] foo
            wait_for_condition 500 10 {
                [$A get test] eq "bar"
            } else {
                fail "getset wasn't propagated"
            }
            assert_equal [r set test vaz get] bar
            wait_for_condition 500 10 {
                [$A get test] eq "vaz"
            } else {
                fail "set get wasn't propagated"
            }
            # The replica must only ever have seen SET commands.
            assert_match {*calls=3,*} [cmdrstat set $A]
            assert_match {} [cmdrstat getset $A]
        }

        test {BRPOPLPUSH replication, when blocking against empty list} {
            $A config resetstat
            set rd [redis_deferring_client]
            $rd brpoplpush a b 5
            r lpush a foo
            wait_for_condition 50 100 {
                [$A debug digest] eq [$B debug digest]
            } else {
                fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
            }
            assert_match {*calls=1,*} [cmdrstat rpoplpush $A]
            assert_match {} [cmdrstat lmove $A]
            # Consume the pending reply and release the connection, so the
            # deferring client does not stay open after the test.
            assert_equal [$rd read] {foo}
            $rd close
        }

        test {BRPOPLPUSH replication, list exists} {
            $A config resetstat
            set rd [redis_deferring_client]
            r lpush c 1
            r lpush c 2
            r lpush c 3
            $rd brpoplpush c d 5
            # Reading the reply guarantees the master already executed the
            # command; then wait for the replica to catch up instead of
            # sleeping for a fixed amount of time.
            $rd read
            wait_for_condition 50 100 {
                [$A debug digest] eq [$B debug digest]
            } else {
                fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
            }
            assert_match {*calls=1,*} [cmdrstat rpoplpush $A]
            assert_match {} [cmdrstat lmove $A]
            $rd close
        }

        foreach wherefrom {left right} {
            foreach whereto {left right} {
                test "BLMOVE ($wherefrom, $whereto) replication, when blocking against empty list" {
                    $A config resetstat
                    set rd [redis_deferring_client]
                    $rd blmove a b $wherefrom $whereto 5
                    r lpush a foo
                    wait_for_condition 50 100 {
                        [$A debug digest] eq [$B debug digest]
                    } else {
                        fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
                    }
                    assert_match {*calls=1,*} [cmdrstat lmove $A]
                    assert_match {} [cmdrstat rpoplpush $A]
                    # Consume the pending reply and release the connection.
                    assert_equal [$rd read] {foo}
                    $rd close
                }

                test "BLMOVE ($wherefrom, $whereto) replication, list exists" {
                    $A config resetstat
                    set rd [redis_deferring_client]
                    r lpush c 1
                    r lpush c 2
                    r lpush c 3
                    $rd blmove c d $wherefrom $whereto 5
                    # Same approach as the BRPOPLPUSH "list exists" test:
                    # wait for the reply, then for the replica to catch up.
                    $rd read
                    wait_for_condition 50 100 {
                        [$A debug digest] eq [$B debug digest]
                    } else {
                        fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
                    }
                    assert_match {*calls=1,*} [cmdrstat lmove $A]
                    assert_match {} [cmdrstat rpoplpush $A]
                    $rd close
                }
            }
        }

        test {BLPOP followed by role change, issue #2473} {
            set rd [redis_deferring_client]
            $rd blpop foo 0 ; # Block while B is a master

            # Swap the roles: A becomes a master and B becomes its replica.
            $A slaveof no one
            $B slaveof $A_host $A_port
            wait_for_condition 50 100 {
                [lindex [$B role] 0] eq {slave} &&
                [string match {*master_link_status:up*} [$B info replication]]
            } else {
                fail "Can't turn the instance into a replica"
            }

            # Push elements into the "foo" list of the new replica.
            # If the client is still attached to the instance, we'll get
            # a desync between the two instances.
            $A rpush foo a b c
            after 100

            wait_for_condition 50 100 {
                [$A debug digest] eq [$B debug digest] &&
                [$A lrange foo 0 -1] eq {a b c} &&
                [$B lrange foo 0 -1] eq {a b c}
            } else {
                fail "Master and replica have different digest: [$A debug digest] VS [$B debug digest]"
            }
            assert_match {*calls=1,*,rejected_calls=0,failed_calls=1*} [cmdrstat blpop $B]
            # Release the connection of the client that was blocked on B.
            $rd close
        }
    }
}
|
|
|
|
|
2021-06-09 15:13:24 +03:00
|
|
|
start_server {tags {"repl external:skip"}} {
|
2010-05-14 20:50:58 +02:00
|
|
|
# Seed the first (outer) server with a key; after the second server is made
# its replica, a later test reads this key back to verify the initial sync.
r set mykey foo
|
2014-07-31 14:39:49 -04:00
|
|
|
|
2010-06-02 22:23:52 +02:00
|
|
|
start_server {} {
|
2010-05-14 20:50:58 +02:00
|
|
|
# Before any SLAVEOF is issued, the inner server must report the master role.
test {Second server should have role master at first} {
    s role
} {master}
|
2014-07-31 14:39:49 -04:00
|
|
|
|
2010-05-14 20:50:58 +02:00
|
|
|
# SLAVEOF and INFO are queued in the same MULTI/EXEC block, so the replication
# info is taken within the transaction that performs the role switch. The
# expected pattern is matched against the EXEC reply.
test {SLAVEOF should start with link status "down"} {
    r multi
    r slaveof [srv -1 host] [srv -1 port]
    r info replication
    r exec
} {*master_link_status:down*}
|
2014-07-31 14:39:49 -04:00
|
|
|
|
2018-09-11 11:03:28 +02:00
|
|
|
# Right after SLAVEOF the server must already report the slave role.
test {The role should immediately be changed to "replica"} {
    s role
} {slave}
|
|
|
|
|
|
|
|
# Wait for the replica to complete the synchronization before checking that
# the key written on the master earlier is now readable from the replica.
wait_for_sync r

test {Sync should have transferred keys from master} {
    r get mykey
} {foo}
|
2014-07-31 14:39:49 -04:00
|
|
|
|
2010-05-14 20:50:58 +02:00
|
|
|
# Once the sync is done, the replica must report the link with its master as up.
test {The link status should be up} {
    s master_link_status
} {up}
|
2014-07-31 14:39:49 -04:00
|
|
|
|
2010-05-14 20:50:58 +02:00
|
|
|
# Write on the master (server at level -1) and poll the replica (level 0)
# until the new value becomes visible there.
test {SET on the master should immediately propagate} {
    r -1 set mykey bar

    wait_for_condition 500 100 {
        [r 0 get mykey] eq {bar}
    } else {
        fail "SET on master did not propagated on replica"
    }
}
|
2011-10-17 10:40:11 +02:00
|
|
|
|
FLUSHDB and FLUSHALL add call forceCommandPropagation / FLUSHALL reset dirty counter to 0 if we enable save (#10691)
## FLUSHALL
We used to restore the dirty counter after `rdbSave` zeroed it if we enable save.
Otherwise FLUSHALL will not be replicated nor put into the AOF.
And then we do increment it again below.
Without that extra dirty++, when db was already empty, FLUSHALL
will not be replicated nor put into the AOF.
We are now going to replace all that dirty counter magic with a call
to forceCommandPropagation (REPL and AOF), instead of all the
messing around with the dirty counter.
Added tests to cover three parts (dirty counter, REPL, AOF).
One benefit other than cleaner code is that the `rdb_changes_since_last_save` is correct in this case.
## FLUSHDB
FLUSHDB was not replicated nor put into the AOF when db was already empty.
Unlike DEL on a non-existing key, FLUSHDB always does something, and that's to call the module hook.
So basically FLUSHDB is never a NOP, and thus it should always be propagated.
Not doing that, could mean that if a module does something in that hook, and wants to
avoid issues of that hook being missing on the replica if the db is empty, it'll need to do complicated things.
So now FLUSHDB add call forceCommandPropagation, we will always propagate FLUSHDB.
Always propagating FLUSHDB seems like a safe approach that shouldn't have any drawbacks (other than looking odd)
This was mentioned in #8972
## Test section:
We actually found it while solving a race condition in the BGSAVE test (other.tcl).
It was found in extra_ci Daily Arm64 (test-libc-malloc).
```
[exception]: Executing test client: ERR Background save already in progress.
ERR Background save already in progress
```
It looks like `r flushdb` triggers (schedules) a bgsave right after `waitForBgsave r` and before `r save`.
Changing flushdb to flushall, FLUSHALL will do a foreground save and then set the dirty counter to 0.
2022-05-11 16:21:16 +08:00
|
|
|
test {FLUSHDB / FLUSHALL should replicate} {
|
2022-12-12 18:10:48 +02:00
|
|
|
# we're attaching to a sub-replica, so we need to stop pings on the real master
|
|
|
|
r -1 config set repl-ping-replica-period 3600
|
|
|
|
|
FLUSHDB and FLUSHALL add call forceCommandPropagation / FLUSHALL reset dirty counter to 0 if we enable save (#10691)
## FLUSHALL
We used to restore the dirty counter after `rdbSave` zeroed it if we enable save.
Otherwise FLUSHALL will not be replicated nor put into the AOF.
And then we do increment it again below.
Without that extra dirty++, when db was already empty, FLUSHALL
will not be replicated nor put into the AOF.
We are now going to replace all that dirty counter magic with a call
to forceCommandPropagation (REPL and AOF), instead of all the
messing around with the dirty counter.
Added tests to cover three parts (dirty counter, REPL, AOF).
One benefit other than cleaner code is that the `rdb_changes_since_last_save` is correct in this case.
## FLUSHDB
FLUSHDB was not replicated nor put into the AOF when db was already empty.
Unlike DEL on a non-existing key, FLUSHDB always does something, and that's to call the module hook.
So basically FLUSHDB is never a NOP, and thus it should always be propagated.
Not doing that, could mean that if a module does something in that hook, and wants to
avoid issues of that hook being missing on the replica if the db is empty, it'll need to do complicated things.
So now FLUSHDB add call forceCommandPropagation, we will always propagate FLUSHDB.
Always propagating FLUSHDB seems like a safe approach that shouldn't have any drawbacks (other than looking odd)
This was mentioned in #8972
## Test section:
We actually found it while solving a race condition in the BGSAVE test (other.tcl).
It was found in extra_ci Daily Arm64 (test-libc-malloc).
```
[exception]: Executing test client: ERR Background save already in progress.
ERR Background save already in progress
```
It looks like `r flushdb` triggers (schedules) a bgsave right after `waitForBgsave r` and before `r save`.
Changing flushdb to flushall, FLUSHALL will do a foreground save and then set the dirty counter to 0.
2022-05-11 16:21:16 +08:00
|
|
|
set repl [attach_to_replication_stream]
|
|
|
|
|
|
|
|
r -1 set key value
|
|
|
|
r -1 flushdb
|
|
|
|
|
|
|
|
r -1 set key value2
|
|
|
|
r -1 flushall
|
|
|
|
|
|
|
|
wait_for_ofs_sync [srv 0 client] [srv -1 client]
|
|
|
|
assert_equal [r -1 dbsize] 0
|
|
|
|
assert_equal [r 0 dbsize] 0
|
|
|
|
|
|
|
|
# DB is empty.
|
|
|
|
r -1 flushdb
|
|
|
|
r -1 flushdb
|
2023-02-16 08:07:35 +02:00
|
|
|
r -1 eval {redis.call("flushdb")} 0
|
FLUSHDB and FLUSHALL add call forceCommandPropagation / FLUSHALL reset dirty counter to 0 if we enable save (#10691)
## FLUSHALL
We used to restore the dirty counter after `rdbSave` zeroed it if we enable save.
Otherwise FLUSHALL will not be replicated nor put into the AOF.
And then we do increment it again below.
Without that extra dirty++, when db was already empty, FLUSHALL
will not be replicated nor put into the AOF.
We are now going to replace all that dirty counter magic with a call
to forceCommandPropagation (REPL and AOF), instead of all the
messing around with the dirty counter.
Added tests to cover three parts (dirty counter, REPL, AOF).
One benefit other than cleaner code is that the `rdb_changes_since_last_save` is correct in this case.
## FLUSHDB
FLUSHDB was not replicated nor put into the AOF when db was already empty.
Unlike DEL on a non-existing key, FLUSHDB always does something, and that's to call the module hook.
So basically FLUSHDB is never a NOP, and thus it should always be propagated.
Not doing that, could mean that if a module does something in that hook, and wants to
avoid issues of that hook being missing on the replica if the db is empty, it'll need to do complicated things.
So now FLUSHDB add call forceCommandPropagation, we will always propagate FLUSHDB.
Always propagating FLUSHDB seems like a safe approach that shouldn't have any drawbacks (other than looking odd)
This was mentioned in #8972
## Test section:
We actually found it while solving a race condition in the BGSAVE test (other.tcl).
It was found in extra_ci Daily Arm64 (test-libc-malloc).
```
[exception]: Executing test client: ERR Background save already in progress.
ERR Background save already in progress
```
It looks like `r flushdb` triggers (schedules) a bgsave right after `waitForBgsave r` and before `r save`.
The fix changes flushdb to flushall: FLUSHALL does a foreground save and then sets the dirty counter to 0.
2022-05-11 16:21:16 +08:00
|
|
|
|
|
|
|
# DBs are empty.
|
2011-10-17 10:40:11 +02:00
|
|
|
r -1 flushall
|
FLUSHDB and FLUSHALL add call forceCommandPropagation / FLUSHALL reset dirty counter to 0 if we enable save (#10691)
## FLUSHALL
We used to restore the dirty counter after `rdbSave` zeroed it if we enable save.
Otherwise FLUSHALL will not be replicated nor put into the AOF.
And then we do increment it again below.
Without that extra dirty++, when db was already empty, FLUSHALL
will not be replicated nor put into the AOF.
We are now going to replace all that dirty counter magic with a call
to forceCommandPropagation (REPL and AOF), instead of all the
messing around with the dirty counter.
Added tests to cover all three parts (dirty counter, REPL, AOF).
One benefit other than cleaner code is that the `rdb_changes_since_last_save` is correct in this case.
## FLUSHDB
FLUSHDB was not replicated nor put into the AOF when db was already empty.
Unlike DEL on a non-existing key, FLUSHDB always does something, and that's to call the module hook.
So basically FLUSHDB is never a NOP, and thus it should always be propagated.
Not doing that, could mean that if a module does something in that hook, and wants to
avoid issues of that hook being missing on the replica if the db is empty, it'll need to do complicated things.
So now FLUSHDB add call forceCommandPropagation, we will always propagate FLUSHDB.
Always propagating FLUSHDB seems like a safe approach that shouldn't have any drawbacks (other than looking odd)
This was mentioned in #8972
## Test section:
We actually found it while solving a race condition in the BGSAVE test (other.tcl).
It was found in extra_ci Daily Arm64 (test-libc-malloc).
```
[exception]: Executing test client: ERR Background save already in progress.
ERR Background save already in progress
```
It looks like `r flushdb` triggers (schedules) a bgsave right after `waitForBgsave r` and before `r save`.
The fix changes flushdb to flushall: FLUSHALL does a foreground save and then sets the dirty counter to 0.
2022-05-11 16:21:16 +08:00
|
|
|
r -1 flushall
|
2023-02-16 08:07:35 +02:00
|
|
|
r -1 eval {redis.call("flushall")} 0
|
|
|
|
|
|
|
|
# add another command to check nothing else was propagated after the above
|
|
|
|
r -1 incr x
|
FLUSHDB and FLUSHALL add call forceCommandPropagation / FLUSHALL reset dirty counter to 0 if we enable save (#10691)
## FLUSHALL
We used to restore the dirty counter after `rdbSave` zeroed it if we enable save.
Otherwise FLUSHALL will not be replicated nor put into the AOF.
And then we do increment it again below.
Without that extra dirty++, when db was already empty, FLUSHALL
will not be replicated nor put into the AOF.
We are now going to replace all that dirty counter magic with a call
to forceCommandPropagation (REPL and AOF), instead of all the
messing around with the dirty counter.
Added tests to cover all three parts (dirty counter, REPL, AOF).
One benefit other than cleaner code is that the `rdb_changes_since_last_save` is correct in this case.
## FLUSHDB
FLUSHDB was not replicated nor put into the AOF when db was already empty.
Unlike DEL on a non-existing key, FLUSHDB always does something, and that's to call the module hook.
So basically FLUSHDB is never a NOP, and thus it should always be propagated.
Not doing that, could mean that if a module does something in that hook, and wants to
avoid issues of that hook being missing on the replica if the db is empty, it'll need to do complicated things.
So now FLUSHDB add call forceCommandPropagation, we will always propagate FLUSHDB.
Always propagating FLUSHDB seems like a safe approach that shouldn't have any drawbacks (other than looking odd)
This was mentioned in #8972
## Test section:
We actually found it while solving a race condition in the BGSAVE test (other.tcl).
It was found in extra_ci Daily Arm64 (test-libc-malloc).
```
[exception]: Executing test client: ERR Background save already in progress.
ERR Background save already in progress
```
It looks like `r flushdb` triggers (schedules) a bgsave right after `waitForBgsave r` and before `r save`.
The fix changes flushdb to flushall: FLUSHALL does a foreground save and then sets the dirty counter to 0.
2022-05-11 16:21:16 +08:00
|
|
|
|
|
|
|
# Assert that each FLUSHDB command is replicated even if the DB is empty.
|
|
|
|
# Assert that each FLUSHALL command is replicated even if the DBs are empty.
|
|
|
|
assert_replication_stream $repl {
|
|
|
|
{set key value}
|
|
|
|
{flushdb}
|
|
|
|
{set key value2}
|
|
|
|
{flushall}
|
|
|
|
{flushdb}
|
|
|
|
{flushdb}
|
|
|
|
{flushdb}
|
|
|
|
{flushall}
|
|
|
|
{flushall}
|
|
|
|
{flushall}
|
2023-02-16 08:07:35 +02:00
|
|
|
{incr x}
|
FLUSHDB and FLUSHALL add call forceCommandPropagation / FLUSHALL reset dirty counter to 0 if we enable save (#10691)
## FLUSHALL
We used to restore the dirty counter after `rdbSave` zeroed it if we enable save.
Otherwise FLUSHALL will not be replicated nor put into the AOF.
And then we do increment it again below.
Without that extra dirty++, when db was already empty, FLUSHALL
will not be replicated nor put into the AOF.
We are now going to replace all that dirty counter magic with a call
to forceCommandPropagation (REPL and AOF), instead of all the
messing around with the dirty counter.
Added tests to cover all three parts (dirty counter, REPL, AOF).
One benefit other than cleaner code is that the `rdb_changes_since_last_save` is correct in this case.
## FLUSHDB
FLUSHDB was not replicated nor put into the AOF when db was already empty.
Unlike DEL on a non-existing key, FLUSHDB always does something, and that's to call the module hook.
So basically FLUSHDB is never a NOP, and thus it should always be propagated.
Not doing that, could mean that if a module does something in that hook, and wants to
avoid issues of that hook being missing on the replica if the db is empty, it'll need to do complicated things.
So now FLUSHDB add call forceCommandPropagation, we will always propagate FLUSHDB.
Always propagating FLUSHDB seems like a safe approach that shouldn't have any drawbacks (other than looking odd)
This was mentioned in #8972
## Test section:
We actually found it while solving a race condition in the BGSAVE test (other.tcl).
It was found in extra_ci Daily Arm64 (test-libc-malloc).
```
[exception]: Executing test client: ERR Background save already in progress.
ERR Background save already in progress
```
It looks like `r flushdb` triggers (schedules) a bgsave right after `waitForBgsave r` and before `r save`.
The fix changes flushdb to flushall: FLUSHALL does a foreground save and then sets the dirty counter to 0.
2022-05-11 16:21:16 +08:00
|
|
|
}
|
|
|
|
close_replication_stream $repl
|
|
|
|
}
|
2014-06-23 09:08:51 +02:00
|
|
|
|
|
|
|
test {ROLE in master reports master with a slave} {
    # Ask the server one level up (expected to be the master) for its ROLE
    # reply and split it into its three top-level fields.
    set res [r -1 role]
    lassign $res role offset slaves
    assert {$role eq {master}}
    assert {$offset > 0}
    assert {[llength $slaves] == 1}

    # Only the third field of the single replica entry is checked. Read it
    # directly instead of lassign-ing the first two fields into variables
    # named master_host/master_port: those fields describe the replica, not
    # the master. NOTE(review): test bodies in this file read variables set
    # by the enclosing script, so the old assignment could also overwrite
    # same-named variables there — confirm nothing relied on it.
    set slave_offset [lindex $slaves 0 2]
    assert {$slave_offset <= $offset}
}
|
|
|
|
|
|
|
|
test {ROLE in slave reports slave in connected state} {
    # Unpack the ROLE reply of the current server in one step; only the
    # role name and the link state are verified below.
    lassign [r role] role master_host master_port slave_state slave_offset
    assert {$role eq {slave}}
    assert {$slave_state eq {connected}}
}
|
2010-05-14 20:50:58 +02:00
|
|
|
}
|
|
|
|
}
|
2012-01-06 17:28:40 +01:00
|
|
|
|
2019-07-01 15:22:29 +03:00
|
|
|
foreach mdl {no yes} {
|
|
|
|
foreach sdl {disabled swapdb} {
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {tags {"repl external:skip"} overrides {save {}}} {
|
2019-07-01 15:22:29 +03:00
|
|
|
set master [srv 0 client]
|
|
|
|
$master config set repl-diskless-sync $mdl
|
Set repl-diskless-sync to yes by default, add repl-diskless-sync-max-replicas (#10092)
1. enable diskless replication by default
2. add a new config named repl-diskless-sync-max-replicas that enables
replication to start before the full repl-diskless-sync-delay was
reached.
3. put replica online sooner on the master (see below)
4. test suite uses repl-diskless-sync-delay of 0 to be faster
5. a few tests that use multiple replica on a pre-populated master, are
now using the new repl-diskless-sync-max-replicas
6. fix possible timing issues in a few cluster tests (see below)
put replica online sooner on the master
----------------------------------------------------
there were two tests that failed because they needed for the master to
realize that the replica is online, but the test code was actually only
waiting for the replica to realize it's online, and in diskless it could
have been before the master realized it.
changes include two things:
1. the tests wait on the right thing
2. issues in the master, putting the replica online in two steps.
the master used to put the replica as online in 2 steps. the first
step was to mark it as online, and the second step was to enable the
write event (only after getting ACK), but in fact the first step didn't
include some of the tasks needed to put it online (like updating the good slave
count, and sending the module event). this meant that if a test was
waiting to see that the replica is online from the point of view of the
master, and then confirm that the module got an event, or that the
master has enough good replicas, it could fail due to timing issues.
so now the full effect of putting the replica online, happens at once,
and only the part about enabling the writes is delayed till the ACK.
fix cluster tests
--------------------
I added some code to wait for the replica to sync and avoid race
conditions.
I later realized the sentinel and cluster tests were using the original 5
second delay, so I changed it to 0.
this means the other changes are probably not needed, but i suppose
they're still better (avoid race conditions)
2022-01-17 14:11:11 +02:00
|
|
|
$master config set repl-diskless-sync-delay 5
|
|
|
|
$master config set repl-diskless-sync-max-replicas 3
|
2019-07-01 15:22:29 +03:00
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
set slaves {}
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save {}}} {
|
2012-01-06 17:28:40 +01:00
|
|
|
lappend slaves [srv 0 client]
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save {}}} {
|
2014-10-24 09:49:22 +02:00
|
|
|
lappend slaves [srv 0 client]
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save {}}} {
|
2019-07-01 15:22:29 +03:00
|
|
|
lappend slaves [srv 0 client]
|
|
|
|
test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" {
|
2019-07-16 11:00:34 +03:00
|
|
|
# start load handles only inside the test, so that the test can be skipped
|
|
|
|
set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000]
|
|
|
|
set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000]
|
|
|
|
set load_handle2 [start_bg_complex_data $master_host $master_port 12 100000000]
|
|
|
|
set load_handle3 [start_write_load $master_host $master_port 8]
|
|
|
|
set load_handle4 [start_write_load $master_host $master_port 4]
|
|
|
|
after 5000 ;# wait for some data to accumulate so that we have RDB part for the fork
|
|
|
|
|
2019-07-01 15:22:29 +03:00
|
|
|
# Configure the diskless-load mode on every replica first, then point each
# of them at the master. Two separate passes keep the original ordering:
# all CONFIG SET calls are issued before the first SLAVEOF.
foreach slave_client $slaves {
    $slave_client config set repl-diskless-load $sdl
}
foreach slave_client $slaves {
    $slave_client slaveof $master_host $master_port
}
|
|
|
|
|
|
|
|
# Wait for all the three slaves to reach the "online"
|
|
|
|
# state from the POV of the master.
|
|
|
|
# Poll the master's INFO output until slave0, slave1 and slave2 all show
# state=online. This uses the same wait_for_condition helper and the same
# budget (500 tries, 100 ms apart) as the other waits in this file, instead
# of a hand-rolled retry counter. The failure message is unchanged.
wait_for_condition 500 100 {
    [string match {*slave0:*state=online*slave1:*state=online*slave2:*state=online*} [r -3 info]]
} else {
    error "assertion:Slaves not correctly synchronized"
}
|
|
|
|
|
|
|
|
# Wait that slaves acknowledge they are online so
|
|
|
|
# we are sure that DBSIZE and DEBUG DIGEST will not
|
|
|
|
# fail because of timing issues.
|
|
|
|
wait_for_condition 500 100 {
|
|
|
|
[lindex [[lindex $slaves 0] role] 3] eq {connected} &&
|
|
|
|
[lindex [[lindex $slaves 1] role] 3] eq {connected} &&
|
|
|
|
[lindex [[lindex $slaves 2] role] 3] eq {connected}
|
2014-10-24 09:49:22 +02:00
|
|
|
} else {
|
2019-07-01 15:22:29 +03:00
|
|
|
fail "Slaves still not connected after some time"
|
2014-10-24 09:49:22 +02:00
|
|
|
}
|
|
|
|
|
2019-07-01 15:22:29 +03:00
|
|
|
# Stop the write load
|
|
|
|
stop_bg_complex_data $load_handle0
|
|
|
|
stop_bg_complex_data $load_handle1
|
|
|
|
stop_bg_complex_data $load_handle2
|
|
|
|
stop_write_load $load_handle3
|
|
|
|
stop_write_load $load_handle4
|
|
|
|
|
stabilize tests that involved with load handlers (#8967)
When a test stops a 'load handler' by killing the process that generates the load,
some commands that are already in the input buffer might still be processed by the server.
This may cause instability in tests that rely on no more commands
being processed after we stop the 'load handler'.
This commit adds a new proc, 'wait_load_handlers_disconnected', to verify that no more
commands from any 'load handler' are processed, by checking that the clients that
generate the load are disconnected.
Also, replace the dbsize check with wait_for_ofs_sync before comparing debug digests, as
the dbsize check would fail in case the last key the workload wrote was an overridden key (not a new one).
Affected tests
Race fix:
- failover command to specific replica works
- Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl
- AOF rewrite during write load: RDB preamble=$rdbpre
Cleanup and speedup:
- Test replication with blocking lists and sorted sets operations
- Test replication with parallel clients writing in different DBs
- Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect
2021-05-20 15:29:43 +03:00
|
|
|
# Make sure no more commands processed
|
2022-02-14 08:46:58 +02:00
|
|
|
wait_load_handlers_disconnected -3
|
stabilize tests that involved with load handlers (#8967)
When a test stops a 'load handler' by killing the process that generates the load,
some commands that are already in the input buffer might still be processed by the server.
This may cause instability in tests that rely on no more commands
being processed after we stop the 'load handler'.
This commit adds a new proc, 'wait_load_handlers_disconnected', to verify that no more
commands from any 'load handler' are processed, by checking that the clients that
generate the load are disconnected.
Also, replace the dbsize check with wait_for_ofs_sync before comparing debug digests, as
the dbsize check would fail in case the last key the workload wrote was an overridden key (not a new one).
Affected tests
Race fix:
- failover command to specific replica works
- Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl
- AOF rewrite during write load: RDB preamble=$rdbpre
Cleanup and speedup:
- Test replication with blocking lists and sorted sets operations
- Test replication with parallel clients writing in different DBs
- Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect
2021-05-20 15:29:43 +03:00
|
|
|
|
|
|
|
wait_for_ofs_sync $master [lindex $slaves 0]
|
|
|
|
wait_for_ofs_sync $master [lindex $slaves 1]
|
|
|
|
wait_for_ofs_sync $master [lindex $slaves 2]
|
2014-11-24 11:54:53 +01:00
|
|
|
|
2019-07-01 15:22:29 +03:00
|
|
|
# Check digests
|
|
|
|
set digest [$master debug digest]
|
|
|
|
set digest0 [[lindex $slaves 0] debug digest]
|
|
|
|
set digest1 [[lindex $slaves 1] debug digest]
|
|
|
|
set digest2 [[lindex $slaves 2] debug digest]
|
|
|
|
assert {$digest ne 0000000000000000000000000000000000000000}
|
|
|
|
assert {$digest eq $digest0}
|
|
|
|
assert {$digest eq $digest1}
|
|
|
|
assert {$digest eq $digest2}
|
2014-10-24 09:49:22 +02:00
|
|
|
}
|
2019-07-01 15:22:29 +03:00
|
|
|
}
|
|
|
|
}
|
2014-10-24 09:49:22 +02:00
|
|
|
}
|
2012-01-06 17:28:40 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-08-31 16:43:38 +02:00
|
|
|
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {tags {"repl external:skip"} overrides {save {}}} {
|
2018-08-31 16:43:38 +02:00
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save {}}} {
|
2018-09-11 11:03:28 +02:00
|
|
|
test "Master stream is correctly processed while the replica has a script in -BUSY state" {
    # The innermost server acts as the replica for this test.
    set slave [srv 0 client]

    # Start a background writer against the master so that there is a
    # continuous stream of commands to replicate.
    set load_handle0 [start_write_load $master_host $master_port 3]

    # Lower the Lua time limit on the replica, then attach it to the master.
    $slave config set lua-time-limit 500
    $slave slaveof $master_host $master_port

    # Block until the replica's ROLE reply reports the link as connected.
    wait_for_condition 500 100 {
        [lindex [$slave role] 3] eq "connected"
    } else {
        fail "Replica still not connected after some time"
    }

    # Give the master some time to actually stream data to the replica.
    after 5000

    # Keep the replica from processing data by running a long busy-loop
    # script on it, which is meant to put it in BUSY state.
    $slave eval {for i=1,3000000000 do end} 0

    # Wait again so that more of the master stream piles up meanwhile.
    after 2000

    # Stop the write load.
    stop_write_load $load_handle0

    # Replica and master must end up with identical datasets: poll until
    # their DEBUG DIGEST values agree.
    wait_for_condition 500 100 {
        [$master debug digest] eq [$slave debug digest]
    } else {
        fail "Different datasets between replica and master"
    }
}
|
|
|
|
}
|
|
|
|
}
|
2019-07-01 15:22:29 +03:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients; these were changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it did not
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Diskless load swapdb when NOT async_loading (different master replid)
|
|
|
|
foreach testType {Successful Aborted} {
|
|
|
|
start_server {tags {"repl external:skip"}} {
|
|
|
|
set replica [srv 0 client]
|
|
|
|
set replica_host [srv 0 host]
|
|
|
|
set replica_port [srv 0 port]
|
|
|
|
set replica_log [srv 0 stdout]
|
2019-07-01 15:22:29 +03:00
|
|
|
start_server {} {
|
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients; these were changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it did not
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Set master and replica to use diskless replication on swapdb mode
|
|
|
|
$master config set repl-diskless-sync yes
|
|
|
|
$master config set repl-diskless-sync-delay 0
|
|
|
|
$master config set save ""
|
|
|
|
$replica config set repl-diskless-load swapdb
|
|
|
|
$replica config set save ""
|
|
|
|
|
|
|
|
# Put different data sets on the master and replica
|
|
|
|
# We need to put large keys on the master since the replica replies to info only once in 2mb
|
|
|
|
$replica debug populate 200 slave 10
|
|
|
|
$master debug populate 1000 master 100000
|
2019-07-01 15:22:29 +03:00
|
|
|
$master config set rdbcompression no
|
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not read-only and accept write commands
during replication, those writes are lost after SYNC the same way as before, but we're still denying CONFIG SET
here anyway to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients; these were
changed to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result, it did not
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Set a key value on replica to check status on failure and after swapping db
|
|
|
|
$replica set mykey myvalue
|
|
|
|
|
|
|
|
switch $testType {
|
|
|
|
"Aborted" {
|
|
|
|
# Set master with a slow rdb generation, so that we can easily intercept loading
|
|
|
|
# 10ms per key, with 1000 keys is 10 seconds
|
|
|
|
$master config set rdb-key-save-delay 10000
|
|
|
|
|
|
|
|
# Start the replication process
|
|
|
|
$replica replicaof $master_host $master_port
|
|
|
|
|
|
|
|
test {Diskless load swapdb (different replid): replica enter loading} {
|
|
|
|
# Wait for the replica to start reading the rdb
|
|
|
|
wait_for_condition 100 100 {
|
|
|
|
[s -1 loading] eq 1
|
|
|
|
} else {
|
|
|
|
fail "Replica didn't get into loading mode"
|
|
|
|
}
|
2022-02-13 15:52:38 +08:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag where actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication, as effect it wouldn't
contribute on triggering next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
assert_equal [s -1 async_loading] 0
|
|
|
|
}
|
|
|
|
|
|
|
|
# Make sure that next sync will not start immediately so that we can catch the replica in between syncs
|
|
|
|
$master config set repl-diskless-sync-delay 5
|
|
|
|
|
|
|
|
# Kill the replica connection on the master
|
|
|
|
set killed [$master client kill type replica]
|
|
|
|
|
|
|
|
# Wait for loading to stop (fail)
|
|
|
|
wait_for_condition 100 100 {
|
|
|
|
[s -1 loading] eq 0
|
|
|
|
} else {
|
|
|
|
fail "Replica didn't disconnect"
|
|
|
|
}
|
2022-02-13 15:52:38 +08:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag where actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication, as effect it wouldn't
contribute on triggering next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
test {Diskless load swapdb (different replid): old database is exposed after replication fails} {
|
|
|
|
# Ensure we see old values from replica
|
|
|
|
assert_equal [$replica get mykey] "myvalue"
|
|
|
|
|
|
|
|
# Make sure amount of replica keys didn't change
|
|
|
|
assert_equal [$replica dbsize] 201
|
|
|
|
}
|
|
|
|
|
|
|
|
# Speed up shutdown
|
|
|
|
$master config set rdb-key-save-delay 0
|
|
|
|
}
|
|
|
|
"Successful" {
|
|
|
|
# Start the replication process
|
|
|
|
$replica replicaof $master_host $master_port
|
|
|
|
|
|
|
|
# Let replica finish sync with master
|
|
|
|
wait_for_condition 100 100 {
|
|
|
|
[s -1 master_link_status] eq "up"
|
|
|
|
} else {
|
|
|
|
fail "Master <-> Replica didn't finish sync"
|
|
|
|
}
|
|
|
|
|
|
|
|
test {Diskless load swapdb (different replid): new database is exposed after swapping} {
|
|
|
|
# Ensure the key that was stored only on the replica is no longer visible, and that we don't get a LOADING status
|
|
|
|
assert_equal [$replica GET mykey] ""
|
|
|
|
|
|
|
|
# Make sure amount of keys matches master
|
|
|
|
assert_equal [$replica dbsize] 1000
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
# Diskless load swapdb when async_loading (matching master replid)
|
|
|
|
foreach testType {Successful Aborted} {
|
|
|
|
start_server {tags {"repl external:skip"}} {
|
|
|
|
set replica [srv 0 client]
|
|
|
|
set replica_host [srv 0 host]
|
|
|
|
set replica_port [srv 0 port]
|
|
|
|
set replica_log [srv 0 stdout]
|
|
|
|
start_server {} {
|
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
|
|
|
|
# Set master and replica to use diskless replication on swapdb mode
|
2019-07-01 15:22:29 +03:00
|
|
|
$master config set repl-diskless-sync yes
|
|
|
|
$master config set repl-diskless-sync-delay 0
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag where actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication, as effect it wouldn't
contribute on triggering next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
$master config set save ""
|
|
|
|
$replica config set repl-diskless-load swapdb
|
|
|
|
$replica config set save ""
|
2019-07-01 15:22:29 +03:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag where actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication, as effect it wouldn't
contribute on triggering next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Set replica writable so we can check that a key we manually added is served
|
|
|
|
# during replication and after failure, but disappears on success
|
|
|
|
$replica config set replica-read-only no
|
2019-07-01 15:22:29 +03:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag where actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication, as effect it wouldn't
contribute on triggering next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Initial sync to have matching replids between master and replica
|
|
|
|
$replica replicaof $master_host $master_port
|
2019-07-01 15:22:29 +03:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag where actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication, as effect it wouldn't
contribute on triggering next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Let replica finish initial sync with master
|
2021-10-18 10:45:45 +03:00
|
|
|
wait_for_condition 100 100 {
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag where actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication, as effect it wouldn't
contribute on triggering next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
[s -1 master_link_status] eq "up"
|
2019-07-01 15:22:29 +03:00
|
|
|
} else {
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag where actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication, as effect it wouldn't
contribute on triggering next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
fail "Master <-> Replica didn't finish sync"
|
2019-07-01 15:22:29 +03:00
|
|
|
}
|
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id the one it had before. i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag where actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Put different data sets on the master and replica
|
|
|
|
# We need to put large keys on the master since the replica replies to info only once in 2mb
|
|
|
|
$replica debug populate 2000 slave 10
|
2021-12-22 23:37:12 +02:00
|
|
|
$master debug populate 2000 master 100000
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
$master config set rdbcompression no
|
2019-07-01 15:22:29 +03:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Set a key value on replica to check status during loading, on failure and after swapping db
|
|
|
|
$replica set mykey myvalue
|
|
|
|
|
2021-10-07 14:41:26 +03:00
|
|
|
# Set a function value on replica to check status during loading, on failure and after swapping db
|
2022-04-05 10:27:24 +03:00
|
|
|
$replica function load {#!lua name=test
|
|
|
|
redis.register_function('test', function() return 'hello1' end)
|
|
|
|
}
|
2021-10-07 14:41:26 +03:00
|
|
|
|
|
|
|
# Set a function value on master to check it reaches the replica when replication ends
|
2022-04-05 10:27:24 +03:00
|
|
|
$master function load {#!lua name=test
|
|
|
|
redis.register_function('test', function() return 'hello2' end)
|
|
|
|
}
|
2021-10-07 14:41:26 +03:00
|
|
|
|
Fix races in swapdb async_loading test (#11613)
There is a race in the test:
```
*** [err]: Diskless load swapdb (async_loading): new database is exposed after swapping in tests/integration/replication.tcl
Expected 'myvalue' to be equal to '' (context: type eval line 3 cmd {assert_equal [$replica GET mykey] ""} proc ::test)
```
When doing `$replica GET mykey`, the replica is using the old database.
The reason may be that when doing `master client kill type replica`,
the replica did not yet realize it got disconnected from the master.
So the check of master_link_status fails, and the replica did not
finish the swapdb and the loading.
In that case, I think the solution is to check the sync_full stat on
the master and wait for it to get incremented from the previous value.
i.e. the way to know that we're done with the full sync is not to check
that our state is up (could be up if we check too early), but rather
check that the sync_full counter got incremented.
During review, we found another race: in the Aborted testType,
the `$master config set rdb-key-save-delay 10000` is done after we
already initiated the disconnection, so there's a chance that the replica
will attempt to reconnect before that call, in which case if we fork() before
it, the config will not take effect. Move it to above the disconnection.
Co-authored-by: Oran Agra <oran@redislabs.com>
2022-12-13 13:59:43 +08:00
|
|
|
# Remember the sync_full stat before the client kill.
|
|
|
|
set sync_full [s 0 sync_full]
|
|
|
|
|
|
|
|
if {$testType == "Aborted"} {
|
|
|
|
# Set master with a slow rdb generation, so that we can easily intercept loading
|
|
|
|
# 10ms per key, with 2000 keys is 20 seconds
|
|
|
|
$master config set rdb-key-save-delay 10000
|
|
|
|
}
|
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Force the replica to try another full sync (this time it will have matching master replid)
|
|
|
|
$master multi
|
|
|
|
$master client kill type replica
|
|
|
|
# Fill replication backlog with new content
|
|
|
|
$master config set repl-backlog-size 16384
|
|
|
|
for {set keyid 0} {$keyid < 10} {incr keyid} {
|
|
|
|
$master set "$keyid string_$keyid" [string repeat A 16384]
|
2019-07-01 15:22:29 +03:00
|
|
|
}
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
$master exec
|
|
|
|
|
Fix races in swapdb async_loading test (#11613)
There is a race in the test:
```
*** [err]: Diskless load swapdb (async_loading): new database is exposed after swapping in tests/integration/replication.tcl
Expected 'myvalue' to be equal to '' (context: type eval line 3 cmd {assert_equal [$replica GET mykey] ""} proc ::test)
```
When doing `$replica GET mykey`, the replica is using the old database.
The reason may be that when doing `master client kill type replica`,
the replica did not yet realize it got disconnected from the master.
So the check of master_link_status fails, and the replica did not
finish the swapdb and the loading.
In that case, I think the solution is to check the sync_full stat on
the master and wait for it to get incremented from the previous value.
i.e. the way to know that we're done with the full sync is not to check
that our state is up (could be up if we check too early), but rather
check that the sync_full counter got incremented.
During review, we found another race: in the Aborted testType,
the `$master config set rdb-key-save-delay 10000` is done after we
already initiated the disconnection, so there's a chance that the replica
will attempt to reconnect before that call, in which case if we fork() before
it, the config will not take effect. Move it to above the disconnection.
Co-authored-by: Oran Agra <oran@redislabs.com>
2022-12-13 13:59:43 +08:00
|
|
|
# Wait for sync_full to get incremented from the previous value.
|
|
|
|
# After the client kill, make sure we do a reconnect, and do a FULL SYNC.
|
|
|
|
wait_for_condition 100 100 {
|
|
|
|
[s 0 sync_full] > $sync_full
|
|
|
|
} else {
|
|
|
|
fail "Master <-> Replica didn't start the full sync"
|
|
|
|
}
|
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
switch $testType {
|
|
|
|
"Aborted" {
|
|
|
|
test {Diskless load swapdb (async_loading): replica enter async_loading} {
|
|
|
|
# Wait for the replica to start reading the rdb
|
|
|
|
wait_for_condition 100 100 {
|
|
|
|
[s -1 async_loading] eq 1
|
|
|
|
} else {
|
|
|
|
fail "Replica didn't get into async_loading mode"
|
|
|
|
}
|
2022-02-13 15:52:38 +08:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
assert_equal [s -1 loading] 0
|
|
|
|
}
|
2022-02-13 15:52:38 +08:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
test {Diskless load swapdb (async_loading): old database is exposed while async replication is in progress} {
|
|
|
|
# Ensure we still see old values while async_loading is in progress and also not LOADING status
|
|
|
|
assert_equal [$replica get mykey] "myvalue"
|
2019-07-01 15:22:29 +03:00
|
|
|
|
2021-10-07 14:41:26 +03:00
|
|
|
# Ensure we still can call old function while async_loading is in progress
|
|
|
|
assert_equal [$replica fcall test 0] "hello1"
|
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Make sure we're still async_loading to validate previous assertion
|
|
|
|
assert_equal [s -1 async_loading] 1
|
2019-07-01 15:22:29 +03:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Make sure amount of replica keys didn't change
|
|
|
|
assert_equal [$replica dbsize] 2001
|
|
|
|
}
|
2019-07-01 15:22:29 +03:00
|
|
|
|
Allow most CONFIG SET during loading, block some commands in async-loading (#9878)
## background
Till now CONFIG SET was blocked during loading.
(In the not so distant past, GET was disallowed too)
We recently (not released yet) added an async-loading mode, see #9323,
and during that time it'll serve CONFIG SET and any other command.
And now we realized (#9770) that some configs, and commands are dangerous
during async-loading.
## changes
* Allow most CONFIG SET during loading (both on async-loading and normal loading)
* Allow CONFIG REWRITE and CONFIG RESETSTAT during loading
* Block a few config during loading (`appendonly`, `repl-diskless-load`, and `dir`)
* Block a few commands during loading (list below)
## the blocked commands:
* SAVE - obviously we don't wanna start a foreground save during loading 8-)
* BGSAVE - we don't mind to schedule one, but we don't wanna fork now
* BGREWRITEAOF - we don't mind to schedule one, but we don't wanna fork now
* MODULE - we obviously don't wanna unload a module during replication / rdb loading
(MODULE HELP and MODULE LIST are not blocked)
* SYNC / PSYNC - we're in the middle of RDB loading from master, must not allow sync
requests now.
* REPLICAOF / SLAVEOF - we're in the middle of replicating, maybe it makes sense to let
the user abort it, but he couldn't do that so far, i don't wanna take any risk of bugs due to odd state.
* CLUSTER - only allow [HELP, SLOTS, NODES, INFO, MYID, LINKS, KEYSLOT, COUNTKEYSINSLOT,
GETKEYSINSLOT, RESET, REPLICAS, COUNT_FAILURE_REPORTS], for others, preserve the status quo
## other fixes
* processEventsWhileBlocked had an issue when being nested, this could happen with a busy script
during async loading (new), but also in a busy script during AOF loading (old). this led to a crash in
the scenario described in #6988
2021-12-22 14:11:16 +02:00
|
|
|
# Checks that a never-ending Lua script makes the replica answer BUSY, and that
# after SCRIPT KILL the replica answers PING normally again.
# NOTE(review): per the test name this is meant to run while the replica is in
# async loading; that state is set up outside this block - confirm.
test {Busy script during async loading} {
|
|
|
|
# Separate deferring client, so sending the endless script does not block
# the test itself. Presumably -1 selects the replica's server level - confirm.
set rd_replica [redis_deferring_client -1]
|
|
|
|
# Low time limit so the script is reported as busy quickly
# (presumably milliseconds - confirm against the config docs).
$replica config set lua-time-limit 10
|
|
|
|
# Script that never returns; the reply is never read on the deferring client.
$rd_replica eval {while true do end} 0
|
|
|
|
# Wait long enough for the script to exceed the time limit set above.
after 200
|
|
|
|
# While the script is running, a regular command must fail with BUSY.
assert_error {BUSY*} {$replica ping}
|
|
|
|
$replica script kill
|
|
|
|
after 200 ; # Give some time to Lua to call the hook again...
|
|
|
|
# After the kill the replica must serve commands normally again.
assert_equal [$replica ping] "PONG"
|
|
|
|
$rd_replica close
|
|
|
|
}
|
|
|
|
|
|
|
|
# Checks that one config change and one command are rejected with a LOADING
# error on the replica.
# NOTE(review): per the test name the replica is expected to be in async
# loading here; that state is set up outside this block - confirm.
test {Blocked commands and configs during async-loading} {
|
|
|
|
# Changing the appendonly config must be refused with a LOADING error.
assert_error {LOADING*} {$replica config set appendonly no}
|
|
|
|
# REPLICAOF must be refused with a LOADING error as well.
assert_error {LOADING*} {$replica REPLICAOF no one}
|
|
|
|
}
|
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Make sure that next sync will not start immediately so that we can catch the replica in between syncs
|
|
|
|
$master config set repl-diskless-sync-delay 5
|
|
|
|
|
|
|
|
# Kill the replica connection on the master
|
|
|
|
set killed [$master client kill type replica]
|
|
|
|
|
|
|
|
# Wait for loading to stop (fail)
|
|
|
|
wait_for_condition 100 100 {
|
|
|
|
[s -1 async_loading] eq 0
|
|
|
|
} else {
|
|
|
|
fail "Replica didn't disconnect"
|
|
|
|
}
|
2022-02-13 15:52:38 +08:00
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
test {Diskless load swapdb (async_loading): old database is exposed after async replication fails} {
|
|
|
|
# Ensure we see old values from replica
|
|
|
|
assert_equal [$replica get mykey] "myvalue"
|
|
|
|
|
2021-10-07 14:41:26 +03:00
|
|
|
# Ensure we still can call old function
|
|
|
|
assert_equal [$replica fcall test 0] "hello1"
|
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Make sure amount of replica keys didn't change
|
|
|
|
assert_equal [$replica dbsize] 2001
|
|
|
|
}
|
|
|
|
|
|
|
|
# Speed up shutdown
|
|
|
|
$master config set rdb-key-save-delay 0
|
|
|
|
}
|
|
|
|
"Successful" {
|
|
|
|
# Let replica finish sync with master
|
|
|
|
wait_for_condition 100 100 {
|
|
|
|
[s -1 master_link_status] eq "up"
|
|
|
|
} else {
|
|
|
|
fail "Master <-> Replica didn't finish sync"
|
|
|
|
}
|
|
|
|
|
|
|
|
test {Diskless load swapdb (async_loading): new database is exposed after swapping} {
|
|
|
|
# Ensure we no longer see the key that was stored only on the replica, and that we don't get a LOADING status
|
|
|
|
assert_equal [$replica GET mykey] ""
|
2019-07-01 15:22:29 +03:00
|
|
|
|
2021-10-07 14:41:26 +03:00
|
|
|
# Ensure we got the new function
|
|
|
|
assert_equal [$replica fcall test 0] "hello2"
|
|
|
|
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
# Make sure amount of keys matches master
|
2021-12-22 23:37:12 +02:00
|
|
|
assert_equal [$replica dbsize] 2010
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-07-01 15:22:29 +03:00
|
|
|
}
|
|
|
|
}
|
Replica keep serving data during repl-diskless-load=swapdb for better availability (#9323)
For diskless replication in swapdb mode, considering we already spend replica memory
having a backup of current db to restore in case of failure, we can have the following benefits
by instead swapping database only in case we succeeded in transferring db from master:
- Avoid `LOADING` response during failed and successful synchronization for cases where the
replica is already up and running with data.
- Faster total time of diskless replication, because now we're moving from Transfer + Flush + Load
time to Transfer + Load only. Flushing the tempDb is done asynchronously after swapping.
- This could be implemented also for disk replication with similar benefits if consumers are willing
to spend the extra memory usage.
General notes:
- The concept of `backupDb` becomes `tempDb` for clarity.
- Async loading mode will only kick in if the replica is syncing from a master that has the same
repl-id as the one it had before, i.e. the data it's getting belongs to a different time of the same timeline.
- New property in INFO: `async_loading` to differentiate from the blocking loading
- Slot to Key mapping is now a field of `redisDb` as it's more natural to access it from both server.db
and the tempDb that is passed around.
- Because this is affecting replicas only, we assume that if they are not readonly and write commands
during replication, they are lost after SYNC same way as before, but we're still denying CONFIG SET
here anyways to avoid complications.
Considerations for review:
- We have many cases where server.loading flag is used and even though I tried my best, there may
be cases where async_loading should be checked as well and cases where it shouldn't (would require
very good understanding of whole code)
- Several places that had different behavior depending on the loading flag were actually meant to just
handle commands coming from the AOF client differently than ones coming from real clients, changed
to check CLIENT_ID_AOF instead.
**Additional for Release Notes**
- Bugfix - server.dirty was not incremented for any kind of diskless replication; as a result it wouldn't
contribute to triggering the next database SAVE
- New flag for RM_GetContextFlags module API: REDISMODULE_CTX_FLAGS_ASYNC_LOADING
- Deprecated RedisModuleEvent_ReplBackup. Starting from Redis 7.0, we don't fire this event.
Instead, we have the new RedisModuleEvent_ReplAsyncLoad holding 3 sub-events: STARTED,
ABORTED and COMPLETED.
- New module flag REDISMODULE_OPTIONS_HANDLE_REPL_ASYNC_LOAD for RedisModule_SetModuleOptions
to allow modules to declare they support the diskless replication with async loading (when absent, we fall
back to disk-based loading).
Co-authored-by: Eduardo Semprebon <edus@saxobank.com>
Co-authored-by: Oran Agra <oran@redislabs.com>
2021-11-04 09:46:50 +01:00
|
|
|
}
|
2019-07-16 11:00:34 +03:00
|
|
|
|
|
|
|
test {diskless loading short read} {
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {tags {"repl"} overrides {save ""}} {
|
2019-07-16 11:00:34 +03:00
|
|
|
set replica [srv 0 client]
|
|
|
|
set replica_host [srv 0 host]
|
|
|
|
set replica_port [srv 0 port]
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save ""}} {
|
2019-07-16 11:00:34 +03:00
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
|
|
|
|
# Set master and replica to use diskless replication
|
|
|
|
$master config set repl-diskless-sync yes
|
|
|
|
$master config set rdbcompression no
|
|
|
|
$replica config set repl-diskless-load swapdb
|
Accelerate diskless master connections, and general re-connections (#6271)
Diskless master has some inherent latencies.
1) fork starts with delay from cron rather than immediately
2) replica is put online only after an ACK. but the ACK
was sent only once a second.
3) but even if it would arrive immediately, it will not
register in case cron didn't yet detect that the fork is done.
Besides that, when a replica disconnects, it doesn't immediately
attempt to re-connect, it waits for replication cron (one per second).
in case it was already online, it may be important to try to re-connect
as soon as possible, so that the backlog at the master doesn't vanish.
In case it disconnected during rdb transfer, one can argue that it's
not very important to re-connect immediately, but this is needed for the
"diskless loading short read" test to be able to run 100 iterations in 5
seconds, rather than 3 (waiting for replication cron re-connection)
changes in this commit:
1) sync command starts a fork immediately if no sync_delay is configured
2) replica sends REPLCONF ACK when done reading the rdb (rather than on 1s cron)
3) when a replica unexpectedly disconnects, it immediately tries to
re-connect rather than waiting 1s
4) when a child exits, if there is another replica waiting, we spawn a new
one right away, instead of waiting for 1s replicationCron.
5) added a call to connectWithMaster from replicationSetMaster. which is called
from the REPLICAOF command but also in 3 places in cluster.c, in all of
these the connection attempt will now be immediate instead of delayed by 1
second.
side note:
we can add a call to rdbPipeReadHandler in replconfCommand when getting
a REPLCONF ACK from the replica to solve a race where the replica got
the entire rdb and EOF marker before we detected that the pipe was
closed.
in the test i did see this race happen in about one of some 300 runs,
but i concluded that this race is unlikely in real life (where the
replica is on another host and we're more likely to first detect the
pipe was closed).
the test runs 100 iterations in 3 seconds, so in some cases it'll take 4
seconds instead (waiting for another REPLCONF ACK).
Removing unneeded startBgsaveForReplication from updateSlavesWaitingForBgsave
Now that CheckChildrenDone is calling the new replicationStartPendingFork
(extracted from serverCron) there's actually no need to call
startBgsaveForReplication from updateSlavesWaitingForBgsave anymore,
since as soon as updateSlavesWaitingForBgsave returns, CheckChildrenDone is
calling replicationStartPendingFork that handles that anyway.
The code in updateSlavesWaitingForBgsave had a bug in which it ignored
repl-diskless-sync-delay, but removing that code shows that this bug was
hiding another bug, which is that the max_idle should have used >= and
not >, this one second delay has a big impact on my new test.
2020-08-06 16:53:06 +03:00
|
|
|
$master config set hz 500
|
|
|
|
$replica config set hz 500
|
|
|
|
$master config set dynamic-hz no
|
|
|
|
$replica config set dynamic-hz no
|
2019-07-16 11:00:34 +03:00
|
|
|
# Try to fill the master with all types of data types / encodings
|
Accelerate diskless master connections, and general re-connections (#6271)
Diskless master has some inherent latencies.
1) fork starts with delay from cron rather than immediately
2) replica is put online only after an ACK. but the ACK
was sent only once a second.
3) but even if it would arrive immediately, it will not
register in case cron didn't yet detect that the fork is done.
Besides that, when a replica disconnects, it doesn't immediately
attempt to re-connect, it waits for replication cron (one per second).
in case it was already online, it may be important to try to re-connect
as soon as possible, so that the backlog at the master doesn't vanish.
In case it disconnected during rdb transfer, one can argue that it's
not very important to re-connect immediately, but this is needed for the
"diskless loading short read" test to be able to run 100 iterations in 5
seconds, rather than 3 (waiting for replication cron re-connection)
changes in this commit:
1) sync command starts a fork immediately if no sync_delay is configured
2) replica sends REPLCONF ACK when done reading the rdb (rather than on 1s cron)
3) when a replica unexpectedly disconnects, it immediately tries to
re-connect rather than waiting 1s
4) when a child exits, if there is another replica waiting, we spawn a new
one right away, instead of waiting for 1s replicationCron.
5) added a call to connectWithMaster from replicationSetMaster. which is called
from the REPLICAOF command but also in 3 places in cluster.c, in all of
these the connection attempt will now be immediate instead of delayed by 1
second.
side note:
we can add a call to rdbPipeReadHandler in replconfCommand when getting
a REPLCONF ACK from the replica to solve a race where the replica got
the entire rdb and EOF marker before we detected that the pipe was
closed.
in the test i did see this race happen in about one out of some 300 runs,
but i concluded that this race is unlikely in real life (where the
replica is on another host and we're more likely to first detect the
pipe was closed).
the test runs 100 iterations in 3 seconds, so in some cases it'll take 4
seconds instead (waiting for another REPLCONF ACK).
Removing unneeded startBgsaveForReplication from updateSlavesWaitingForBgsave
Now that CheckChildrenDone is calling the new replicationStartPendingFork
(extracted from serverCron) there's actually no need to call
startBgsaveForReplication from updateSlavesWaitingForBgsave anymore,
since as soon as updateSlavesWaitingForBgsave returns, CheckChildrenDone is
calling replicationStartPendingFork that handles that anyway.
The code in updateSlavesWaitingForBgsave had a bug in which it ignored
repl-diskless-sync-delay, but removing that code shows that this bug was
hiding another bug, which is that the max_idle should have used >= and
not >, this one second delay has a big impact on my new test.
2020-08-06 16:53:06 +03:00
|
|
|
set start [clock clicks -milliseconds]
|
2021-10-07 14:41:26 +03:00
|
|
|
|
|
|
|
# Set a function value to check short read handling on functions
|
2022-04-05 10:27:24 +03:00
|
|
|
r function load {#!lua name=test
|
|
|
|
redis.register_function('test', function() return 'hello1' end)
|
|
|
|
}
|
2021-10-07 14:41:26 +03:00
|
|
|
|
2019-07-16 11:00:34 +03:00
|
|
|
for {set k 0} {$k < 3} {incr k} {
|
|
|
|
for {set i 0} {$i < 10} {incr i} {
|
|
|
|
r set "$k int_$i" [expr {int(rand()*10000)}]
|
|
|
|
r expire "$k int_$i" [expr {int(rand()*10000)}]
|
|
|
|
r set "$k string_$i" [string repeat A [expr {int(rand()*1000000)}]]
|
|
|
|
r hset "$k hash_small" [string repeat A [expr {int(rand()*10)}]] 0[string repeat A [expr {int(rand()*10)}]]
|
|
|
|
r hset "$k hash_large" [string repeat A [expr {int(rand()*10000)}]] [string repeat A [expr {int(rand()*1000000)}]]
|
|
|
|
r sadd "$k set_small" [string repeat A [expr {int(rand()*10)}]]
|
|
|
|
r sadd "$k set_large" [string repeat A [expr {int(rand()*1000000)}]]
|
|
|
|
r zadd "$k zset_small" [expr {rand()}] [string repeat A [expr {int(rand()*10)}]]
|
|
|
|
r zadd "$k zset_large" [expr {rand()}] [string repeat A [expr {int(rand()*1000000)}]]
|
|
|
|
r lpush "$k list_small" [string repeat A [expr {int(rand()*10)}]]
|
|
|
|
r lpush "$k list_large" [string repeat A [expr {int(rand()*1000000)}]]
|
|
|
|
for {set j 0} {$j < 10} {incr j} {
|
|
|
|
r xadd "$k stream" * foo "asdf" bar "1234"
|
|
|
|
}
|
|
|
|
r xgroup create "$k stream" "mygroup_$i" 0
|
|
|
|
r xreadgroup GROUP "mygroup_$i" Alice COUNT 1 STREAMS "$k stream" >
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Accelerate diskless master connections, and general re-connections (#6271)
Diskless master has some inherent latencies.
1) fork starts with delay from cron rather than immediately
2) replica is put online only after an ACK. but the ACK
was sent only once a second.
3) but even if it would arrive immediately, it will not
register in case cron didn't yet detect that the fork is done.
Besides that, when a replica disconnects, it doesn't immediately
attempt to re-connect, it waits for replication cron (one per second).
in case it was already online, it may be important to try to re-connect
as soon as possible, so that the backlog at the master doesn't vanish.
In case it disconnected during rdb transfer, one can argue that it's
not very important to re-connect immediately, but this is needed for the
"diskless loading short read" test to be able to run 100 iterations in 5
seconds, rather than 3 (waiting for replication cron re-connection)
changes in this commit:
1) sync command starts a fork immediately if no sync_delay is configured
2) replica sends REPLCONF ACK when done reading the rdb (rather than on 1s cron)
3) when a replica unexpectedly disconnects, it immediately tries to
re-connect rather than waiting 1s
4) when a child exits, if there is another replica waiting, we spawn a new
one right away, instead of waiting for 1s replicationCron.
5) added a call to connectWithMaster from replicationSetMaster. which is called
from the REPLICAOF command but also in 3 places in cluster.c, in all of
these the connection attempt will now be immediate instead of delayed by 1
second.
side note:
we can add a call to rdbPipeReadHandler in replconfCommand when getting
a REPLCONF ACK from the replica to solve a race where the replica got
the entire rdb and EOF marker before we detected that the pipe was
closed.
in the test i did see this race happen in about one out of some 300 runs,
but i concluded that this race is unlikely in real life (where the
replica is on another host and we're more likely to first detect the
pipe was closed).
the test runs 100 iterations in 3 seconds, so in some cases it'll take 4
seconds instead (waiting for another REPLCONF ACK).
Removing unneeded startBgsaveForReplication from updateSlavesWaitingForBgsave
Now that CheckChildrenDone is calling the new replicationStartPendingFork
(extracted from serverCron) there's actually no need to call
startBgsaveForReplication from updateSlavesWaitingForBgsave anymore,
since as soon as updateSlavesWaitingForBgsave returns, CheckChildrenDone is
calling replicationStartPendingFork that handles that anyway.
The code in updateSlavesWaitingForBgsave had a bug in which it ignored
repl-diskless-sync-delay, but removing that code shows that this bug was
hiding another bug, which is that the max_idle should have used >= and
not >, this one second delay has a big impact on my new test.
2020-08-06 16:53:06 +03:00
|
|
|
if {$::verbose} {
|
|
|
|
set end [clock clicks -milliseconds]
|
|
|
|
set duration [expr $end - $start]
|
|
|
|
puts "filling took $duration ms (TODO: use pipeline)"
|
|
|
|
set start [clock clicks -milliseconds]
|
|
|
|
}
|
|
|
|
|
2019-07-16 11:00:34 +03:00
|
|
|
# Start the replication process...
|
2020-07-10 08:28:22 +03:00
|
|
|
set loglines [count_log_lines -1]
|
2019-07-16 11:00:34 +03:00
|
|
|
$master config set repl-diskless-sync-delay 0
|
|
|
|
$replica replicaof $master_host $master_port
|
|
|
|
|
|
|
|
# kill the replication at various points
|
Accelerate diskless master connections, and general re-connections (#6271)
Diskless master has some inherent latencies.
1) fork starts with delay from cron rather than immediately
2) replica is put online only after an ACK. but the ACK
was sent only once a second.
3) but even if it would arrive immediately, it will not
register in case cron didn't yet detect that the fork is done.
Besides that, when a replica disconnects, it doesn't immediately
attempt to re-connect, it waits for replication cron (one per second).
in case it was already online, it may be important to try to re-connect
as soon as possible, so that the backlog at the master doesn't vanish.
In case it disconnected during rdb transfer, one can argue that it's
not very important to re-connect immediately, but this is needed for the
"diskless loading short read" test to be able to run 100 iterations in 5
seconds, rather than 3 (waiting for replication cron re-connection)
changes in this commit:
1) sync command starts a fork immediately if no sync_delay is configured
2) replica sends REPLCONF ACK when done reading the rdb (rather than on 1s cron)
3) when a replica unexpectedly disconnects, it immediately tries to
re-connect rather than waiting 1s
4) when a child exits, if there is another replica waiting, we spawn a new
one right away, instead of waiting for 1s replicationCron.
5) added a call to connectWithMaster from replicationSetMaster. which is called
from the REPLICAOF command but also in 3 places in cluster.c, in all of
these the connection attempt will now be immediate instead of delayed by 1
second.
side note:
we can add a call to rdbPipeReadHandler in replconfCommand when getting
a REPLCONF ACK from the replica to solve a race where the replica got
the entire rdb and EOF marker before we detected that the pipe was
closed.
in the test i did see this race happen in about one out of some 300 runs,
but i concluded that this race is unlikely in real life (where the
replica is on another host and we're more likely to first detect the
pipe was closed).
the test runs 100 iterations in 3 seconds, so in some cases it'll take 4
seconds instead (waiting for another REPLCONF ACK).
Removing unneeded startBgsaveForReplication from updateSlavesWaitingForBgsave
Now that CheckChildrenDone is calling the new replicationStartPendingFork
(extracted from serverCron) there's actually no need to call
startBgsaveForReplication from updateSlavesWaitingForBgsave anymore,
since as soon as updateSlavesWaitingForBgsave returns, CheckChildrenDone is
calling replicationStartPendingFork that handles that anyway.
The code in updateSlavesWaitingForBgsave had a bug in which it ignored
repl-diskless-sync-delay, but removing that code shows that this bug was
hiding another bug, which is that the max_idle should have used >= and
not >, this one second delay has a big impact on my new test.
2020-08-06 16:53:06 +03:00
|
|
|
set attempts 100
|
|
|
|
if {$::accurate} { set attempts 500 }
|
2019-07-16 11:00:34 +03:00
|
|
|
for {set i 0} {$i < $attempts} {incr i} {
|
|
|
|
# wait for the replica to start reading the rdb
|
|
|
|
# using the log file since the replica only responds to INFO once in 2mb
|
2020-07-28 11:15:29 +03:00
|
|
|
set res [wait_for_log_messages -1 {"*Loading DB in memory*"} $loglines 2000 1]
|
|
|
|
set loglines [lindex $res 1]
|
2019-07-16 11:00:34 +03:00
|
|
|
|
|
|
|
# add some additional random sleep so that we kill the master on a different place each time
|
2020-07-28 11:15:29 +03:00
|
|
|
after [expr {int(rand()*50)}]
|
2019-07-16 11:00:34 +03:00
|
|
|
|
|
|
|
# kill the replica connection on the master
|
|
|
|
set killed [$master client kill type replica]
|
|
|
|
|
2021-11-09 22:37:18 +02:00
|
|
|
set res [wait_for_log_messages -1 {"*Internal error in RDB*" "*Finished with success*" "*Successful partial resynchronization*"} $loglines 500 10]
|
2020-07-28 11:15:29 +03:00
|
|
|
if {$::verbose} { puts $res }
|
|
|
|
set log_text [lindex $res 0]
|
|
|
|
set loglines [lindex $res 1]
|
|
|
|
if {![string match "*Internal error in RDB*" $log_text]} {
|
2019-07-16 11:00:34 +03:00
|
|
|
# force the replica to try another full sync
|
2020-07-28 11:15:29 +03:00
|
|
|
$master multi
|
2019-07-16 11:00:34 +03:00
|
|
|
$master client kill type replica
|
|
|
|
$master set asdf asdf
|
Replication backlog and replicas use one global shared replication buffer (#9166)
## Background
For redis master, one replica uses one copy of replication buffer, that is a big waste of memory,
more replicas more waste, and allocate/free memory for every reply list also cost much.
If we set client-output-buffer-limit small and write traffic is heavy, master may disconnect with
replicas and can't finish synchronization with replica. If we set client-output-buffer-limit big,
master may be OOM when there are many replicas that separately keep much memory.
Because replication buffers of different replica client are the same, one simple idea is that
all replicas only use one replication buffer, that will effectively save memory.
Since replication backlog content is the same as replicas' output buffer, now we
can discard replication backlog memory and use global shared replication buffer
to implement replication backlog mechanism.
## Implementation
I create one global "replication buffer" which contains content of replication stream.
The structure of "replication buffer" is similar to the reply list that exists in every client.
But the node of list is `replBufBlock`, which has `id, repl_offset, refcount` fields.
```c
/* Replication buffer blocks is the list of replBufBlock.
*
* +--------------+ +--------------+ +--------------+
* | refcount = 1 | ... | refcount = 0 | ... | refcount = 2 |
* +--------------+ +--------------+ +--------------+
* | / \
* | / \
* | / \
* Repl Backlog Replica_A Replica_B
*
* Each replica or replication backlog increments only the refcount of the
* 'ref_repl_buf_node' which it points to. So when replica walks to the next
* node, it should first increase the next node's refcount, and when we trim
* the replication buffer nodes, we remove node always from the head node which
* refcount is 0. If the refcount of the head node is not 0, we must stop
* trimming and never iterate the next node. */
/* Similar with 'clientReplyBlock', it is used for shared buffers between
* all replica clients and replication backlog. */
typedef struct replBufBlock {
int refcount; /* Number of replicas or repl backlog using. */
long long id; /* The unique incremental number. */
long long repl_offset; /* Start replication offset of the block. */
size_t size, used;
char buf[];
} replBufBlock;
```
So now when we feed replication stream into replication backlog and all replicas, we only need
to feed stream into replication buffer `feedReplicationBuffer`. In this function, we set some fields of
replication backlog and replicas to references of the global replication buffer blocks. And we also
need to check replicas' output buffer limit to free if exceeding `client-output-buffer-limit`, and trim
replication backlog if exceeding `repl-backlog-size`.
When sending reply to replicas, we also need to iterate replication buffer blocks and send its
content, when totally sending one block for replica, we decrease current node count and
increase the next current node count, and then free the block which reference is 0 from the
head of replication buffer blocks.
Since now we use linked list to manage replication backlog, it may cost much time for iterating
all linked list nodes to find corresponding replication buffer node. So we create a rax tree to
store some nodes for index, but to avoid rax tree occupying too much memory, i record
one per 64 nodes for index.
Currently, to make partial resynchronization possible as often as we can, we always keep the replication
backlog as the last reference of replication buffer blocks, so the backlog size may exceed our setting
if slow replicas reference vast replication buffer blocks, and this method doesn't increase
memory usage since they share replication buffer. To avoid freezing server for freeing unreferenced
replication buffer blocks when we need to trim backlog for exceeding backlog size setting,
we trim backlog incrementally (free 64 blocks per call now), and make it faster in
`beforeSleep` (free 640 blocks).
### Other changes
- `mem_total_replication_buffers`: we add this field in INFO command, it means the total
memory of replication buffers used.
- `mem_clients_slaves`: now even replica is slow to replicate, and its output buffer memory
is not 0, but it still may be 0, since replication backlog and replicas share one global replication
buffer, only if replication buffer memory is more than the repl backlog setting size, we consider
the excess as replicas' memory. Otherwise, we think replication buffer memory is the consumption
of repl backlog.
- Key eviction
Since all replicas and replication backlog share global replication buffer, we think only the
part of exceeding backlog size the extra separate consumption of replicas.
Because we trim backlog incrementally in the background, backlog size may exceeds our
setting if slow replicas that reference vast replication buffer blocks disconnect.
To avoid massive eviction loop, we don't count the delayed freed replication backlog into
used memory even if there are no replicas, i.e. we also regard this memory as replicas's memory.
- `client-output-buffer-limit` check for replica clients
It doesn't make sense to set the replica clients output buffer limit lower than the repl-backlog-size
config (partial sync will succeed and then replica will get disconnected). Such a configuration is
ignored (the size of repl-backlog-size will be used). This doesn't have memory consumption
implications since the replica client will share the backlog buffers memory.
- Drop replication backlog after loading data if needed
We always create replication backlog if server is a master, we need it because we put DELs in
it when loading expired keys in RDB, but if RDB doesn't have replication info or there is no rdb,
it is not possible to support partial resynchronization, to avoid extra memory of replication backlog,
we drop it.
- Multi IO threads
Since all replicas and replication backlog use global replication buffer, if I/O threads are enabled,
to guarantee data accessing thread safe, we must let main thread handle sending the output buffer
to all replicas. But before, other IO threads could handle sending output buffer of all replicas.
## Other optimizations
This solution resolve some other problem:
- When replicas are disconnected by the master because they exceed the output buffer limit, releasing the output
buffer of replicas may freeze server if we set big `client-output-buffer-limit` for replicas, but now,
it doesn't cause freezing.
- This implementation may mitigate reply list copy cost time(also freezes server) when one replication
has huge reply buffer and another replica can copy buffer for full synchronization. now, we just copy
reference info, it is very light.
- If we set replication backlog size big, it also may cost much time to copy replication backlog into
replica's output buffer. But this commit eliminates this problem.
- Resizing replication backlog size doesn't empty current replication backlog content.
2021-10-25 14:24:31 +08:00
|
|
|
# fill replication backlog with new content
|
|
|
|
$master config set repl-backlog-size 16384
|
|
|
|
for {set keyid 0} {$keyid < 10} {incr keyid} {
|
|
|
|
$master set "$keyid string_$keyid" [string repeat A 16384]
|
|
|
|
}
|
2020-07-28 11:15:29 +03:00
|
|
|
$master exec
|
2019-07-16 11:00:34 +03:00
|
|
|
}
|
2021-11-24 18:46:43 +08:00
|
|
|
|
2019-07-16 11:00:34 +03:00
|
|
|
# wait for loading to stop (fail)
|
2021-11-24 18:46:43 +08:00
|
|
|
# After a loading successfully, next loop will enter `async_loading`
|
2020-07-28 11:15:29 +03:00
|
|
|
wait_for_condition 1000 1 {
|
2021-11-24 18:46:43 +08:00
|
|
|
[s -1 async_loading] eq 0 &&
|
2019-07-16 11:00:34 +03:00
|
|
|
[s -1 loading] eq 0
|
|
|
|
} else {
|
|
|
|
fail "Replica didn't disconnect"
|
|
|
|
}
|
|
|
|
}
|
Accelerate diskless master connections, and general re-connections (#6271)
Diskless master has some inherent latencies.
1) fork starts with delay from cron rather than immediately
2) replica is put online only after an ACK. but the ACK
was sent only once a second.
3) but even if it would arrive immediately, it will not
register in case cron didn't yet detect that the fork is done.
Besides that, when a replica disconnects, it doesn't immediately
attempt to re-connect, it waits for replication cron (one per second).
in case it was already online, it may be important to try to re-connect
as soon as possible, so that the backlog at the master doesn't vanish.
In case it disconnected during rdb transfer, one can argue that it's
not very important to re-connect immediately, but this is needed for the
"diskless loading short read" test to be able to run 100 iterations in 5
seconds, rather than 3 (waiting for replication cron re-connection)
changes in this commit:
1) sync command starts a fork immediately if no sync_delay is configured
2) replica sends REPLCONF ACK when done reading the rdb (rather than on 1s cron)
3) when a replica unexpectedly disconnects, it immediately tries to
re-connect rather than waiting 1s
4) when a child exits, if there is another replica waiting, we spawn a new
one right away, instead of waiting for 1s replicationCron.
5) added a call to connectWithMaster from replicationSetMaster. which is called
from the REPLICAOF command but also in 3 places in cluster.c, in all of
these the connection attempt will now be immediate instead of delayed by 1
second.
side note:
we can add a call to rdbPipeReadHandler in replconfCommand when getting
a REPLCONF ACK from the replica to solve a race where the replica got
the entire rdb and EOF marker before we detected that the pipe was
closed.
in the test i did see this race happen in about one out of some 300 runs,
but i concluded that this race is unlikely in real life (where the
replica is on another host and we're more likely to first detect the
pipe was closed).
the test runs 100 iterations in 3 seconds, so in some cases it'll take 4
seconds instead (waiting for another REPLCONF ACK).
Removing unneeded startBgsaveForReplication from updateSlavesWaitingForBgsave
Now that CheckChildrenDone is calling the new replicationStartPendingFork
(extracted from serverCron) there's actually no need to call
startBgsaveForReplication from updateSlavesWaitingForBgsave anymore,
since as soon as updateSlavesWaitingForBgsave returns, CheckChildrenDone is
calling replicationStartPendingFork that handles that anyway.
The code in updateSlavesWaitingForBgsave had a bug in which it ignored
repl-diskless-sync-delay, but removing that code shows that this bug was
hiding another bug, which is that the max_idle should have used >= and
not >, this one second delay has a big impact on my new test.
2020-08-06 16:53:06 +03:00
|
|
|
if {$::verbose} {
|
|
|
|
set end [clock clicks -milliseconds]
|
|
|
|
set duration [expr $end - $start]
|
|
|
|
puts "test took $duration ms"
|
|
|
|
}
|
2019-07-16 11:00:34 +03:00
|
|
|
# enable fast shutdown
|
|
|
|
$master config set rdb-key-save-delay 0
|
|
|
|
}
|
|
|
|
}
|
2021-06-09 15:13:24 +03:00
|
|
|
} {} {external:skip}
|
2019-07-16 11:00:34 +03:00
|
|
|
|
2019-08-11 16:07:53 +03:00
|
|
|
# Get the cumulative utime and stime counters of a process or thread (since
# its creation) from a Linux /proc/<pid>/stat style file.
#
# Arguments:
#   statfile - path of the stat file to read.
#
# Returns a three element list {mstime utime stime}: the wall clock time of
# the sample in milliseconds, followed by the utime and stime fields of the
# stat file (the 14th and 15th fields as described in proc(5)).
#
# Raises an error prefixed with "assertion:" if the file can't be read or
# doesn't have the expected layout.
proc get_cpu_metrics { statfile } {
    if { [ catch {
        set fid [ open $statfile r ]
        # Always close the handle, even when the read itself fails, so that
        # a failure does not leak a file descriptor.
        set read_failed [ catch { read $fid 1024 } data ]
        ::close $fid
        if { $read_failed } {
            error $data
        }

        # The second field (comm) is wrapped in parentheses and may itself
        # contain spaces or parentheses, so splitting the whole line on
        # whitespace can shift the remaining fields. Anchor on the last ")"
        # and count from the third field (state) instead.
        set comm_end [ string last ")" $data ]
        if { $comm_end < 0 } {
            error "no comm field found in $statfile"
        }
        set fields [ regexp -all -inline {\S+} [ string range $data [ expr { $comm_end + 1 } ] end ] ]

        ;## number of jiffies it has been scheduled...
        # fields 14 and 15 of the file are at index 11 and 12 once the first
        # two fields (pid and comm) are stripped.
        set utime [ lindex $fields 11 ]
        set stime [ lindex $fields 12 ]
        if { ![ regexp {^[0-9]+$} $utime ] || ![ regexp {^[0-9]+$} $stime ] } {
            error "utime/stime fields missing or not numeric in $statfile"
        }
    } err ] } {
        error "assertion:can't parse /proc: $err"
    }
    set mstime [clock milliseconds]
    return [ list $mstime $utime $stime ]
}
|
|
|
|
|
|
|
|
# Turn two samples, each a {mstime utime stime} list, into the percentage of
# user and system CPU time consumed between them.
#
# Arguments:
#   start - the earlier sample
#   end   - the later sample
#
# Returns a two element list: {%utime %stime}.
proc compute_cpu_usage {start end} {
    set ticks_per_sec [exec getconf CLK_TCK]
    lassign $start ms_from user_from sys_from
    lassign $end ms_to user_to sys_to
    # elapsed wall clock time, converted from milliseconds to clock ticks
    set elapsed_ticks [expr {($ms_to - $ms_from) * double($ticks_per_sec) / 1000}]
    set user_ticks [expr {$user_to - $user_from}]
    set sys_ticks [expr {$sys_to - $sys_from}]
    set user_pct [expr {($user_ticks / $elapsed_ticks) * 100}]
    set sys_pct [expr {($sys_ticks / $elapsed_ticks) * 100}]
    return [list $user_pct $sys_pct]
}
|
|
|
|
|
|
|
|
|
|
|
|
# test diskless rdb pipe with multiple replicas, which may drop half way
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {tags {"repl external:skip"} overrides {save ""}} {
|
2019-08-11 16:07:53 +03:00
|
|
|
set master [srv 0 client]
|
|
|
|
$master config set repl-diskless-sync yes
|
Set repl-diskless-sync to yes by default, add repl-diskless-sync-max-replicas (#10092)
1. enable diskless replication by default
2. add a new config named repl-diskless-sync-max-replicas that enables
replication to start before the full repl-diskless-sync-delay was
reached.
3. put replica online sooner on the master (see below)
4. test suite uses repl-diskless-sync-delay of 0 to be faster
5. a few tests that use multiple replica on a pre-populated master, are
now using the new repl-diskless-sync-max-replicas
6. fix possible timing issues in a few cluster tests (see below)
put replica online sooner on the master
----------------------------------------------------
there were two tests that failed because they needed for the master to
realize that the replica is online, but the test code was actually only
waiting for the replica to realize it's online, and in diskless it could
have been before the master realized it.
changes include two things:
1. the tests wait on the right thing
2. issues in the master, putting the replica online in two steps.
the master used to put the replica as online in 2 steps. the first
step was to mark it as online, and the second step was to enable the
write event (only after getting ACK), but in fact the first step didn't
contains some of the tasks to put it online (like updating good slave
count, and sending the module event). this meant that if a test was
waiting to see that the replica is online form the point of view of the
master, and then confirm that the module got an event, or that the
master has enough good replicas, it could fail due to timing issues.
so now the full effect of putting the replica online, happens at once,
and only the part about enabling the writes is delayed till the ACK.
fix cluster tests
--------------------
I added some code to wait for the replica to sync and avoid race
conditions.
later realized the sentinel and cluster tests where using the original 5
seconds delay, so changed it to 0.
this means the other changes are probably not needed, but i suppose
they're still better (avoid race conditions)
2022-01-17 14:11:11 +02:00
|
|
|
$master config set repl-diskless-sync-delay 5
|
|
|
|
$master config set repl-diskless-sync-max-replicas 2
|
2019-08-11 16:07:53 +03:00
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
2019-09-12 11:10:22 +03:00
|
|
|
set master_pid [srv 0 pid]
|
2019-08-11 16:07:53 +03:00
|
|
|
# put enough data in the db that the rdb file will be bigger than the socket buffers
|
2020-05-12 08:59:09 +03:00
|
|
|
# and since we'll have key-load-delay of 100, 20000 keys will take at least 2 seconds
|
2019-08-11 16:07:53 +03:00
|
|
|
# we also need the replica to process requests during transfer (which it does only once in 2mb)
|
2020-05-12 08:59:09 +03:00
|
|
|
$master debug populate 20000 test 10000
|
2019-08-11 16:07:53 +03:00
|
|
|
$master config set rdbcompression no
|
2019-09-12 11:10:22 +03:00
|
|
|
# If running on Linux, we also measure utime/stime to detect possible I/O handling issues
|
2021-04-18 15:12:34 +03:00
|
|
|
# [catch {script}] on its own returns the numeric completion code, not the
# output of the script, so the comparison with "Linux" below could never be
# true. Capture the output of uname in $os instead, and fall back to an empty
# string when uname can't be executed.
if {[catch {exec uname} os]} { set os "" }
|
2019-09-12 11:10:22 +03:00
|
|
|
set measure_time [expr {$os == "Linux"} ? 1 : 0]
|
2021-04-15 16:18:51 +02:00
|
|
|
foreach all_drop {no slow fast all timeout} {
|
2019-08-11 16:07:53 +03:00
|
|
|
test "diskless $all_drop replicas drop during rdb pipe" {
|
|
|
|
set replicas {}
|
|
|
|
set replicas_alive {}
|
|
|
|
# start one replica that will read the rdb fast, and one that will be slow
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save ""}} {
|
2019-08-11 16:07:53 +03:00
|
|
|
lappend replicas [srv 0 client]
|
|
|
|
lappend replicas_alive [srv 0 client]
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save ""}} {
|
2019-08-11 16:07:53 +03:00
|
|
|
lappend replicas [srv 0 client]
|
|
|
|
lappend replicas_alive [srv 0 client]
|
|
|
|
|
|
|
|
# start replication
|
|
|
|
# it's enough for just one replica to be slow, and have it's write handler enabled
|
|
|
|
# so that the whole rdb generation process is bound to that
|
2021-11-02 16:32:01 +08:00
|
|
|
set loglines [count_log_lines -2]
|
2019-08-11 16:07:53 +03:00
|
|
|
[lindex $replicas 0] config set repl-diskless-load swapdb
|
2021-04-18 15:12:34 +03:00
|
|
|
[lindex $replicas 0] config set key-load-delay 100 ;# 20k keys and 100 microseconds sleep means at least 2 seconds
|
2019-08-11 16:07:53 +03:00
|
|
|
[lindex $replicas 0] replicaof $master_host $master_port
|
|
|
|
[lindex $replicas 1] replicaof $master_host $master_port
|
|
|
|
|
|
|
|
# wait for the replicas to start reading the rdb
|
|
|
|
# using the log file since the replica only responds to INFO once in 2mb
|
2022-12-12 23:38:12 +08:00
|
|
|
wait_for_log_messages -1 {"*Loading DB in memory*"} 0 1500 10
|
2019-08-11 16:07:53 +03:00
|
|
|
|
2019-09-12 11:10:22 +03:00
|
|
|
if {$measure_time} {
|
|
|
|
set master_statfile "/proc/$master_pid/stat"
|
|
|
|
set master_start_metrics [get_cpu_metrics $master_statfile]
|
|
|
|
set start_time [clock seconds]
|
|
|
|
}
|
2019-08-11 16:07:53 +03:00
|
|
|
|
|
|
|
# wait a while so that the pipe socket writer will be
|
|
|
|
# blocked on write (since replica 0 is slow to read from the socket)
|
|
|
|
after 500
|
|
|
|
|
|
|
|
# add some command to be present in the command stream after the rdb.
|
|
|
|
$master incr $all_drop
|
|
|
|
|
|
|
|
# disconnect replicas depending on the current test
|
|
|
|
if {$all_drop == "all" || $all_drop == "fast"} {
|
|
|
|
exec kill [srv 0 pid]
|
|
|
|
set replicas_alive [lreplace $replicas_alive 1 1]
|
|
|
|
}
|
|
|
|
if {$all_drop == "all" || $all_drop == "slow"} {
|
|
|
|
exec kill [srv -1 pid]
|
|
|
|
set replicas_alive [lreplace $replicas_alive 0 0]
|
|
|
|
}
|
2021-04-15 16:18:51 +02:00
|
|
|
if {$all_drop == "timeout"} {
|
2021-04-18 15:12:34 +03:00
|
|
|
$master config set repl-timeout 2
|
|
|
|
# we want the slow replica to hang on a key for very long so it'll reach repl-timeout
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
pause_process [srv -1 pid]
|
2021-04-18 15:12:34 +03:00
|
|
|
after 2000
|
2021-04-15 16:18:51 +02:00
|
|
|
}
|
2019-08-11 16:07:53 +03:00
|
|
|
|
|
|
|
# wait for rdb child to exit
|
|
|
|
wait_for_condition 500 100 {
|
|
|
|
[s -2 rdb_bgsave_in_progress] == 0
|
|
|
|
} else {
|
|
|
|
fail "rdb child didn't terminate"
|
|
|
|
}
|
|
|
|
|
|
|
|
# make sure we got what we were aiming for, by looking for the message in the log file
|
|
|
|
if {$all_drop == "all"} {
|
2020-07-28 11:15:29 +03:00
|
|
|
wait_for_log_messages -2 {"*Diskless rdb transfer, last replica dropped, killing fork child*"} $loglines 1 1
|
2019-08-11 16:07:53 +03:00
|
|
|
}
|
|
|
|
if {$all_drop == "no"} {
|
2020-07-28 11:15:29 +03:00
|
|
|
wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 2 replicas still up*"} $loglines 1 1
|
2019-08-11 16:07:53 +03:00
|
|
|
}
|
|
|
|
if {$all_drop == "slow" || $all_drop == "fast"} {
|
2020-07-28 11:15:29 +03:00
|
|
|
wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
|
2019-08-11 16:07:53 +03:00
|
|
|
}
|
2021-04-15 16:18:51 +02:00
|
|
|
if {$all_drop == "timeout"} {
|
|
|
|
wait_for_log_messages -2 {"*Disconnecting timedout replica (full sync)*"} $loglines 1 1
|
|
|
|
wait_for_log_messages -2 {"*Diskless rdb transfer, done reading from pipe, 1 replicas still up*"} $loglines 1 1
|
|
|
|
# master disconnected the slow replica, remove from array
|
|
|
|
set replicas_alive [lreplace $replicas_alive 0 0]
|
|
|
|
# release it
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
resume_process [srv -1 pid]
|
2021-04-15 16:18:51 +02:00
|
|
|
}
|
2019-08-11 16:07:53 +03:00
|
|
|
|
|
|
|
# make sure we don't have a busy loop going through epoll_wait
|
2019-09-12 11:10:22 +03:00
|
|
|
if {$measure_time} {
    # Compare the master's CPU counters with the snapshot taken right after
    # the replicas started loading, to catch a busy loop in the master while
    # the rdb pipe writer is blocked on the slow replica.
    set master_end_metrics [get_cpu_metrics $master_statfile]
    set time_elapsed [expr {[clock seconds]-$start_time}]
    set master_cpu [compute_cpu_usage $master_start_metrics $master_end_metrics]
    # compute_cpu_usage returns a list; element 0 is used as user time and
    # element 1 as system time.
    set master_utime [lindex $master_cpu 0]
    set master_stime [lindex $master_cpu 1]
    if {$::verbose} {
        puts "elapsed: $time_elapsed"
        puts "master utime: $master_utime"
        puts "master stime: $master_stime"
    }
    # Scenarios in which the slow replica goes away (killed or timed out)
    # get the looser bound.
    if {!$::no_latency && ($all_drop == "all" || $all_drop == "slow" || $all_drop == "timeout")} {
        assert {$master_utime < 70}
        assert {$master_stime < 70}
    }
    # Scenarios in which the slow replica stays attached get the tighter
    # bound. The no-drop scenario must be spelled "no", the same value the
    # "2 replicas still up" log check of this test compares against;
    # any other spelling never matches and leaves that scenario unchecked.
    if {!$::no_latency && ($all_drop == "no" || $all_drop == "fast")} {
        assert {$master_utime < 15}
        assert {$master_stime < 15}
    }
}
|
|
|
|
|
|
|
|
# verify the data integrity
|
|
|
|
foreach replica $replicas_alive {
|
|
|
|
# Wait that replicas acknowledge they are online so
|
|
|
|
# we are sure that DBSIZE and DEBUG DIGEST will not
|
|
|
|
# fail because of timing issues.
|
2020-05-18 10:01:30 +03:00
|
|
|
wait_for_condition 150 100 {
|
2019-08-11 16:07:53 +03:00
|
|
|
[lindex [$replica role] 3] eq {connected}
|
|
|
|
} else {
|
|
|
|
fail "replicas still not connected after some time"
|
|
|
|
}
|
|
|
|
|
|
|
|
# Make sure that replicas and master have same
|
|
|
|
# number of keys
|
|
|
|
wait_for_condition 50 100 {
|
|
|
|
[$master dbsize] == [$replica dbsize]
|
|
|
|
} else {
|
|
|
|
fail "Different number of keys between master and replicas after too long time."
|
|
|
|
}
|
|
|
|
|
|
|
|
# Check digests
|
|
|
|
set digest [$master debug digest]
|
|
|
|
set digest0 [$replica debug digest]
|
|
|
|
assert {$digest ne 0000000000000000000000000000000000000000}
|
|
|
|
assert {$digest eq $digest0}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-05-17 18:26:02 +03:00
|
|
|
|
if diskless repl child is killed, make sure to reap the pid (#7742)
Starting redis 6.0 and the changes we made to the diskless master to be
suitable for TLS, I made the master avoid reaping (wait3) the pid of the
child until we know all replicas are done reading their rdb.
I did that in order to avoid a state where the rdb_child_pid is -1 but
we don't yet want to start another fork (still busy serving that data to
replicas).
It turns out that the solution used so far was problematic in case the
fork child was being killed (e.g. by the kernel OOM killer), in that
case there's a chance that we currently disabled the read event on the
rdb pipe, since we're waiting for a replica to become writable again.
and in that scenario the master would have never realized the child
exited, and the replica will remain hung too.
Note that there's no mechanism to detect a hung replica while it's in
rdb transfer state.
The solution here is to add another pipe which is used by the parent to
tell the child it is safe to exit. this mean that when the child exits,
for whatever reason, it is safe to reap it.
Besides that, i'm re-introducing an adjustment to REPLCONF ACK which was
part of #6271 (Accelerate diskless master connections) but was dropped
when that PR was rebased after the TLS fork/pipe changes (5a47794).
Now that RdbPipeCleanup no longer calls checkChildrenDone, and the ACK
has chance to detect that the child exited, it should be the one to call
it so that we don't have to wait for cron (server.hz) to do that.
2020-09-06 16:43:57 +03:00
|
|
|
test "diskless replication child being killed is collected" {
    # While the diskless master waits for a replica to become writable it
    # removes the read event from the rdb pipe. If the fork child is killed
    # in that window the replica would hang, and the master might never
    # collect the child's pid with waitpid.
    start_server {tags {"repl"} overrides {save ""}} {
        set master [srv 0 client]
        set master_host [srv 0 host]
        set master_port [srv 0 port]
        set master_pid [srv 0 pid]

        # diskless sync, started immediately, with an uncompressed payload
        $master config set repl-diskless-sync yes
        $master config set repl-diskless-sync-delay 0
        $master config set rdbcompression no

        # the dataset must be large enough that the rdb payload does not fit
        # in the socket buffers
        $master debug populate 20000 test 10000

        start_server {overrides {save ""}} {
            set replica [srv 0 client]
            set replica_log_start [count_log_lines 0]

            # make the replica extremely slow at consuming the rdb stream
            $replica config set key-load-delay 1000000
            $replica config set loading-process-events-interval-bytes 1024
            $replica config set repl-diskless-load swapdb
            $replica replicaof $master_host $master_port

            # the replica has started reading the rdb once this shows up in its log
            wait_for_log_messages 0 {"*Loading DB in memory*"} $replica_log_start 1500 10

            # give it a moment so the replica is stuck loading and the master
            # is blocked on write
            after 500

            # simulate the OOM killer (or anyone else) killing the child
            exec kill -9 [get_child_pid -1]

            # the parent must notice that the child has exited
            wait_for_condition 50 100 {
                [s -1 rdb_bgsave_in_progress] == 0
            } else {
                fail "rdb child didn't terminate"
            }

            # Speed up shutdown
            $replica config set key-load-delay 0
        }
    }
} {} {external:skip}
|
if diskless repl child is killed, make sure to reap the pid (#7742)
Starting redis 6.0 and the changes we made to the diskless master to be
suitable for TLS, I made the master avoid reaping (wait3) the pid of the
child until we know all replicas are done reading their rdb.
I did that in order to avoid a state where the rdb_child_pid is -1 but
we don't yet want to start another fork (still busy serving that data to
replicas).
It turns out that the solution used so far was problematic in case the
fork child was being killed (e.g. by the kernel OOM killer), in that
case there's a chance that we currently disabled the read event on the
rdb pipe, since we're waiting for a replica to become writable again.
and in that scenario the master would have never realized the child
exited, and the replica will remain hung too.
Note that there's no mechanism to detect a hung replica while it's in
rdb transfer state.
The solution here is to add another pipe which is used by the parent to
tell the child it is safe to exit. this mean that when the child exits,
for whatever reason, it is safe to reap it.
Besides that, i'm re-introducing an adjustment to REPLCONF ACK which was
part of #6271 (Accelerate diskless master connections) but was dropped
when that PR was rebased after the TLS fork/pipe changes (5a47794).
Now that RdbPipeCleanup no longer calls checkChildrenDone, and the ACK
has chance to detect that the child exited, it should be the one to call
it so that we don't have to wait for cron (server.hz) to do that.
2020-09-06 16:43:57 +03:00
|
|
|
|
diskless master, avoid bgsave child hung when fork parent crashes (#11463)
During a diskless sync, if the master main process crashes, the child would
have hung in `write`. This fix closes the read fd on the child side, so that if the
parent crashes, the child will get a write error and exit.
This change also fixes disk-based replication, BGSAVE and AOFRW.
In that case the child wouldn't have been hang, it would have just kept
running until done which may be pointless.
There is a certain degree of risk here. in case there's a BGSAVE child that could
maybe succeed and the parent dies for some reason, the old code would have let
the child keep running and maybe succeed and avoid data loss.
On the other hand, if the parent is restarted, it would have loaded an old rdb file
(or none), and then the child could reach the end and rename the rdb file (data
conflicting with what the parent has), or also have a race with another BGSAVE
child that the new parent started.
Note that i removed a comment saying a write error will be ignored in the child
and handled by the parent (this comment was very old and i don't think relevant).
2022-11-09 10:02:18 +02:00
|
|
|
foreach mdl {yes no} {
|
2022-11-12 20:35:34 +02:00
|
|
|
test "replication child dies when parent is killed - diskless: $mdl" {
|
diskless master, avoid bgsave child hung when fork parent crashes (#11463)
During a diskless sync, if the master main process crashes, the child would
have hung in `write`. This fix closes the read fd on the child side, so that if the
parent crashes, the child will get a write error and exit.
This change also fixes disk-based replication, BGSAVE and AOFRW.
In that case the child wouldn't have been hang, it would have just kept
running until done which may be pointless.
There is a certain degree of risk here. in case there's a BGSAVE child that could
maybe succeed and the parent dies for some reason, the old code would have let
the child keep running and maybe succeed and avoid data loss.
On the other hand, if the parent is restarted, it would have loaded an old rdb file
(or none), and then the child could reach the end and rename the rdb file (data
conflicting with what the parent has), or also have a race with another BGSAVE
child that the new parent started.
Note that i removed a comment saying a write error will be ignored in the child
and handled by the parent (this comment was very old and i don't think relevant).
2022-11-09 10:02:18 +02:00
|
|
|
# when master is killed, make sure the fork child can detect that and exit
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {tags {"repl"} overrides {save ""}} {
|
diskless master, avoid bgsave child hung when fork parent crashes (#11463)
During a diskless sync, if the master main process crashes, the child would
have hung in `write`. This fix closes the read fd on the child side, so that if the
parent crashes, the child will get a write error and exit.
This change also fixes disk-based replication, BGSAVE and AOFRW.
In that case the child wouldn't have been hang, it would have just kept
running until done which may be pointless.
There is a certain degree of risk here. in case there's a BGSAVE child that could
maybe succeed and the parent dies for some reason, the old code would have let
the child keep running and maybe succeed and avoid data loss.
On the other hand, if the parent is restarted, it would have loaded an old rdb file
(or none), and then the child could reach the end and rename the rdb file (data
conflicting with what the parent has), or also have a race with another BGSAVE
child that the new parent started.
Note that i removed a comment saying a write error will be ignored in the child
and handled by the parent (this comment was very old and i don't think relevant).
2022-11-09 10:02:18 +02:00
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
set master_pid [srv 0 pid]
|
|
|
|
$master config set repl-diskless-sync $mdl
|
|
|
|
$master config set repl-diskless-sync-delay 0
|
|
|
|
# create keys that will take 10 seconds to save
|
|
|
|
$master config set rdb-key-save-delay 1000
|
|
|
|
$master debug populate 10000
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save ""}} {
|
diskless master, avoid bgsave child hung when fork parent crashes (#11463)
During a diskless sync, if the master main process crashes, the child would
have hung in `write`. This fix closes the read fd on the child side, so that if the
parent crashes, the child will get a write error and exit.
This change also fixes disk-based replication, BGSAVE and AOFRW.
In that case the child wouldn't have been hang, it would have just kept
running until done which may be pointless.
There is a certain degree of risk here. in case there's a BGSAVE child that could
maybe succeed and the parent dies for some reason, the old code would have let
the child keep running and maybe succeed and avoid data loss.
On the other hand, if the parent is restarted, it would have loaded an old rdb file
(or none), and then the child could reach the end and rename the rdb file (data
conflicting with what the parent has), or also have a race with another BGSAVE
child that the new parent started.
Note that i removed a comment saying a write error will be ignored in the child
and handled by the parent (this comment was very old and i don't think relevant).
2022-11-09 10:02:18 +02:00
|
|
|
set replica [srv 0 client]
|
|
|
|
$replica replicaof $master_host $master_port
|
|
|
|
|
|
|
|
# wait for rdb child to start
|
|
|
|
wait_for_condition 5000 10 {
|
|
|
|
[s -1 rdb_bgsave_in_progress] == 1
|
|
|
|
} else {
|
|
|
|
fail "rdb child didn't start"
|
|
|
|
}
|
|
|
|
set fork_child_pid [get_child_pid -1]
|
|
|
|
|
|
|
|
# simulate the OOM killer or anyone else kills the parent
|
|
|
|
exec kill -9 $master_pid
|
|
|
|
|
|
|
|
# wait for the child to notice that the parent died and to exit
|
|
|
|
wait_for_condition 500 10 {
|
|
|
|
[process_is_alive $fork_child_pid] == 0
|
|
|
|
} else {
|
|
|
|
fail "rdb child didn't terminate"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} {} {external:skip}
|
|
|
|
}
|
|
|
|
|
2021-05-26 14:51:53 +03:00
|
|
|
test "diskless replication read pipe cleanup" {
|
|
|
|
# In diskless replication, we create a read pipe for the RDB, between the child and the parent.
|
|
|
|
# When we close this pipe (fd), the read handler also needs to be removed from the event loop (if it is still registered).
|
|
|
|
# Otherwise, the next time we use the same fd, the registration will fail (panic), because
|
|
|
|
# we will use EPOLL_CTL_MOD (the fd is still registered in the event loop) on an fd that was already removed from the epoll instance
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {tags {"repl"} overrides {save ""}} {
|
2021-05-26 14:51:53 +03:00
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
set master_pid [srv 0 pid]
|
|
|
|
$master config set repl-diskless-sync yes
|
|
|
|
$master config set repl-diskless-sync-delay 0
|
|
|
|
|
|
|
|
# put enough data in the db, and slow down the save, to keep the parent busy at the read process
|
|
|
|
$master config set rdb-key-save-delay 100000
|
|
|
|
$master debug populate 20000 test 10000
|
|
|
|
$master config set rdbcompression no
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save ""}} {
|
2021-05-26 14:51:53 +03:00
|
|
|
set replica [srv 0 client]
|
|
|
|
set loglines [count_log_lines 0]
|
|
|
|
$replica config set repl-diskless-load swapdb
|
|
|
|
$replica replicaof $master_host $master_port
|
|
|
|
|
|
|
|
# wait for the replicas to start reading the rdb
|
2022-12-12 23:38:12 +08:00
|
|
|
wait_for_log_messages 0 {"*Loading DB in memory*"} $loglines 1500 10
|
2021-05-26 14:51:53 +03:00
|
|
|
|
2021-09-19 12:07:04 +03:00
|
|
|
set loglines [count_log_lines -1]
|
2021-05-26 14:51:53 +03:00
|
|
|
# send FLUSHALL so the RDB child will be killed
|
|
|
|
$master flushall
|
|
|
|
|
|
|
|
# wait for another RDB child process to be started
|
|
|
|
wait_for_log_messages -1 {"*Background RDB transfer started by pid*"} $loglines 800 10
|
|
|
|
|
|
|
|
# make sure master is alive
|
|
|
|
$master ping
|
|
|
|
}
|
|
|
|
}
|
2021-06-09 15:13:24 +03:00
|
|
|
} {} {external:skip}
|
2021-05-26 14:51:53 +03:00
|
|
|
|
2020-05-17 18:26:02 +03:00
|
|
|
test {replicaof right after disconnection} {
|
|
|
|
# this is a rare race condition that was reproduced sporadically by the psync2 unit.
|
|
|
|
# see details in #7205
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {tags {"repl"} overrides {save ""}} {
|
2020-05-17 18:26:02 +03:00
|
|
|
set replica1 [srv 0 client]
|
|
|
|
set replica1_host [srv 0 host]
|
|
|
|
set replica1_port [srv 0 port]
|
|
|
|
set replica1_log [srv 0 stdout]
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save ""}} {
|
2020-05-17 18:26:02 +03:00
|
|
|
set replica2 [srv 0 client]
|
|
|
|
set replica2_host [srv 0 host]
|
|
|
|
set replica2_port [srv 0 port]
|
|
|
|
set replica2_log [srv 0 stdout]
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 09:19:21 +03:00
|
|
|
start_server {overrides {save ""}} {
|
2020-05-17 18:26:02 +03:00
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
$replica1 replicaof $master_host $master_port
|
|
|
|
$replica2 replicaof $master_host $master_port
|
|
|
|
|
|
|
|
wait_for_condition 50 100 {
|
|
|
|
[string match {*master_link_status:up*} [$replica1 info replication]] &&
|
|
|
|
[string match {*master_link_status:up*} [$replica2 info replication]]
|
|
|
|
} else {
|
|
|
|
fail "Can't turn the instance into a replica"
|
|
|
|
}
|
|
|
|
|
|
|
|
set rd [redis_deferring_client -1]
|
|
|
|
$rd debug sleep 1
|
|
|
|
after 100
|
|
|
|
|
|
|
|
# when replica2 wakes up from the sleep it will find both a disconnection
|
|
|
|
# from its master and also a replicaof command in the same event loop
|
|
|
|
$master client kill type replica
|
|
|
|
$replica2 replicaof $replica1_host $replica1_port
|
|
|
|
$rd read
|
|
|
|
|
|
|
|
wait_for_condition 50 100 {
|
|
|
|
[string match {*master_link_status:up*} [$replica2 info replication]]
|
|
|
|
} else {
|
|
|
|
fail "role change failed."
|
|
|
|
}
|
|
|
|
|
|
|
|
# make sure psync succeeded, and there were no unexpected full syncs.
|
|
|
|
assert_equal [status $master sync_full] 2
|
|
|
|
assert_equal [status $replica1 sync_full] 0
|
|
|
|
assert_equal [status $replica2 sync_full] 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-06-09 15:13:24 +03:00
|
|
|
} {} {external:skip}
|
2020-09-22 14:47:58 +08:00
|
|
|
|
|
|
|
test {Kill rdb child process if its dumping RDB is not useful} {
|
|
|
|
start_server {tags {"repl"}} {
|
|
|
|
set slave1 [srv 0 client]
|
|
|
|
start_server {} {
|
|
|
|
set slave2 [srv 0 client]
|
|
|
|
start_server {} {
|
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
for {set i 0} {$i < 10} {incr i} {
|
|
|
|
$master set $i $i
|
|
|
|
}
|
|
|
|
# Generating the RDB will take 10s (10 * 1s)
|
|
|
|
$master config set rdb-key-save-delay 1000000
|
|
|
|
$master config set repl-diskless-sync no
|
|
|
|
$master config set save ""
|
|
|
|
|
|
|
|
$slave1 slaveof $master_host $master_port
|
|
|
|
$slave2 slaveof $master_host $master_port
|
|
|
|
|
|
|
|
# Wait for starting child
|
|
|
|
wait_for_condition 50 100 {
|
2020-10-27 15:36:42 +08:00
|
|
|
([s 0 rdb_bgsave_in_progress] == 1) &&
|
|
|
|
([string match "*wait_bgsave*" [s 0 slave0]]) &&
|
|
|
|
([string match "*wait_bgsave*" [s 0 slave1]])
|
2020-09-22 14:47:58 +08:00
|
|
|
} else {
|
|
|
|
fail "rdb child didn't start"
|
|
|
|
}
|
|
|
|
|
|
|
|
# Slave1 disconnects from the master
|
|
|
|
$slave1 slaveof no one
|
|
|
|
# Shouldn't kill the child since another slave is still waiting for the rdb
|
|
|
|
after 100
|
|
|
|
assert {[s 0 rdb_bgsave_in_progress] == 1}
|
|
|
|
|
|
|
|
# Slave2 disconnects from the master
|
|
|
|
$slave2 slaveof no one
|
|
|
|
# Should kill child
|
2021-05-18 17:13:59 +03:00
|
|
|
wait_for_condition 100 10 {
|
2020-09-22 14:47:58 +08:00
|
|
|
[s 0 rdb_bgsave_in_progress] eq 0
|
|
|
|
} else {
|
|
|
|
fail "can't kill rdb child"
|
|
|
|
}
|
|
|
|
|
|
|
|
# If save parameters are configured, the child won't be killed
|
|
|
|
$master config set save "900 1"
|
|
|
|
$slave1 slaveof $master_host $master_port
|
|
|
|
$slave2 slaveof $master_host $master_port
|
|
|
|
wait_for_condition 50 100 {
|
2020-10-27 15:36:42 +08:00
|
|
|
([s 0 rdb_bgsave_in_progress] == 1) &&
|
|
|
|
([string match "*wait_bgsave*" [s 0 slave0]]) &&
|
|
|
|
([string match "*wait_bgsave*" [s 0 slave1]])
|
2020-09-22 14:47:58 +08:00
|
|
|
} else {
|
|
|
|
fail "rdb child didn't start"
|
|
|
|
}
|
|
|
|
$slave1 slaveof no one
|
|
|
|
$slave2 slaveof no one
|
|
|
|
after 200
|
|
|
|
assert {[s 0 rdb_bgsave_in_progress] == 1}
|
|
|
|
catch {$master shutdown nosave}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-06-09 15:13:24 +03:00
|
|
|
} {} {external:skip}
|
2021-09-09 16:32:29 +08:00
|
|
|
|
|
|
|
start_server {tags {"repl external:skip"}} {
|
|
|
|
set master1_host [srv 0 host]
|
|
|
|
set master1_port [srv 0 port]
|
|
|
|
r set a b
|
|
|
|
|
|
|
|
start_server {} {
|
|
|
|
set master2 [srv 0 client]
|
|
|
|
set master2_host [srv 0 host]
|
|
|
|
set master2_port [srv 0 port]
|
|
|
|
# Take 10s for dumping RDB
|
|
|
|
$master2 debug populate 10 master2 10
|
|
|
|
$master2 config set rdb-key-save-delay 1000000
|
|
|
|
|
|
|
|
start_server {} {
|
|
|
|
set sub_replica [srv 0 client]
|
|
|
|
|
|
|
|
start_server {} {
|
|
|
|
# Full sync with master1
|
|
|
|
r slaveof $master1_host $master1_port
|
|
|
|
wait_for_sync r
|
|
|
|
assert_equal "b" [r get a]
|
|
|
|
|
|
|
|
# Let sub replicas sync with me
|
|
|
|
$sub_replica slaveof [srv 0 host] [srv 0 port]
|
|
|
|
wait_for_sync $sub_replica
|
|
|
|
assert_equal "b" [$sub_replica get a]
|
|
|
|
|
|
|
|
# Full sync with master2, and then kill master2 before finishing dumping RDB
|
|
|
|
r slaveof $master2_host $master2_port
|
|
|
|
wait_for_condition 50 100 {
|
|
|
|
([s -2 rdb_bgsave_in_progress] == 1) &&
|
|
|
|
([string match "*wait_bgsave*" [s -2 slave0]])
|
|
|
|
} else {
|
|
|
|
fail "full sync didn't start"
|
|
|
|
}
|
|
|
|
catch {$master2 shutdown nosave}
|
|
|
|
|
|
|
|
test {Don't disconnect with replicas before loading transferred RDB when full sync} {
|
|
|
|
assert ![log_file_matches [srv -1 stdout] "*Connection with master lost*"]
|
|
|
|
# The replication id is not changed in the entire replication chain
|
|
|
|
assert_equal [s master_replid] [s -3 master_replid]
|
|
|
|
assert_equal [s master_replid] [s -1 master_replid]
|
|
|
|
}
|
|
|
|
|
|
|
|
test {Discard cache master before loading transferred RDB when full sync} {
|
|
|
|
set full_sync [s -3 sync_full]
|
|
|
|
set partial_sync [s -3 sync_partial_ok]
|
|
|
|
# Partial sync with master1
|
|
|
|
r slaveof $master1_host $master1_port
|
|
|
|
wait_for_sync r
|
|
|
|
# master1 accepts partial sync instead of full sync
|
|
|
|
assert_equal $full_sync [s -3 sync_full]
|
|
|
|
assert_equal [expr $partial_sync+1] [s -3 sync_partial_ok]
|
|
|
|
|
|
|
|
# Since the master only partially syncs the replica, and the repl id is not changed,
|
|
|
|
# the replica doesn't disconnect from its sub-replicas
|
|
|
|
assert_equal [s master_replid] [s -3 master_replid]
|
|
|
|
assert_equal [s master_replid] [s -1 master_replid]
|
|
|
|
assert ![log_file_matches [srv -1 stdout] "*Connection with master lost*"]
|
|
|
|
# Sub replica just has one full sync, no partial resync.
|
|
|
|
assert_equal 1 [s sync_full]
|
|
|
|
assert_equal 0 [s sync_partial_ok]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2021-11-04 15:09:28 +08:00
|
|
|
|
|
|
|
test {replica can handle EINTR if use diskless load} {
|
|
|
|
start_server {tags {"repl"}} {
|
|
|
|
set replica [srv 0 client]
|
|
|
|
set replica_log [srv 0 stdout]
|
|
|
|
start_server {} {
|
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
|
|
|
|
$master debug populate 100 master 100000
|
|
|
|
$master config set rdbcompression no
|
|
|
|
$master config set repl-diskless-sync yes
|
|
|
|
$master config set repl-diskless-sync-delay 0
|
|
|
|
$replica config set repl-diskless-load on-empty-db
|
|
|
|
# Construct EINTR error by using the built in watchdog
|
|
|
|
$replica config set watchdog-period 200
|
|
|
|
# Block replica in read()
|
|
|
|
$master config set rdb-key-save-delay 10000
|
|
|
|
# set speedy shutdown
|
|
|
|
$master config set save ""
|
|
|
|
# Start the replication process...
|
|
|
|
$replica replicaof $master_host $master_port
|
|
|
|
|
|
|
|
# Wait for the replica to start reading the rdb
|
|
|
|
set res [wait_for_log_messages -1 {"*Loading DB in memory*"} 0 200 10]
|
|
|
|
set loglines [lindex $res 1]
|
2022-02-13 15:52:38 +08:00
|
|
|
|
2021-11-04 15:09:28 +08:00
|
|
|
# Wait till we see the watchdog log line AFTER the loading started
|
|
|
|
wait_for_log_messages -1 {"*WATCHDOG TIMER EXPIRED*"} $loglines 200 10
|
2022-02-13 15:52:38 +08:00
|
|
|
|
2021-11-04 15:09:28 +08:00
|
|
|
# Make sure we're still loading, and that there was just one full sync attempt
|
2022-02-13 15:52:38 +08:00
|
|
|
assert ![log_file_matches [srv -1 stdout] "*Reconnecting to MASTER*"]
|
2021-11-04 15:09:28 +08:00
|
|
|
assert_equal 1 [s 0 sync_full]
|
|
|
|
assert_equal 1 [s -1 loading]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} {} {external:skip}
|
2022-02-13 15:52:38 +08:00
|
|
|
|
|
|
|
start_server {tags {"repl" "external:skip"}} {
|
|
|
|
test "replica do not write the reply to the replication link - SYNC (_addReplyToBufferOrList)" {
|
|
|
|
set rd [redis_deferring_client]
|
|
|
|
set lines [count_log_lines 0]
|
|
|
|
|
|
|
|
$rd sync
|
|
|
|
$rd ping
|
|
|
|
catch {$rd read} e
|
|
|
|
if {$::verbose} { puts "SYNC _addReplyToBufferOrList: $e" }
|
|
|
|
assert_equal "PONG" [r ping]
|
|
|
|
|
|
|
|
# Check we got the warning logs about the PING command.
|
|
|
|
verify_log_message 0 "*Replica generated a reply to command 'ping', disconnecting it: *" $lines
|
|
|
|
|
|
|
|
$rd close
|
|
|
|
waitForBgsave r
|
|
|
|
}
|
|
|
|
|
|
|
|
test "replica do not write the reply to the replication link - SYNC (addReplyDeferredLen)" {
|
|
|
|
set rd [redis_deferring_client]
|
|
|
|
set lines [count_log_lines 0]
|
|
|
|
|
|
|
|
$rd sync
|
|
|
|
$rd xinfo help
|
|
|
|
catch {$rd read} e
|
|
|
|
if {$::verbose} { puts "SYNC addReplyDeferredLen: $e" }
|
|
|
|
assert_equal "PONG" [r ping]
|
|
|
|
|
|
|
|
# Check we got the warning logs about the XINFO HELP command.
|
|
|
|
verify_log_message 0 "*Replica generated a reply to command 'xinfo|help', disconnecting it: *" $lines
|
|
|
|
|
|
|
|
$rd close
|
|
|
|
waitForBgsave r
|
|
|
|
}
|
|
|
|
|
|
|
|
test "replica do not write the reply to the replication link - PSYNC (_addReplyToBufferOrList)" {
|
|
|
|
set rd [redis_deferring_client]
|
|
|
|
set lines [count_log_lines 0]
|
|
|
|
|
|
|
|
$rd psync replicationid -1
|
|
|
|
assert_match {FULLRESYNC * 0} [$rd read]
|
|
|
|
$rd get foo
|
|
|
|
catch {$rd read} e
|
|
|
|
if {$::verbose} { puts "PSYNC _addReplyToBufferOrList: $e" }
|
|
|
|
assert_equal "PONG" [r ping]
|
|
|
|
|
|
|
|
# Check we got the warning logs about the GET command.
|
|
|
|
verify_log_message 0 "*Replica generated a reply to command 'get', disconnecting it: *" $lines
|
|
|
|
verify_log_message 0 "*== CRITICAL == This master is sending an error to its replica: *" $lines
|
|
|
|
verify_log_message 0 "*Replica can't interact with the keyspace*" $lines
|
|
|
|
|
|
|
|
$rd close
|
|
|
|
waitForBgsave r
|
|
|
|
}
|
|
|
|
|
|
|
|
test "replica do not write the reply to the replication link - PSYNC (addReplyDeferredLen)" {
|
|
|
|
set rd [redis_deferring_client]
|
|
|
|
set lines [count_log_lines 0]
|
|
|
|
|
|
|
|
$rd psync replicationid -1
|
|
|
|
assert_match {FULLRESYNC * 0} [$rd read]
|
|
|
|
$rd slowlog get
|
|
|
|
catch {$rd read} e
|
|
|
|
if {$::verbose} { puts "PSYNC addReplyDeferredLen: $e" }
|
|
|
|
assert_equal "PONG" [r ping]
|
|
|
|
|
|
|
|
# Check we got the warning logs about the SLOWLOG GET command.
|
|
|
|
verify_log_message 0 "*Replica generated a reply to command 'slowlog|get', disconnecting it: *" $lines
|
|
|
|
|
|
|
|
$rd close
|
|
|
|
waitForBgsave r
|
|
|
|
}
|
|
|
|
|
|
|
|
test "PSYNC with wrong offset should throw error" {
|
|
|
|
# It used to accept the FULL SYNC, but also replied with an error.
|
|
|
|
assert_error {ERR value is not an integer or out of range} {r psync replicationid offset_str}
|
|
|
|
set logs [exec tail -n 100 < [srv 0 stdout]]
|
|
|
|
assert_match {*Replica * asks for synchronization but with a wrong offset} $logs
|
|
|
|
assert_equal "PONG" [r ping]
|
|
|
|
}
|
|
|
|
}
|
2022-08-24 12:51:36 +03:00
|
|
|
|
|
|
|
start_server {tags {"repl external:skip"}} {
|
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
$master debug SET-ACTIVE-EXPIRE 0
|
|
|
|
start_server {} {
|
|
|
|
set slave [srv 0 client]
|
|
|
|
$slave debug SET-ACTIVE-EXPIRE 0
|
|
|
|
$slave slaveof $master_host $master_port
|
|
|
|
|
|
|
|
test "Test replication with lazy expire" {
|
|
|
|
# wait for replication to be in sync
|
|
|
|
wait_for_condition 50 100 {
|
|
|
|
[lindex [$slave role] 0] eq {slave} &&
|
|
|
|
[string match {*master_link_status:up*} [$slave info replication]]
|
|
|
|
} else {
|
|
|
|
fail "Can't turn the instance into a replica"
|
|
|
|
}
|
|
|
|
|
|
|
|
$master sadd s foo
|
|
|
|
$master pexpire s 1
|
|
|
|
after 10
|
|
|
|
$master sadd s foo
|
2022-08-24 13:38:55 +03:00
|
|
|
assert_equal 1 [$master wait 1 0]
|
2022-08-24 12:51:36 +03:00
|
|
|
|
|
|
|
assert_equal "set" [$master type s]
|
|
|
|
assert_equal "set" [$slave type s]
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|