2024-03-21 09:30:47 -04:00
|
|
|
# SPDX-FileCopyrightText: 2024 Redict Contributors
|
|
|
|
# SPDX-FileCopyrightText: 2024 Salvatore Sanfilippo <antirez at gmail dot com>
|
|
|
|
#
|
|
|
|
# SPDX-License-Identifier: BSD-3-Clause
|
2024-03-21 15:11:44 -04:00
|
|
|
# SPDX-License-Identifier: LGPL-3.0-only
|
2024-03-21 09:30:47 -04:00
|
|
|
|
2019-09-12 03:56:54 -04:00
|
|
|
source tests/support/cli.tcl
|
|
|
|
|
2021-06-09 08:13:24 -04:00
|
|
|
start_server {tags {"wait network external:skip"}} {
|
2016-11-18 07:10:29 -05:00
|
|
|
start_server {} {
|
|
|
|
set slave [srv 0 client]
|
|
|
|
set slave_host [srv 0 host]
|
|
|
|
set slave_port [srv 0 port]
|
2021-01-12 02:46:24 -05:00
|
|
|
set slave_pid [srv 0 pid]
|
2016-11-18 07:10:29 -05:00
|
|
|
set master [srv -1 client]
|
|
|
|
set master_host [srv -1 host]
|
|
|
|
set master_port [srv -1 port]
|
|
|
|
|
|
|
|
test {Setup slave} {
|
|
|
|
$slave slaveof $master_host $master_port
|
|
|
|
wait_for_condition 50 100 {
|
|
|
|
[s 0 master_link_status] eq {up}
|
|
|
|
} else {
|
|
|
|
fail "Replication not started."
|
|
|
|
}
|
|
|
|
}
|
2023-03-15 05:07:04 -04:00
|
|
|
|
2022-10-06 05:12:05 -04:00
|
|
|
test {WAIT out of range timeout (milliseconds)} {
|
|
|
|
# Timeout is parsed as milliseconds by getLongLongFromObjectOrReply().
|
|
|
|
# Verify we get out of range message if value is behind LLONG_MAX
|
|
|
|
# (decimal value equals to 0x8000000000000000)
|
|
|
|
assert_error "*or out of range*" {$master wait 2 9223372036854775808}
|
2023-03-15 05:07:04 -04:00
|
|
|
|
2022-10-06 05:12:05 -04:00
|
|
|
# expected to fail by later overflow condition after addition
|
|
|
|
# of mstime(). (decimal value equals to 0x7FFFFFFFFFFFFFFF)
|
|
|
|
assert_error "*timeout is out of range*" {$master wait 2 9223372036854775807}
|
2023-03-15 05:07:04 -04:00
|
|
|
|
|
|
|
assert_error "*timeout is negative*" {$master wait 2 -1}
|
2022-10-06 05:12:05 -04:00
|
|
|
}
|
2023-03-15 05:07:04 -04:00
|
|
|
|
2016-11-18 07:10:29 -05:00
|
|
|
test {WAIT should acknowledge 1 additional copy of the data} {
|
|
|
|
$master set foo 0
|
|
|
|
$master incr foo
|
|
|
|
$master incr foo
|
|
|
|
$master incr foo
|
|
|
|
assert {[$master wait 1 5000] == 1}
|
|
|
|
assert {[$slave get foo] == 3}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAIT should not acknowledge 2 additional copies of the data} {
|
|
|
|
$master incr foo
|
|
|
|
assert {[$master wait 2 1000] <= 1}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAIT should not acknowledge 1 additional copy if slave is blocked} {
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
pause_process $slave_pid
|
2016-11-18 07:10:29 -05:00
|
|
|
$master set foo 0
|
|
|
|
$master incr foo
|
|
|
|
$master incr foo
|
|
|
|
$master incr foo
|
2021-01-12 02:46:24 -05:00
|
|
|
assert {[$master wait 1 1000] == 0}
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
resume_process $slave_pid
|
2021-01-12 02:46:24 -05:00
|
|
|
assert {[$master wait 1 1000] == 1}
|
2016-11-18 07:10:29 -05:00
|
|
|
}
|
2021-01-08 02:36:54 -05:00
|
|
|
|
|
|
|
test {WAIT implicitly blocks on client pause since ACKs aren't sent} {
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
pause_process $slave_pid
|
2021-01-08 02:36:54 -05:00
|
|
|
$master multi
|
|
|
|
$master incr foo
|
|
|
|
$master client pause 10000 write
|
|
|
|
$master exec
|
|
|
|
assert {[$master wait 1 1000] == 0}
|
|
|
|
$master client unpause
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
resume_process $slave_pid
|
2021-01-08 02:36:54 -05:00
|
|
|
assert {[$master wait 1 1000] == 1}
|
|
|
|
}
|
2023-03-15 05:07:04 -04:00
|
|
|
|
|
|
|
test {WAIT replica multiple clients unblock - reuse last result} {
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd [redict_deferring_client -1]
|
|
|
|
set rd2 [redict_deferring_client -1]
|
2023-03-15 05:07:04 -04:00
|
|
|
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
pause_process $slave_pid
|
2023-03-15 05:07:04 -04:00
|
|
|
|
|
|
|
$rd incr foo
|
|
|
|
$rd read
|
|
|
|
|
|
|
|
$rd2 incr foo
|
|
|
|
$rd2 read
|
|
|
|
|
|
|
|
$rd wait 1 0
|
|
|
|
$rd2 wait 1 0
|
|
|
|
wait_for_blocked_clients_count 2 100 10 -1
|
|
|
|
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
resume_process $slave_pid
|
2023-03-15 05:07:04 -04:00
|
|
|
|
|
|
|
assert_equal [$rd read] {1}
|
|
|
|
assert_equal [$rd2 read] {1}
|
|
|
|
|
|
|
|
$rd ping
|
|
|
|
assert_equal [$rd read] {PONG}
|
|
|
|
$rd2 ping
|
|
|
|
assert_equal [$rd2 read] {PONG}
|
|
|
|
|
|
|
|
$rd close
|
|
|
|
$rd2 close
|
|
|
|
}
|
2016-11-18 07:10:29 -05:00
|
|
|
}}
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
|
|
|
|
|
|
|
|
tags {"wait aof network external:skip"} {
|
|
|
|
start_server {overrides {appendonly {yes} auto-aof-rewrite-percentage {0}}} {
|
|
|
|
set master [srv 0 client]
|
|
|
|
|
|
|
|
test {WAITAOF local copy before fsync} {
|
|
|
|
r config set appendfsync no
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 1 0 50] {0 0} ;# exits on timeout
|
|
|
|
r config set appendfsync everysec
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF local copy everysec} {
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 1 0 0] {1 0}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF local copy with appendfsync always} {
|
|
|
|
r config set appendfsync always
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 1 0 0] {1 0}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF local wait and then stop aof} {
|
2023-09-28 10:19:20 -04:00
|
|
|
r config set appendfsync no
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd [redict_deferring_client]
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
$rd incr foo
|
|
|
|
$rd read
|
|
|
|
$rd waitaof 1 0 0
|
|
|
|
wait_for_blocked_client
|
|
|
|
r config set appendonly no ;# this should release the blocked client as an error
|
2023-03-15 05:07:04 -04:00
|
|
|
assert_error {ERR WAITAOF cannot be used when numlocal is set but appendonly is disabled.} {$rd read}
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
$rd close
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF local on server with aof disabled} {
|
|
|
|
$master incr foo
|
2023-03-15 05:07:04 -04:00
|
|
|
assert_error {ERR WAITAOF cannot be used when numlocal is set but appendonly is disabled.} {$master waitaof 1 0 0}
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
}
|
|
|
|
|
2023-09-28 10:05:53 -04:00
|
|
|
test {WAITAOF local if AOFRW was postponed} {
|
|
|
|
r config set appendfsync everysec
|
|
|
|
|
|
|
|
# turn off AOF
|
|
|
|
r config set appendonly no
|
|
|
|
|
|
|
|
# create an RDB child that takes a lot of time to run
|
|
|
|
r set x y
|
|
|
|
r config set rdb-key-save-delay 100000000 ;# 100 seconds
|
|
|
|
r bgsave
|
|
|
|
assert_equal [s rdb_bgsave_in_progress] 1
|
|
|
|
|
|
|
|
# turn on AOF
|
|
|
|
r config set appendonly yes
|
|
|
|
assert_equal [s aof_rewrite_scheduled] 1
|
|
|
|
|
|
|
|
# create a write command (to increment master_repl_offset)
|
|
|
|
r set x y
|
|
|
|
|
|
|
|
# reset save_delay and kill RDB child
|
|
|
|
r config set rdb-key-save-delay 0
|
|
|
|
catch {exec kill -9 [get_child_pid 0]}
|
|
|
|
|
|
|
|
# wait for AOF (will unblock after AOFRW finishes)
|
|
|
|
assert_equal [r waitaof 1 0 10000] {1 0}
|
|
|
|
|
|
|
|
# make sure AOFRW finished
|
|
|
|
assert_equal [s aof_rewrite_in_progress] 0
|
|
|
|
assert_equal [s aof_rewrite_scheduled] 0
|
|
|
|
}
|
|
|
|
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
$master config set appendonly yes
|
|
|
|
waitForBgrewriteaof $master
|
|
|
|
|
|
|
|
start_server {overrides {appendonly {yes} auto-aof-rewrite-percentage {0}}} {
|
|
|
|
set master_host [srv -1 host]
|
|
|
|
set master_port [srv -1 port]
|
|
|
|
set replica [srv 0 client]
|
|
|
|
set replica_host [srv 0 host]
|
|
|
|
set replica_port [srv 0 port]
|
|
|
|
set replica_pid [srv 0 pid]
|
|
|
|
|
|
|
|
# make sure the master always fsyncs first (easier to test)
|
|
|
|
$master config set appendfsync always
|
|
|
|
$replica config set appendfsync no
|
|
|
|
|
|
|
|
test {WAITAOF on demoted master gets unblocked with an error} {
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd [redict_deferring_client]
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
$rd incr foo
|
|
|
|
$rd read
|
|
|
|
$rd waitaof 0 1 0
|
|
|
|
wait_for_blocked_client
|
|
|
|
$replica replicaof $master_host $master_port
|
|
|
|
assert_error {UNBLOCKED force unblock from blocking operation,*} {$rd read}
|
|
|
|
$rd close
|
|
|
|
}
|
|
|
|
|
|
|
|
wait_for_ofs_sync $master $replica
|
|
|
|
|
|
|
|
test {WAITAOF replica copy before fsync} {
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 0 1 50] {1 0} ;# exits on timeout
|
|
|
|
}
|
|
|
|
$replica config set appendfsync everysec
|
|
|
|
|
|
|
|
test {WAITAOF replica copy everysec} {
|
Fix fork done handler wrongly update fsync metrics and enhance AOF_ FSYNC_ALWAYS (#11973)
This PR fix several unrelated bugs that were discovered by the same set of tests
(WAITAOF tests in #11713), could make the `WAITAOF` test hang.
The change in `backgroundRewriteDoneHandler` is about MP-AOF.
That leftover / old code assumes that we started a new AOF file just now
(when we have a new base into which we're gonna incrementally write), but
the fact is that with MP-AOF, the fork done handler doesn't really affect the
incremental file being maintained by the parent process, there's no reason to
re-issue `SELECT`, and no reason to update any of the fsync variables in that flow.
This should have been deleted with MP-AOF (introduced in #9788, 7.0).
The damage is that the update to `aof_fsync_offset` will cause us to miss an fsync
in `flushAppendOnlyFile`, that happens if we stop write commands in `AOF_FSYNC_EVERYSEC`
while an AOFRW is in progress. This caused a new `WAITAOF` test to sometime hang forever.
Also because of MP-AOF, we needed to change `aof_fsync_offset` to `aof_last_incr_fsync_offset`
and match it to `aof_last_incr_size` in `flushAppendOnlyFile`. This is because in the past we compared
`aof_fsync_offset` and `aof_current_size`, but with MP-AOF it could be the total AOF file will be
smaller after AOFRW, and the (already existing) incr file still has data that needs to be fsynced.
The change in `flushAppendOnlyFile`, about the `AOF_FSYNC_ALWAYS`, it is follow #6053
(the details is in #5985), we also check `AOF_FSYNC_ALWAYS` to handle a case where
appendfsync is changed from everysec to always while there is data that's written but not yet fsynced.
2023-03-29 08:17:05 -04:00
|
|
|
$replica config set appendfsync everysec
|
|
|
|
waitForBgrewriteaof $replica ;# Make sure there is no AOFRW
|
|
|
|
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 0 1 0] {1 1}
|
|
|
|
}
|
|
|
|
|
Fix fork done handler wrongly update fsync metrics and enhance AOF_ FSYNC_ALWAYS (#11973)
This PR fix several unrelated bugs that were discovered by the same set of tests
(WAITAOF tests in #11713), could make the `WAITAOF` test hang.
The change in `backgroundRewriteDoneHandler` is about MP-AOF.
That leftover / old code assumes that we started a new AOF file just now
(when we have a new base into which we're gonna incrementally write), but
the fact is that with MP-AOF, the fork done handler doesn't really affect the
incremental file being maintained by the parent process, there's no reason to
re-issue `SELECT`, and no reason to update any of the fsync variables in that flow.
This should have been deleted with MP-AOF (introduced in #9788, 7.0).
The damage is that the update to `aof_fsync_offset` will cause us to miss an fsync
in `flushAppendOnlyFile`, that happens if we stop write commands in `AOF_FSYNC_EVERYSEC`
while an AOFRW is in progress. This caused a new `WAITAOF` test to sometime hang forever.
Also because of MP-AOF, we needed to change `aof_fsync_offset` to `aof_last_incr_fsync_offset`
and match it to `aof_last_incr_size` in `flushAppendOnlyFile`. This is because in the past we compared
`aof_fsync_offset` and `aof_current_size`, but with MP-AOF it could be the total AOF file will be
smaller after AOFRW, and the (already existing) incr file still has data that needs to be fsynced.
The change in `flushAppendOnlyFile`, about the `AOF_FSYNC_ALWAYS`, it is follow #6053
(the details is in #5985), we also check `AOF_FSYNC_ALWAYS` to handle a case where
appendfsync is changed from everysec to always while there is data that's written but not yet fsynced.
2023-03-29 08:17:05 -04:00
|
|
|
test {WAITAOF replica copy everysec with AOFRW} {
|
|
|
|
$replica config set appendfsync everysec
|
|
|
|
|
|
|
|
# When we trigger an AOFRW, a fsync is triggered when closing the old INCR file,
|
|
|
|
# so with the everysec, we will skip that second of fsync, and in the next second
|
|
|
|
# after that, we will eventually do the fsync.
|
|
|
|
$replica bgrewriteaof
|
|
|
|
waitForBgrewriteaof $replica
|
|
|
|
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 0 1 0] {1 1}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF replica copy everysec with slow AOFRW} {
|
|
|
|
$replica config set appendfsync everysec
|
|
|
|
$replica config set rdb-key-save-delay 1000000 ;# 1 sec
|
|
|
|
|
|
|
|
$replica bgrewriteaof
|
|
|
|
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 0 1 0] {1 1}
|
|
|
|
|
|
|
|
$replica config set rdb-key-save-delay 0
|
|
|
|
waitForBgrewriteaof $replica
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF replica copy everysec->always with AOFRW} {
|
|
|
|
$replica config set appendfsync everysec
|
|
|
|
|
|
|
|
# Try to fit all of them in the same round second, although there's no way to guarantee
|
|
|
|
# that, it can be done on fast machine. In any case, the test shouldn't fail either.
|
|
|
|
$replica bgrewriteaof
|
|
|
|
$master incr foo
|
|
|
|
waitForBgrewriteaof $replica
|
|
|
|
$replica config set appendfsync always
|
|
|
|
|
|
|
|
assert_equal [$master waitaof 0 1 0] {1 1}
|
|
|
|
}
|
|
|
|
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
test {WAITAOF replica copy appendfsync always} {
|
|
|
|
$replica config set appendfsync always
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 0 1 0] {1 1}
|
|
|
|
$replica config set appendfsync everysec
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF replica copy if replica is blocked} {
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
pause_process $replica_pid
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 0 1 50] {1 0} ;# exits on timeout
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
resume_process $replica_pid
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
assert_equal [$master waitaof 0 1 0] {1 1}
|
|
|
|
}
|
|
|
|
|
2023-03-15 05:07:04 -04:00
|
|
|
test {WAITAOF replica multiple clients unblock - reuse last result} {
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd [redict_deferring_client -1]
|
|
|
|
set rd2 [redict_deferring_client -1]
|
2023-03-15 05:07:04 -04:00
|
|
|
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
pause_process $replica_pid
|
2023-03-15 05:07:04 -04:00
|
|
|
|
|
|
|
$rd incr foo
|
|
|
|
$rd read
|
|
|
|
|
|
|
|
$rd2 incr foo
|
|
|
|
$rd2 read
|
|
|
|
|
|
|
|
$rd waitaof 0 1 0
|
|
|
|
$rd2 waitaof 0 1 0
|
|
|
|
wait_for_blocked_clients_count 2 100 10 -1
|
|
|
|
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
resume_process $replica_pid
|
2023-03-15 05:07:04 -04:00
|
|
|
|
|
|
|
assert_equal [$rd read] {1 1}
|
|
|
|
assert_equal [$rd2 read] {1 1}
|
|
|
|
|
|
|
|
$rd ping
|
|
|
|
assert_equal [$rd read] {PONG}
|
|
|
|
$rd2 ping
|
|
|
|
assert_equal [$rd2 read] {PONG}
|
|
|
|
|
|
|
|
$rd close
|
|
|
|
$rd2 close
|
|
|
|
}
|
|
|
|
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
test {WAITAOF on promoted replica} {
|
|
|
|
$replica replicaof no one
|
|
|
|
$replica incr foo
|
|
|
|
assert_equal [$replica waitaof 1 0 0] {1 0}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF master that loses a replica and backlog is dropped} {
|
|
|
|
$master config set repl-backlog-ttl 1
|
|
|
|
after 2000 ;# wait for backlog to expire
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 1 0 0] {1 0}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF master without backlog, wait is released when the replica finishes full-sync} {
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd [redict_deferring_client -1]
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
$rd incr foo
|
|
|
|
$rd read
|
|
|
|
$rd waitaof 0 1 0
|
|
|
|
wait_for_blocked_client -1
|
|
|
|
$replica replicaof $master_host $master_port
|
|
|
|
assert_equal [$rd read] {1 1}
|
|
|
|
$rd close
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF master isn't configured to do AOF} {
|
|
|
|
$master config set appendonly no
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 0 1 0] {0 1}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF replica isn't configured to do AOF} {
|
|
|
|
$master config set appendonly yes
|
|
|
|
waitForBgrewriteaof $master
|
|
|
|
$replica config set appendonly no
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 1 0 0] {1 0}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF both local and replica got AOF enabled at runtime} {
|
|
|
|
$replica config set appendonly yes
|
|
|
|
waitForBgrewriteaof $replica
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$master waitaof 1 1 0] {1 1}
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF master sends PING after last write} {
|
|
|
|
$master config set repl-ping-replica-period 1
|
|
|
|
$master incr foo
|
|
|
|
after 1200 ;# wait for PING
|
|
|
|
$master get foo
|
|
|
|
assert_equal [$master waitaof 1 1 0] {1 1}
|
|
|
|
$master config set repl-ping-replica-period 10
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF master client didn't send any write command} {
|
|
|
|
$master config set repl-ping-replica-period 1
|
2024-03-21 05:56:59 -04:00
|
|
|
set client [redict_client -1]
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
after 1200 ;# wait for PING
|
|
|
|
assert_equal [$master waitaof 1 1 0] {1 1}
|
|
|
|
$client close
|
|
|
|
$master config set repl-ping-replica-period 10
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAITAOF master client didn't send any command} {
|
|
|
|
$master config set repl-ping-replica-period 1
|
2024-03-21 05:56:59 -04:00
|
|
|
set client [redict [srv -1 "host"] [srv -1 "port"] 0 $::tls]
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
after 1200 ;# wait for PING
|
|
|
|
assert_equal [$master waitaof 1 1 0] {1 1}
|
|
|
|
$client close
|
|
|
|
$master config set repl-ping-replica-period 10
|
|
|
|
}
|
|
|
|
|
|
|
|
foreach fsync {no everysec always} {
|
|
|
|
test "WAITAOF when replica switches between masters, fsync: $fsync" {
|
|
|
|
# test a case where a replica is moved from one master to the other
|
|
|
|
# between two replication streams with different offsets that should
|
|
|
|
# not be mixed. done to smoke-test race conditions with bio thread.
|
|
|
|
start_server {overrides {appendonly {yes} auto-aof-rewrite-percentage {0}}} {
|
|
|
|
start_server {overrides {appendonly {yes} auto-aof-rewrite-percentage {0}}} {
|
|
|
|
set master2 [srv -1 client]
|
|
|
|
set master2_host [srv -1 host]
|
|
|
|
set master2_port [srv -1 port]
|
|
|
|
set replica2 [srv 0 client]
|
|
|
|
set replica2_host [srv 0 host]
|
|
|
|
set replica2_port [srv 0 port]
|
|
|
|
set replica2_pid [srv 0 pid]
|
|
|
|
|
|
|
|
$replica2 replicaof $master2_host $master2_port
|
|
|
|
wait_for_ofs_sync $master2 $replica2
|
|
|
|
|
|
|
|
$master config set appendfsync $fsync
|
|
|
|
$master2 config set appendfsync $fsync
|
|
|
|
$replica config set appendfsync $fsync
|
|
|
|
$replica2 config set appendfsync $fsync
|
|
|
|
if {$fsync eq "no"} {
|
|
|
|
after 2000 ;# wait for any previous fsync to finish
|
|
|
|
# can't afford "no" on the masters
|
|
|
|
$master config set appendfsync always
|
|
|
|
$master2 config set appendfsync always
|
|
|
|
} elseif {$fsync eq "everysec"} {
|
|
|
|
after 990 ;# hoping to hit a race
|
|
|
|
}
|
|
|
|
|
|
|
|
# add some writes and block a client on each master
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd [redict_deferring_client -3]
|
|
|
|
set rd2 [redict_deferring_client -1]
|
Implementing the WAITAOF command (issue #10505) (#11713)
Implementing the WAITAOF functionality which would allow the user to
block until a specified number of Redises have fsynced all previous write
commands to the AOF.
Syntax: `WAITAOF <num_local> <num_replicas> <timeout>`
Response: Array containing two elements: num_local, num_replicas
num_local is always either 0 or 1 representing the local AOF on the master.
num_replicas is the number of replicas that acknowledged the a replication
offset of the last write being fsynced to the AOF.
Returns an error when called on replicas, or when called with non-zero
num_local on a master with AOF disabled, in all other cases the response
just contains number of fsync copies.
Main changes:
* Added code to keep track of replication offsets that are confirmed to have
been fsynced to disk.
* Keep advancing master_repl_offset even when replication is disabled (and
there's no replication backlog, only if there's an AOF enabled).
This way we can use this command and it's mechanisms even when replication
is disabled.
* Extend REPLCONF ACK to `REPLCONF ACK <ofs> FACK <ofs>`, the FACK
will be appended only if there's an AOF on the replica, and already ignored on
old masters (thus backwards compatible)
* WAIT now no longer wait for the replication offset after your last command, but
rather the replication offset after your last write (or read command that caused
propagation, e.g. lazy expiry).
Unrelated changes:
* WAIT command respects CLIENT_DENY_BLOCKING (not just CLIENT_MULTI)
Implementation details:
* Add an atomic var named `fsynced_reploff_pending` that's updated
(usually by the bio thread) and later copied to the main `fsynced_reploff`
variable (only if the AOF base file exists).
I.e. during the initial AOF rewrite it will not be used as the fsynced offset
since the AOF base is still missing.
* Replace close+fsync bio job with new BIO_CLOSE_AOF (AOF specific)
job that will also update fsync offset the field.
* Handle all AOF jobs (BIO_CLOSE_AOF, BIO_AOF_FSYNC) in the same bio
worker thread, to impose ordering on their execution. This solves a
race condition where a job could set `fsynced_reploff_pending` to a higher
value than another pending fsync job, resulting in indicating an offset
for which parts of the data have not yet actually been fsynced.
Imposing an ordering on the jobs guarantees that fsync jobs are executed
in increasing order of replication offset.
* Drain bio jobs when switching `appendfsync` to "always"
This should prevent a write race between updates to `fsynced_reploff_pending`
in the main thread (`flushAppendOnlyFile` when set to ALWAYS fsync), and
those done in the bio thread.
* Drain the pending fsync when starting over a new AOF to avoid race conditions
with the previous AOF offsets overriding the new one (e.g. after switching to
replicate from a new master).
* Make sure to update the fsynced offset at the end of the initial AOF rewrite.
a must in case there are no additional writes that trigger a periodic fsync,
specifically for a replica that does a full sync.
Limitations:
It is possible to write a module and a Lua script that propagate to the AOF and doesn't
propagate to the replication stream. see REDISMODULE_ARGV_NO_REPLICAS and luaRedisSetReplCommand.
These features are incompatible with the WAITAOF command, and can result
in two bad cases. The scenario is that the user executes command that only
propagates to AOF, and then immediately
issues a WAITAOF, and there's no further writes on the replication stream after that.
1. if the the last thing that happened on the replication stream is a PING
(which increased the replication offset but won't trigger an fsync on the replica),
then the client would hang forever (will wait for an fack that the replica will never
send sine it doesn't trigger any fsyncs).
2. if the last thing that happened is a write command that got propagated properly,
then WAITAOF will be released immediately, without waiting for an fsync (since
the offset didn't change)
Refactoring:
* Plumbing to allow bio worker to handle multiple job types
This introduces infrastructure necessary to allow BIO workers to
not have a 1-1 mapping of worker to job-type. This allows in the
future to assign multiple job types to a single worker, either as
a performance/resource optimization, or as a way of enforcing
ordering between specific classes of jobs.
Co-authored-by: Oran Agra <oran@redislabs.com>
2023-03-14 14:26:21 -04:00
|
|
|
$rd set boo 11
|
|
|
|
$rd2 set boo 22
|
|
|
|
$rd read
|
|
|
|
$rd2 read
|
|
|
|
$rd waitaof 1 1 0
|
|
|
|
$rd2 waitaof 1 1 0
|
|
|
|
|
|
|
|
if {$fsync eq "no"} {
|
|
|
|
# since appendfsync is disabled in the replicas, the client
|
|
|
|
# will get released only with full sync
|
|
|
|
wait_for_blocked_client -1
|
|
|
|
wait_for_blocked_client -3
|
|
|
|
}
|
|
|
|
# switch between the two replicas
|
|
|
|
$replica2 replicaof $master_host $master_port
|
|
|
|
$replica replicaof $master2_host $master2_port
|
|
|
|
assert_equal [$rd read] {1 1}
|
|
|
|
assert_equal [$rd2 read] {1 1}
|
|
|
|
$rd close
|
|
|
|
$rd2 close
|
|
|
|
|
|
|
|
assert_equal [$replica get boo] 22
|
|
|
|
assert_equal [$replica2 get boo] 11
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2023-03-15 12:16:16 -04:00
|
|
|
|
|
|
|
start_server {tags {"failover external:skip"}} {
|
|
|
|
start_server {} {
|
|
|
|
start_server {} {
|
|
|
|
set master [srv 0 client]
|
|
|
|
set master_host [srv 0 host]
|
|
|
|
set master_port [srv 0 port]
|
|
|
|
|
|
|
|
set replica1 [srv -1 client]
|
|
|
|
set replica1_pid [srv -1 pid]
|
|
|
|
|
|
|
|
set replica2 [srv -2 client]
|
|
|
|
|
|
|
|
test {setup replication for following tests} {
|
|
|
|
$replica1 replicaof $master_host $master_port
|
|
|
|
$replica2 replicaof $master_host $master_port
|
|
|
|
wait_for_sync $replica1
|
|
|
|
wait_for_sync $replica2
|
|
|
|
}
|
|
|
|
|
|
|
|
test {WAIT and WAITAOF replica multiple clients unblock - reuse last result} {
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd [redict_deferring_client]
|
|
|
|
set rd2 [redict_deferring_client]
|
2023-03-15 12:16:16 -04:00
|
|
|
|
|
|
|
$master config set appendonly yes
|
|
|
|
$replica1 config set appendonly yes
|
|
|
|
$replica2 config set appendonly yes
|
|
|
|
|
|
|
|
$master config set appendfsync always
|
|
|
|
$replica1 config set appendfsync no
|
|
|
|
$replica2 config set appendfsync no
|
|
|
|
|
|
|
|
waitForBgrewriteaof $master
|
|
|
|
waitForBgrewriteaof $replica1
|
|
|
|
waitForBgrewriteaof $replica2
|
|
|
|
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
pause_process $replica1_pid
|
2023-03-15 12:16:16 -04:00
|
|
|
|
|
|
|
$rd incr foo
|
|
|
|
$rd read
|
|
|
|
$rd waitaof 0 1 0
|
|
|
|
|
|
|
|
# rd2 has a newer repl_offset
|
|
|
|
$rd2 incr foo
|
|
|
|
$rd2 read
|
|
|
|
$rd2 wait 2 0
|
|
|
|
|
|
|
|
wait_for_blocked_clients_count 2
|
|
|
|
|
Attempt to solve MacOS CI issues in GH Actions (#12013)
The MacOS CI in github actions often hangs without any logs. GH argues that
it's due to resource utilization, either running out of disk space, memory, or CPU
starvation, and thus the runner is terminated.
This PR contains multiple attempts to resolve this:
1. introducing pause_process instead of SIGSTOP, which waits for the process
to stop before resuming the test, possibly resolving race conditions in some tests,
this was a suspect since there was one test that could result in an infinite loop in that
case, in practice this didn't help, but still a good idea to keep.
2. disable the `save` config in many tests that don't need it, specifically ones that use
heavy writes and could create large files.
3. change the `populate` proc to use short pipeline rather than an infinite one.
4. use `--clients 1` in the macos CI so that we don't risk running multiple resource
demanding tests in parallel.
5. enable `--verbose` to be repeated to elevate verbosity and print more info to stdout
when a test or a server starts.
2023-04-12 02:19:21 -04:00
|
|
|
resume_process $replica1_pid
|
2023-03-15 12:16:16 -04:00
|
|
|
|
|
|
|
# WAIT will unblock the client first.
|
|
|
|
assert_equal [$rd2 read] {2}
|
|
|
|
|
|
|
|
# Make $replica1 catch up the repl_aof_off, then WAITAOF will unblock the client.
|
|
|
|
$replica1 config set appendfsync always
|
|
|
|
$master incr foo
|
|
|
|
assert_equal [$rd read] {1 1}
|
|
|
|
|
|
|
|
$rd ping
|
|
|
|
assert_equal [$rd read] {PONG}
|
|
|
|
$rd2 ping
|
|
|
|
assert_equal [$rd2 read] {PONG}
|
|
|
|
|
|
|
|
$rd close
|
|
|
|
$rd2 close
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|