# SPDX-FileCopyrightText: 2024 Redict Contributors
# SPDX-FileCopyrightText: 2024 Salvatore Sanfilippo <antirez at gmail dot com>
#
# SPDX-License-Identifier: BSD-3-Clause
# SPDX-License-Identifier: LGPL-3.0-only

proc cmdstat {cmd} {
|
|
|
|
return [cmdrstat $cmd r]
|
|
|
|
}
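# Note (illustrative, not asserted verbatim): based on the assertions later in
# this file, a commandstats string returned by [cmdstat <cmd>] contains fields
# such as calls=<n>,...,rejected_calls=<n>,failed_calls=<n>, e.g.
#   calls=1,usec=12,usec_per_call=12.00,rejected_calls=0,failed_calls=1
# (the usec fields and all values here are assumed for illustration only).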
|
|
|
|
|
|
|
|
proc errorstat {cmd} {
|
|
|
|
return [errorrstat $cmd r]
|
|
|
|
}
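# Note (illustrative): based on the assertions below (e.g. {*count=1*} and the
# exact "count=1" match), [errorstat <ERRORCODE>] returns a string of the form
#   count=<number of replies carrying that error code>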
|
|
|
|
|
Added INFO LATENCYSTATS section: latency by percentile distribution/latency by cumulative distribution of latencies (#9462)
# Short description
The Redis extended latency stats track per-command latencies and enable:
- exporting the per-command percentile distribution via the `INFO LATENCYSTATS` command.
**(the percentile distribution is not mergeable between cluster nodes).**
- exporting the per-command cumulative latency distributions via the `LATENCY HISTOGRAM` command.
Using the cumulative distribution of latencies we can merge stats from different cluster nodes
to calculate aggregate metrics.
By default, the extended latency monitoring is enabled, since the overhead of keeping track of the
command latency is very small.
If you don't want to track extended latency metrics, you can easily disable it at runtime using the command:
- `CONFIG SET latency-tracking no`
By default, the exported latency percentiles are p50, p99, and p99.9.
You can alter them at runtime using the command:
- `CONFIG SET latency-tracking-info-percentiles "0.0 50.0 100.0"`
## Some details:
- The total size per histogram should sit around 40 KiB. We only allocate those 40 KiB when a command
is called for the first time.
- Regarding write overhead: as seen below, there is no measurable overhead on the achievable
ops/sec or on the full latency spectrum observed by the client. The measured redis-benchmark results for
unstable vs. this branch are also included.
- We track from 1 nanosecond to 1 second (everything above 1 second is considered +Inf).
## `INFO LATENCYSTATS` exposition format
- Format: `latency_percentiles_usec_<CMDNAME>:p0=XX,p50=XX,...`
## `LATENCY HISTOGRAM [command ...]` exposition format
Returns a cumulative distribution of latencies, in the form of a histogram, for the specified command names.
The histogram is composed of a map of time buckets:
- Each bucket represents a latency range, between 1 nanosecond and roughly 1 second.
- Each bucket covers twice the previous bucket's range.
- Empty buckets are not printed.
- Everything above 1 second is considered +Inf.
- There will be at most log2(1000000000) ≈ 30 buckets.
We reply with a map for each command in the format:
`<command name> : { calls: <total command calls>, histogram: { <bucket 1>: <cumulative count>, <bucket 2>: <cumulative count>, ... } }`
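For illustration only (all values are invented; only the shape follows the formats above), a hypothetical
`INFO LATENCYSTATS` line and `LATENCY HISTOGRAM` reply could look like:
```
latency_percentiles_usec_set:p50=0.5,p99=1.0,p99.9=2.0
set : { calls: 100000, histogram: { 1023: 99583, 2047: 99852, ... } }
```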
Co-authored-by: Oran Agra <oran@redislabs.com>
2022-01-05 07:01:05 -05:00
|
|
|
proc latency_percentiles_usec {cmd} {
|
|
|
|
return [latencyrstat_percentiles $cmd r]
|
|
|
|
}
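# Note (illustrative): per the assertions below, [latency_percentiles_usec <cmd>]
# is expected to return a comma-separated percentile list such as
#   p50=0.5,p99=1.0,p99.9=2.0
# (values here are invented; only the p50/p99/p99.9 labels are asserted).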
|
|
|
|
|
2021-06-09 08:13:24 -04:00
|
|
|
start_server {tags {"info" "external:skip"}} {
|
2020-12-31 09:53:43 -05:00
|
|
|
start_server {} {
|
|
|
|
|
|
|
|
test {latencystats: disable/enable} {
|
|
|
|
r config resetstat
|
|
|
|
r CONFIG SET latency-tracking no
|
|
|
|
r set a b
|
|
|
|
assert_match {} [latency_percentiles_usec set]
|
|
|
|
r CONFIG SET latency-tracking yes
|
|
|
|
r set a b
|
2022-01-09 20:04:18 -05:00
|
|
|
assert_match {*p50=*,p99=*,p99.9=*} [latency_percentiles_usec set]
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [latency_percentiles_usec set]
|
|
|
|
}
|
|
|
|
|
|
|
|
test {latencystats: configure percentiles} {
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [latency_percentiles_usec set]
|
|
|
|
r CONFIG SET latency-tracking yes
|
|
|
|
r SET a b
|
|
|
|
r GET a
|
2022-01-09 20:04:18 -05:00
|
|
|
assert_match {*p50=*,p99=*,p99.9=*} [latency_percentiles_usec set]
|
|
|
|
assert_match {*p50=*,p99=*,p99.9=*} [latency_percentiles_usec get]
|
|
|
|
r CONFIG SET latency-tracking-info-percentiles "0.0 50.0 100.0"
|
2022-01-09 20:04:18 -05:00
|
|
|
assert_match [r config get latency-tracking-info-percentiles] {latency-tracking-info-percentiles {0 50 100}}
|
|
|
|
assert_match {*p0=*,p50=*,p100=*} [latency_percentiles_usec set]
|
|
|
|
assert_match {*p0=*,p50=*,p100=*} [latency_percentiles_usec get]
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [latency_percentiles_usec set]
|
|
|
|
}
|
|
|
|
|
|
|
|
test {latencystats: bad configure percentiles} {
|
|
|
|
r config resetstat
|
|
|
|
set configlatencyline [r config get latency-tracking-info-percentiles]
|
|
|
|
catch {r CONFIG SET latency-tracking-info-percentiles "10.0 50.0 a"} e
|
|
|
|
assert_match {ERR CONFIG SET failed*} $e
|
|
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
assert_match [r config get latency-tracking-info-percentiles] $configlatencyline
|
|
|
|
catch {r CONFIG SET latency-tracking-info-percentiles "10.0 50.0 101.0"} e
|
|
|
|
assert_match {ERR CONFIG SET failed*} $e
|
|
|
|
assert_equal [s total_error_replies] 2
|
|
|
|
assert_match [r config get latency-tracking-info-percentiles] $configlatencyline
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
}
|
|
|
|
|
|
|
|
test {latencystats: blocking commands} {
|
|
|
|
r config resetstat
|
|
|
|
r CONFIG SET latency-tracking yes
|
|
|
|
r CONFIG SET latency-tracking-info-percentiles "50.0 99.0 99.9"
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd [redict_deferring_client]
|
|
|
|
r del list1{t}
|
|
|
|
|
|
|
|
$rd blpop list1{t} 0
|
|
|
|
wait_for_blocked_client
|
|
|
|
r lpush list1{t} a
|
|
|
|
assert_equal [$rd read] {list1{t} a}
|
|
|
|
$rd blpop list1{t} 0
|
|
|
|
wait_for_blocked_client
|
|
|
|
r lpush list1{t} b
|
|
|
|
assert_equal [$rd read] {list1{t} b}
|
2022-01-09 20:04:18 -05:00
|
|
|
assert_match {*p50=*,p99=*,p99.9=*} [latency_percentiles_usec blpop]
|
|
|
|
$rd close
|
|
|
|
}
|
|
|
|
|
2022-01-17 05:32:32 -05:00
|
|
|
test {latencystats: subcommands} {
|
|
|
|
r config resetstat
|
|
|
|
r CONFIG SET latency-tracking yes
|
|
|
|
r CONFIG SET latency-tracking-info-percentiles "50.0 99.0 99.9"
|
|
|
|
r client id
|
|
|
|
|
|
|
|
assert_match {*p50=*,p99=*,p99.9=*} [latency_percentiles_usec client\\|id]
|
|
|
|
assert_match {*p50=*,p99=*,p99.9=*} [latency_percentiles_usec config\\|set]
|
|
|
|
}
|
|
|
|
|
|
|
|
test {latencystats: measure latency} {
|
|
|
|
r config resetstat
|
|
|
|
r CONFIG SET latency-tracking yes
|
|
|
|
r CONFIG SET latency-tracking-info-percentiles "50.0"
|
|
|
|
r DEBUG sleep 0.05
|
|
|
|
r SET k v
|
|
|
|
set latencystatline_debug [latency_percentiles_usec debug]
|
|
|
|
set latencystatline_set [latency_percentiles_usec set]
|
2022-01-09 20:04:18 -05:00
|
|
|
regexp "p50=(.+\..+)" $latencystatline_debug -> p50_debug
|
|
|
|
regexp "p50=(.+\..+)" $latencystatline_set -> p50_set
|
|
|
|
assert {$p50_debug >= 50000}
|
|
|
|
assert {$p50_set >= 0}
|
|
|
|
assert {$p50_debug >= $p50_set}
|
|
|
|
} {} {needs:debug}
|
|
|
|
|
2020-12-31 09:53:43 -05:00
|
|
|
test {errorstats: failed call authentication error} {
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
assert_equal [s total_error_replies] 0
|
|
|
|
catch {r auth k} e
|
|
|
|
assert_match {ERR AUTH*} $e
|
|
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth]
|
|
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
}
|
|
|
|
|
|
|
|
test {errorstats: failed call within MULTI/EXEC} {
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
assert_equal [s total_error_replies] 0
|
|
|
|
r multi
|
|
|
|
r set a b
|
|
|
|
r auth a
|
|
|
|
catch {r exec} e
|
|
|
|
assert_match {ERR AUTH*} $e
|
|
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat set]
|
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth]
|
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat exec]
|
|
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
|
|
|
|
# errors from the MULTI/EXEC commands themselves should still be attributed to them
|
|
|
|
catch {r exec} e
|
|
|
|
assert_match {ERR EXEC without MULTI} $e
|
|
|
|
assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat exec]
|
|
|
|
assert_match {*count=2*} [errorstat ERR]
|
|
|
|
assert_equal [s total_error_replies] 2
|
|
|
|
}
|
|
|
|
|
|
|
|
test {errorstats: failed call within LUA} {
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
assert_equal [s total_error_replies] 0
|
|
|
|
catch {r eval {redis.pcall('XGROUP', 'CREATECONSUMER', 's1', 'mygroup', 'consumer') return } 0} e
|
|
|
|
assert_match {*count=1*} [errorstat ERR]
|
Treat subcommands as commands (#9504)
## Intro
The purpose is to allow having different flags/ACL categories for
subcommands (Example: CONFIG GET is ok-loading but CONFIG SET isn't)
We create a small command table for every command that has subcommands
and each subcommand has its own flags, etc. (same as a "regular" command)
This commit also unites the Redis and the Sentinel command tables
## Affected commands
CONFIG
Used to have "admin ok-loading ok-stale no-script"
Changes:
1. Dropped "ok-loading" in all except GET (this doesn't change behavior since
there were checks in the code doing that)
XINFO
Used to have "read-only random"
Changes:
1. Dropped "random" in all except CONSUMERS
XGROUP
Used to have "write use-memory"
Changes:
1. Dropped "use-memory" in all except CREATE and CREATECONSUMER
COMMAND
No changes.
MEMORY
Used to have "random read-only"
Changes:
1. Dropped "random" in PURGE and USAGE
ACL
Used to have "admin no-script ok-loading ok-stale"
Changes:
1. Dropped "admin" in WHOAMI, GENPASS, and CAT
LATENCY
No changes.
MODULE
No changes.
SLOWLOG
Used to have "admin random ok-loading ok-stale"
Changes:
1. Dropped "random" in RESET
OBJECT
Used to have "read-only random"
Changes:
1. Dropped "random" in ENCODING and REFCOUNT
SCRIPT
Used to have "may-replicate no-script"
Changes:
1. Dropped "may-replicate" in all except FLUSH and LOAD
CLIENT
Used to have "admin no-script random ok-loading ok-stale"
Changes:
1. Dropped "random" in all except INFO and LIST
2. Dropped "admin" in ID, TRACKING, CACHING, GETREDIR, INFO, SETNAME, GETNAME, and REPLY
STRALGO
No changes.
PUBSUB
No changes.
CLUSTER
Changes:
1. Dropped "admin in countkeysinslots, getkeysinslot, info, nodes, keyslot, myid, and slots
SENTINEL
No changes.
(note that DEBUG also fits, but we decided not to convert it since it's for
debugging and anyway undocumented)
## New sub-command
This commit adds another element to the per-command output of COMMAND,
describing the list of subcommands, if any (in the same structure as "regular" commands)
Also, it adds a new subcommand:
```
COMMAND LIST [FILTERBY (MODULE <module-name>|ACLCAT <cat>|PATTERN <pattern>)]
```
which returns the set of all commands (optionally filtered), excluding subcommands.
## Module API
A new module API, RM_CreateSubcommand, was added in order to allow
module writers to define subcommands
## ACL changes:
1. Now that each subcommand is actually a command, each has its own ACL id.
2. The old mechanism of allowed_subcommands is redundant
(blocking/allowing a subcommand is the same as blocking/allowing a regular command),
but we had to keep it, to support the widespread usage of allowed_subcommands
to block commands with certain args, that aren't subcommands (e.g. "-select +select|0").
3. I have renamed allowed_subcommands to allowed_firstargs to emphasize the difference.
4. Because subcommands are commands in ACL too, you can now use "-" to block subcommands
(e.g. "+client -client|kill"), which wasn't possible in the past.
5. It is also possible to use the allowed_firstargs mechanism with subcommands.
For example: `+config -config|set +config|set|loglevel` will block all CONFIG SET except
for setting the log level (see the illustrative ACL rule after this list).
6. All of the ACL changes above required some amount of refactoring.
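As a purely illustrative sketch (the user name, password, and key pattern are invented; the syntax follows the examples above and the `ACL SETUSER` call used later in this test file), a rule that allows CLIENT but blocks CLIENT KILL could be set with:
```
ACL SETUSER alice on >p1pp0 ~app:* +client -client|kill
```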
## Misc
1. There are two approaches: either each subcommand has its own function, or all
subcommands use the same function, determining what to do according to argv[0].
For now, I took the former approach only with CONFIG and COMMAND,
while other commands use the latter approach (for a smaller blamelog diff).
2. Deleted memoryGetKeys: it is no longer needed because MEMORY USAGE now uses the "range" key spec.
3. Bugfix: GETNAME was missing from CLIENT's help message.
4. Sentinel and Redis now use the same table, with the same function pointer.
Some commands have a different implementation in Sentinel, so we redirect
them (these are ROLE, PUBLISH, and INFO).
5. Command stats now show the stats per subcommand (e.g. instead of stats just
for "config" you will have stats for "config|set", "config|get", etc.)
6. It is now possible to use COMMAND directly on subcommands:
COMMAND INFO CONFIG|GET (the pipe syntax was inspired by ACL, and
can be used in the functions lookupCommandBySds and lookupCommandByCString)
7. STRALGO is now a container command (has "help")
## Breaking changes:
1. Command stats now show the stats per subcommand (see (5) above)
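For illustration (all numbers invented; the field layout mirrors the `cmdstat` assertions used in this test file plus the usual usec fields, which are assumed here), `INFO commandstats` now reports each subcommand separately:
```
cmdstat_config|get:calls=2,usec=25,usec_per_call=12.50,rejected_calls=0,failed_calls=0
cmdstat_config|set:calls=1,usec=40,usec_per_call=40.00,rejected_calls=0,failed_calls=1
```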
2021-10-20 04:52:57 -04:00
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat xgroup\\|createconsumer]
|
2020-12-31 09:53:43 -05:00
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat eval]
|
|
|
|
|
|
|
|
# errors from the EVAL command itself should still be attributed to it
|
|
|
|
catch {r eval a} e
|
|
|
|
assert_match {ERR wrong*} $e
|
|
|
|
assert_match {*calls=1,*,rejected_calls=1,failed_calls=0} [cmdstat eval]
|
|
|
|
assert_match {*count=2*} [errorstat ERR]
|
|
|
|
assert_equal [s total_error_replies] 2
|
|
|
|
}
|
|
|
|
|
2021-02-24 11:45:13 -05:00
|
|
|
test {errorstats: failed call NOSCRIPT error} {
|
|
|
|
r config resetstat
|
|
|
|
assert_equal [s total_error_replies] 0
|
|
|
|
assert_match {} [errorstat NOSCRIPT]
|
|
|
|
catch {r evalsha NotValidShaSUM 0} e
|
|
|
|
assert_match {NOSCRIPT*} $e
|
|
|
|
assert_match {*count=1*} [errorstat NOSCRIPT]
|
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat evalsha]
|
|
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat NOSCRIPT]
|
|
|
|
}
|
|
|
|
|
2020-12-31 09:53:43 -05:00
|
|
|
test {errorstats: failed call NOGROUP error} {
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat NOGROUP]
|
|
|
|
r del mystream
|
|
|
|
r XADD mystream * f v
|
|
|
|
catch {r XGROUP CREATECONSUMER mystream mygroup consumer} e
|
|
|
|
assert_match {NOGROUP*} $e
|
|
|
|
assert_match {*count=1*} [errorstat NOGROUP]
|
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat xgroup\\|createconsumer]
|
2020-12-31 09:53:43 -05:00
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat NOGROUP]
|
|
|
|
}
|
|
|
|
|
|
|
|
test {errorstats: rejected call unknown command} {
|
|
|
|
r config resetstat
|
|
|
|
assert_equal [s total_error_replies] 0
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
catch {r asdf} e
|
|
|
|
assert_match {ERR unknown*} $e
|
|
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
}
|
|
|
|
|
|
|
|
test {errorstats: rejected call within MULTI/EXEC} {
|
|
|
|
r config resetstat
|
|
|
|
assert_equal [s total_error_replies] 0
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
r multi
|
|
|
|
catch {r set} e
|
2022-01-23 03:05:06 -05:00
|
|
|
assert_match {ERR wrong number of arguments for 'set' command} $e
|
2020-12-31 09:53:43 -05:00
|
|
|
catch {r exec} e
|
|
|
|
assert_match {EXECABORT*} $e
|
|
|
|
assert_match {*count=1*} [errorstat ERR]
|
2021-08-06 22:27:24 -04:00
|
|
|
assert_match {*count=1*} [errorstat EXECABORT]
|
|
|
|
assert_equal [s total_error_replies] 2
|
2020-12-31 09:53:43 -05:00
|
|
|
assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat multi]
|
2021-08-06 22:27:24 -04:00
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat exec]
|
|
|
|
assert_equal [s total_error_replies] 2
|
2020-12-31 09:53:43 -05:00
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
}
|
|
|
|
|
|
|
|
test {errorstats: rejected call due to wrong arity} {
|
|
|
|
r config resetstat
|
|
|
|
assert_equal [s total_error_replies] 0
|
|
|
|
assert_match {} [errorstat ERR]
|
|
|
|
catch {r set k} e
|
2022-01-23 03:05:06 -05:00
|
|
|
assert_match {ERR wrong number of arguments for 'set' command} $e
|
2020-12-31 09:53:43 -05:00
|
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
|
|
assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
|
|
# ensure that after a rejected command, valid ones are counted properly
|
|
|
|
r set k1 v1
|
|
|
|
r set k2 v2
|
|
|
|
assert_match {calls=2,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
}
|
|
|
|
|
|
|
|
test {errorstats: rejected call by OOM error} {
|
|
|
|
r config resetstat
|
|
|
|
assert_equal [s total_error_replies] 0
|
|
|
|
assert_match {} [errorstat OOM]
|
|
|
|
r config set maxmemory 1
|
|
|
|
catch {r set a b} e
|
|
|
|
assert_match {OOM*} $e
|
|
|
|
assert_match {*count=1*} [errorstat OOM]
|
|
|
|
assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat OOM]
|
2022-02-21 04:20:41 -05:00
|
|
|
r config set maxmemory 0
|
2020-12-31 09:53:43 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
test {errorstats: rejected call by authorization error} {
|
|
|
|
r config resetstat
|
|
|
|
assert_equal [s total_error_replies] 0
|
|
|
|
assert_match {} [errorstat NOPERM]
|
|
|
|
r ACL SETUSER alice on >p1pp0 ~cached:* +get +info +config
|
|
|
|
r auth alice p1pp0
|
|
|
|
catch {r set a b} e
|
|
|
|
assert_match {NOPERM*} $e
|
|
|
|
assert_match {*count=1*} [errorstat NOPERM]
|
|
|
|
assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
r config resetstat
|
|
|
|
assert_match {} [errorstat NOPERM]
|
2022-02-21 04:20:41 -05:00
|
|
|
r auth default ""
|
2020-12-31 09:53:43 -05:00
|
|
|
}
|
2022-02-21 04:20:41 -05:00
|
|
|
|
|
|
|
test {errorstats: blocking commands} {
|
|
|
|
r config resetstat
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd [redict_deferring_client]
|
2022-02-21 04:20:41 -05:00
|
|
|
$rd client id
|
|
|
|
set rd_id [$rd read]
|
|
|
|
r del list1{t}
|
|
|
|
|
|
|
|
$rd blpop list1{t} 0
|
|
|
|
wait_for_blocked_client
|
|
|
|
r client unblock $rd_id error
|
|
|
|
assert_error {UNBLOCKED*} {$rd read}
|
|
|
|
assert_match {*count=1*} [errorstat UNBLOCKED]
|
|
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat blpop]
|
|
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
$rd close
|
|
|
|
}
|
|
|
|
|
2024-03-19 02:18:22 -04:00
|
|
|
test {errorstats: limit errors will not increase indefinitely} {
|
|
|
|
r config resetstat
|
|
|
|
for {set j 1} {$j <= 1100} {incr j} {
|
|
|
|
assert_error "$j my error message" {
|
|
|
|
r eval {return redis.error_reply(string.format('%s my error message', ARGV[1]))} 0 $j
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
assert_equal [count_log_message 0 "Errorstats stopped adding new errors"] 1
|
|
|
|
assert_equal [count_log_message 0 "Current errors code list"] 1
|
|
|
|
assert_equal "count=1" [errorstat ERRORSTATS_DISABLED]
|
|
|
|
|
|
|
|
# Since we currently have no metrics exposed for server.errors, we use lazyfree
|
|
|
|
# to verify that we only have 128 errors.
|
|
|
|
wait_for_condition 50 100 {
|
|
|
|
[s lazyfreed_objects] eq 128
|
|
|
|
} else {
|
|
|
|
fail "errorstats resetstat lazyfree error"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-12 13:13:15 -04:00
|
|
|
test {stats: eventloop metrics} {
|
|
|
|
set info1 [r info stats]
|
|
|
|
set cycle1 [getInfoProperty $info1 eventloop_cycles]
|
|
|
|
set el_sum1 [getInfoProperty $info1 eventloop_duration_sum]
|
|
|
|
set cmd_sum1 [getInfoProperty $info1 eventloop_duration_cmd_sum]
|
|
|
|
assert_morethan $cycle1 0
|
|
|
|
assert_morethan $el_sum1 0
|
|
|
|
assert_morethan $cmd_sum1 0
|
|
|
|
after 110 ;# default hz is 10, wait for a cron tick.
|
|
|
|
set info2 [r info stats]
|
|
|
|
set cycle2 [getInfoProperty $info2 eventloop_cycles]
|
|
|
|
set el_sum2 [getInfoProperty $info2 eventloop_duration_sum]
|
|
|
|
set cmd_sum2 [getInfoProperty $info2 eventloop_duration_cmd_sum]
|
2023-07-05 02:32:30 -04:00
|
|
|
if {$::verbose} { puts "eventloop metrics cycle1: $cycle1, cycle2: $cycle2" }
|
2023-05-12 13:13:15 -04:00
|
|
|
assert_morethan $cycle2 $cycle1
|
|
|
|
assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance
|
2023-07-05 02:32:30 -04:00
|
|
|
if {$::verbose} { puts "eventloop metrics el_sum1: $el_sum1, el_sum2: $el_sum2" }
|
2023-05-12 13:13:15 -04:00
|
|
|
assert_morethan $el_sum2 $el_sum1
|
2023-07-05 02:32:30 -04:00
|
|
|
assert_lessthan $el_sum2 [expr $el_sum1+30000] ;# we expect roughly 100ms here, but allow some tolerance
|
|
|
|
if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" }
|
2023-05-12 13:13:15 -04:00
|
|
|
assert_morethan $cmd_sum2 $cmd_sum1
|
2023-07-05 02:32:30 -04:00
|
|
|
assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance
|
2023-05-12 13:13:15 -04:00
|
|
|
}
|
2023-07-05 02:32:30 -04:00
|
|
|
|
2023-05-12 13:13:15 -04:00
|
|
|
test {stats: instantaneous metrics} {
|
|
|
|
r config resetstat
|
2023-11-06 11:07:14 -05:00
|
|
|
set retries 0
|
|
|
|
for {set retries 1} {$retries < 4} {incr retries} {
|
|
|
|
after 1600 ;# hz is 10, wait for 16 cron ticks so that the sample array is filled
|
|
|
|
set value [s instantaneous_eventloop_cycles_per_sec]
|
|
|
|
if {$value > 0} break
|
|
|
|
}
|
|
|
|
|
|
|
|
assert_lessthan $retries 4
|
2023-07-05 02:32:30 -04:00
|
|
|
if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_cycles_per_sec: $value" }
|
2023-05-12 13:13:15 -04:00
|
|
|
assert_morethan $value 0
|
2023-11-06 11:07:14 -05:00
|
|
|
assert_lessthan $value [expr $retries*15] ;# default hz is 10
|
2023-05-12 13:13:15 -04:00
|
|
|
set value [s instantaneous_eventloop_duration_usec]
|
2023-07-05 02:32:30 -04:00
|
|
|
if {$::verbose} { puts "instantaneous metrics instantaneous_eventloop_duration_usec: $value" }
|
2023-05-12 13:13:15 -04:00
|
|
|
assert_morethan $value 0
|
2023-11-06 11:07:14 -05:00
|
|
|
assert_lessthan $value [expr $retries*22000] ;# default hz is 10, so duration < 1000 / 10, allow some tolerance
|
2023-05-12 13:13:15 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
test {stats: debug metrics} {
|
|
|
|
# make sure debug info is hidden
|
|
|
|
set info [r info]
|
|
|
|
assert_equal [getInfoProperty $info eventloop_duration_aof_sum] {}
|
|
|
|
set info_all [r info all]
|
|
|
|
assert_equal [getInfoProperty $info_all eventloop_duration_aof_sum] {}
|
|
|
|
|
|
|
|
set info1 [r info debug]
|
|
|
|
|
|
|
|
set aof1 [getInfoProperty $info1 eventloop_duration_aof_sum]
|
|
|
|
assert {$aof1 >= 0}
|
|
|
|
set cron1 [getInfoProperty $info1 eventloop_duration_cron_sum]
|
|
|
|
assert {$cron1 > 0}
|
|
|
|
set cycle_max1 [getInfoProperty $info1 eventloop_cmd_per_cycle_max]
|
|
|
|
assert {$cycle_max1 > 0}
|
|
|
|
set duration_max1 [getInfoProperty $info1 eventloop_duration_max]
|
|
|
|
assert {$duration_max1 > 0}
|
|
|
|
|
|
|
|
after 110 ;# hz is 10, wait for a cron tick.
|
|
|
|
set info2 [r info debug]
|
|
|
|
|
|
|
|
set aof2 [getInfoProperty $info2 eventloop_duration_aof_sum]
|
|
|
|
assert {$aof2 >= $aof1} ;# AOF is disabled, we expect $aof2 == $aof1, but allow some tolerance.
|
|
|
|
set cron2 [getInfoProperty $info2 eventloop_duration_cron_sum]
|
|
|
|
assert_morethan $cron2 $cron1
|
|
|
|
set cycle_max2 [getInfoProperty $info2 eventloop_cmd_per_cycle_max]
|
|
|
|
assert {$cycle_max2 >= $cycle_max1}
|
|
|
|
set duration_max2 [getInfoProperty $info2 eventloop_duration_max]
|
|
|
|
assert {$duration_max2 >= $duration_max1}
|
|
|
|
}
|
|
|
|
|
2023-08-30 14:51:14 -04:00
|
|
|
test {stats: client input and output buffer limit disconnections} {
|
|
|
|
r config resetstat
|
|
|
|
set info [r info stats]
|
|
|
|
assert_equal [getInfoProperty $info client_query_buffer_limit_disconnections] {0}
|
|
|
|
assert_equal [getInfoProperty $info client_output_buffer_limit_disconnections] {0}
|
|
|
|
# set qbuf limit to minimum to test stat
|
|
|
|
set org_qbuf_limit [lindex [r config get client-query-buffer-limit] 1]
|
|
|
|
r config set client-query-buffer-limit 1048576
|
|
|
|
catch {r set key [string repeat a 1048576]}
|
|
|
|
set info [r info stats]
|
|
|
|
assert_equal [getInfoProperty $info client_query_buffer_limit_disconnections] {1}
|
|
|
|
r config set client-query-buffer-limit $org_qbuf_limit
|
|
|
|
# set outbuf limit to just 10 to test stat
|
|
|
|
set org_outbuf_limit [lindex [r config get client-output-buffer-limit] 1]
|
|
|
|
r config set client-output-buffer-limit "normal 10 0 0"
|
|
|
|
r set key [string repeat a 100000] ;# to trigger output buffer limit check this needs to be big
|
|
|
|
catch {r get key}
|
|
|
|
set info [r info stats]
|
|
|
|
assert_equal [getInfoProperty $info client_output_buffer_limit_disconnections] {1}
|
|
|
|
r config set client-output-buffer-limit $org_outbuf_limit
|
2023-09-01 07:15:11 -04:00
|
|
|
} {OK} {logreqres:skip} ;# same as obuf-limits.tcl, skip logreqres
|
2023-12-13 00:44:13 -05:00
|
|
|
|
|
|
|
test {clients: pubsub clients} {
|
|
|
|
set info [r info clients]
|
|
|
|
assert_equal [getInfoProperty $info pubsub_clients] {0}
|
2024-03-21 05:56:59 -04:00
|
|
|
set rd1 [redict_deferring_client]
|
|
|
|
set rd2 [redict_deferring_client]
|
2023-12-13 00:44:13 -05:00
|
|
|
# basic count
|
|
|
|
assert_equal {1} [ssubscribe $rd1 {chan1}]
|
|
|
|
assert_equal {1} [subscribe $rd2 {chan2}]
|
|
|
|
set info [r info clients]
|
|
|
|
assert_equal [getInfoProperty $info pubsub_clients] {2}
|
|
|
|
# unsubscribe from a channel that was never subscribed
|
|
|
|
assert_equal {1} [unsubscribe $rd2 {non-exist-chan}]
|
|
|
|
set info [r info clients]
|
|
|
|
assert_equal [getInfoProperty $info pubsub_clients] {2}
|
|
|
|
# count changes when a client unsubscribes from all channels
|
|
|
|
assert_equal {0} [unsubscribe $rd2 {chan2}]
|
|
|
|
set info [r info clients]
|
|
|
|
assert_equal [getInfoProperty $info pubsub_clients] {1}
|
|
|
|
# non-pubsub clients should not be involved
|
|
|
|
assert_equal {0} [unsubscribe $rd2 {non-exist-chan}]
|
|
|
|
set info [r info clients]
|
|
|
|
assert_equal [getInfoProperty $info pubsub_clients] {1}
|
|
|
|
# close all clients
|
|
|
|
$rd1 close
|
|
|
|
$rd2 close
|
|
|
|
wait_for_condition 100 50 {
|
|
|
|
[getInfoProperty [r info clients] pubsub_clients] eq {0}
|
|
|
|
} else {
|
|
|
|
fail "pubsub clients did not clear"
|
|
|
|
}
|
|
|
|
}
|
Add metrics for WATCH (#12966)
Redis has some special commands that mark the client's state, such as
`subscribe` and `blpop`, which mark the client as `CLIENT_PUBSUB` or
`CLIENT_BLOCKED`, and we have metrics for the special use cases.
However, there are also other special commands, like `WATCH`, which,
although they do not set a specific flag, should also be considered
stateful client types. For stateful clients, in many scenarios the
connections cannot be shared in a "connection pool", meaning a connection
pool cannot be used. For example, whenever the `WATCH` command is
executed, a new connection is required to put the client into the "watch
state", because the watched keys are stored in the client.
If different business logic requires watching different keys, separate
connections must be used; otherwise, there will be contamination. This
also means that if a user's business heavily relies on the `WATCH`
command, a large number of connections will be required.
Recently we have encountered this situation in our platform, where some
users consume a significant number of connections when using Redis
because of `WATCH`.
I hope we can have a way to observe these special use cases and special
client connections. Here I add a few monitoring metrics:
1. `watching_clients` in `INFO` reply: The number of clients currently
in the "watching" state.
2. `total_watched_keys` in `INFO` reply: The total number of keys being
watched.
3. `watch` in `CLIENT LIST` reply: The number of keys each client is
currently watching.
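As a purely illustrative example of where these metrics surface (all numbers invented, other `CLIENT LIST` fields abbreviated):
```
# INFO clients (excerpt)
watching_clients:1
total_watched_keys:2

# CLIENT LIST (excerpt)
id=7 addr=127.0.0.1:51234 ... watch=2 ...
```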
2024-02-18 03:36:41 -05:00
|
|
|
|
|
|
|
test {clients: watching clients} {
|
2024-03-21 05:56:59 -04:00
|
|
|
set r2 [redict_client]
|
|
|
|
assert_equal [s watching_clients] 0
|
|
|
|
assert_equal [s total_watched_keys] 0
|
|
|
|
assert_match {*watch=0*} [r client info]
|
|
|
|
assert_match {*watch=0*} [$r2 client info]
|
|
|
|
# count after watch key
|
|
|
|
$r2 watch key
|
|
|
|
assert_equal [s watching_clients] 1
|
|
|
|
assert_equal [s total_watched_keys] 1
|
|
|
|
assert_match {*watch=0*} [r client info]
|
|
|
|
assert_match {*watch=1*} [$r2 client info]
|
|
|
|
# the same client watching the same key again has no effect
|
|
|
|
$r2 watch key
|
|
|
|
assert_equal [s watching_clients] 1
|
|
|
|
assert_equal [s total_watched_keys] 1
|
|
|
|
assert_match {*watch=0*} [r client info]
|
|
|
|
assert_match {*watch=1*} [$r2 client info]
|
|
|
|
# a different client watching a different key
|
|
|
|
r watch key2
|
|
|
|
assert_equal [s watching_clients] 2
|
|
|
|
assert_equal [s total_watched_keys] 2
|
|
|
|
assert_match {*watch=1*} [$r2 client info]
|
|
|
|
assert_match {*watch=1*} [r client info]
|
|
|
|
# count after unwatch
|
|
|
|
r unwatch
|
|
|
|
assert_equal [s watching_clients] 1
|
|
|
|
assert_equal [s total_watched_keys] 1
|
|
|
|
assert_match {*watch=0*} [r client info]
|
|
|
|
assert_match {*watch=1*} [$r2 client info]
|
|
|
|
$r2 unwatch
|
|
|
|
assert_equal [s watching_clients] 0
|
|
|
|
assert_equal [s total_watched_keys] 0
|
|
|
|
assert_match {*watch=0*} [r client info]
|
|
|
|
assert_match {*watch=0*} [$r2 client info]
|
|
|
|
|
|
|
|
# count after watch/multi/exec
|
|
|
|
$r2 watch key
|
|
|
|
assert_equal [s watching_clients] 1
|
|
|
|
$r2 multi
|
|
|
|
$r2 exec
|
|
|
|
assert_equal [s watching_clients] 0
|
|
|
|
# count after watch/multi/discard
|
|
|
|
$r2 watch key
|
|
|
|
assert_equal [s watching_clients] 1
|
|
|
|
$r2 multi
|
|
|
|
$r2 discard
|
|
|
|
assert_equal [s watching_clients] 0
|
|
|
|
# discard without multi has no effect
|
|
|
|
$r2 watch key
|
|
|
|
assert_equal [s watching_clients] 1
|
|
|
|
catch {$r2 discard} e
|
|
|
|
assert_equal [s watching_clients] 1
|
|
|
|
# unwatch without watch has no effect
|
|
|
|
r unwatch
|
|
|
|
assert_equal [s watching_clients] 1
|
2024-02-20 04:12:19 -05:00
|
|
|
# after disconnect, since close may arrive later, or the client may
|
|
|
|
# be freed asynchronously, we use a wait_for_condition
|
|
|
|
$r2 close
|
2024-02-20 04:12:19 -05:00
|
|
|
wait_for_watched_clients_count 0
|
|
|
|
}
|
2020-12-31 09:53:43 -05:00
|
|
|
}
|
|
|
|
}
|
2024-03-01 00:41:24 -05:00
|
|
|
|
|
|
|
start_server {tags {"info" "external:skip"}} {
|
|
|
|
test {memory: database and pubsub overhead and rehashing dict count} {
|
|
|
|
r flushall
|
|
|
|
set info_mem [r info memory]
|
|
|
|
set mem_stats [r memory stats]
|
|
|
|
assert_equal [getInfoProperty $info_mem mem_overhead_db_hashtable_rehashing] {0}
|
|
|
|
assert_equal [dict get $mem_stats overhead.db.hashtable.lut] {0}
|
|
|
|
assert_equal [dict get $mem_stats overhead.db.hashtable.rehashing] {0}
|
|
|
|
assert_equal [dict get $mem_stats db.dict.rehashing.count] {0}
|
|
|
|
# The initial dict expand does not count as rehashing
|
|
|
|
r set a b
|
|
|
|
set info_mem [r info memory]
|
|
|
|
set mem_stats [r memory stats]
|
|
|
|
assert_equal [getInfoProperty $info_mem mem_overhead_db_hashtable_rehashing] {0}
|
|
|
|
assert_range [dict get $mem_stats overhead.db.hashtable.lut] 1 64
|
|
|
|
assert_equal [dict get $mem_stats overhead.db.hashtable.rehashing] {0}
|
|
|
|
assert_equal [dict get $mem_stats db.dict.rehashing.count] {0}
|
|
|
|
# set 4 more keys to trigger rehashing
|
|
|
|
# get the info within a transaction to make sure the rehashing has not completed yet
|
|
|
|
r multi
|
|
|
|
r set b c
|
|
|
|
r set c d
|
|
|
|
r set d e
|
|
|
|
r set e f
|
|
|
|
r info memory
|
|
|
|
r memory stats
|
|
|
|
set res [r exec]
|
|
|
|
set info_mem [lindex $res 4]
|
|
|
|
set mem_stats [lindex $res 5]
|
|
|
|
assert_range [getInfoProperty $info_mem mem_overhead_db_hashtable_rehashing] 1 64
|
|
|
|
assert_range [dict get $mem_stats overhead.db.hashtable.lut] 1 192
|
|
|
|
assert_range [dict get $mem_stats overhead.db.hashtable.rehashing] 1 64
|
|
|
|
assert_equal [dict get $mem_stats db.dict.rehashing.count] {1}
|
|
|
|
}
|
|
|
|
}
|