mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-22 16:18:28 -05:00
5dd15443ac
# Short description The Redis extended latency stats track per command latencies and enables: - exporting the per-command percentile distribution via the `INFO LATENCYSTATS` command. **( percentile distribution is not mergeable between cluster nodes ).** - exporting the per-command cumulative latency distributions via the `LATENCY HISTOGRAM` command. Using the cumulative distribution of latencies we can merge several stats from different cluster nodes to calculate aggregate metrics . By default, the extended latency monitoring is enabled since the overhead of keeping track of the command latency is very small. If you don't want to track extended latency metrics, you can easily disable it at runtime using the command: - `CONFIG SET latency-tracking no` By default, the exported latency percentiles are the p50, p99, and p999. You can alter them at runtime using the command: - `CONFIG SET latency-tracking-info-percentiles "0.0 50.0 100.0"` ## Some details: - The total size per histogram should sit around 40 KiB. We only allocate those 40KiB when a command was called for the first time. - With regards to the WRITE overhead As seen below, there is no measurable overhead on the achievable ops/sec or full latency spectrum on the client. Including also the measured redis-benchmark for unstable vs this branch. - We track from 1 nanosecond to 1 second ( everything above 1 second is considered +Inf ) ## `INFO LATENCYSTATS` exposition format - Format: `latency_percentiles_usec_<CMDNAME>:p0=XX,p50....` ## `LATENCY HISTOGRAM [command ...]` exposition format Return a cumulative distribution of latencies in the format of a histogram for the specified command names. The histogram is composed of a map of time buckets: - Each representing a latency range, between 1 nanosecond and roughly 1 second. - Each bucket covers twice the previous bucket's range. - Empty buckets are not printed. - Everything above 1 sec is considered +Inf. - At max there will be log2(1000000000)=30 buckets We reply a map for each command in the format: `<command name> : { `calls`: <total command calls> , `histogram` : { <bucket 1> : latency , < bucket 2> : latency, ... } }` Co-authored-by: Oran Agra <oran@redislabs.com>
249 lines
10 KiB
Tcl
249 lines
10 KiB
Tcl
proc cmdstat {cmd} {
|
|
return [cmdrstat $cmd r]
|
|
}
|
|
|
|
proc errorstat {cmd} {
|
|
return [errorrstat $cmd r]
|
|
}
|
|
|
|
proc latency_percentiles_usec {cmd} {
|
|
return [latencyrstat_percentiles $cmd r]
|
|
}
|
|
|
|
start_server {tags {"info" "external:skip"}} {
|
|
start_server {} {
|
|
|
|
test {latencystats: disable/enable} {
|
|
r config resetstat
|
|
r CONFIG SET latency-tracking no
|
|
r set a b
|
|
assert_match {} [latency_percentiles_usec set]
|
|
r CONFIG SET latency-tracking yes
|
|
r set a b
|
|
assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec set]
|
|
r config resetstat
|
|
assert_match {} [latency_percentiles_usec set]
|
|
}
|
|
|
|
test {latencystats: configure percentiles} {
|
|
r config resetstat
|
|
assert_match {} [latency_percentiles_usec set]
|
|
r CONFIG SET latency-tracking yes
|
|
r SET a b
|
|
r GET a
|
|
assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec set]
|
|
assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec get]
|
|
r CONFIG SET latency-tracking-info-percentiles "0.0 50.0 100.0"
|
|
assert_match [r config get latency-tracking-info-percentiles] {latency-tracking-info-percentiles {0.000000 50.000000 100.000000}}
|
|
assert_match {*p0.000000=*,p50.000000=*,p100.000000=*} [latency_percentiles_usec set]
|
|
assert_match {*p0.000000=*,p50.000000=*,p100.000000=*} [latency_percentiles_usec get]
|
|
r config resetstat
|
|
assert_match {} [latency_percentiles_usec set]
|
|
}
|
|
|
|
test {latencystats: bad configure percentiles} {
|
|
r config resetstat
|
|
set configlatencyline [r config get latency-tracking-info-percentiles]
|
|
catch {r CONFIG SET latency-tracking-info-percentiles "10.0 50.0 a"} e
|
|
assert_match {ERR CONFIG SET failed*} $e
|
|
assert_equal [s total_error_replies] 1
|
|
assert_match [r config get latency-tracking-info-percentiles] $configlatencyline
|
|
catch {r CONFIG SET latency-tracking-info-percentiles "10.0 50.0 101.0"} e
|
|
assert_match {ERR CONFIG SET failed*} $e
|
|
assert_equal [s total_error_replies] 2
|
|
assert_match [r config get latency-tracking-info-percentiles] $configlatencyline
|
|
r config resetstat
|
|
assert_match {} [errorstat ERR]
|
|
}
|
|
|
|
test {latencystats: blocking commands} {
|
|
r config resetstat
|
|
r CONFIG SET latency-tracking yes
|
|
r CONFIG SET latency-tracking-info-percentiles "50.0 99.0 99.9"
|
|
set rd [redis_deferring_client]
|
|
r del list1{t}
|
|
|
|
$rd blpop list1{t} 0
|
|
wait_for_blocked_client
|
|
r lpush list1{t} a
|
|
assert_equal [$rd read] {list1{t} a}
|
|
$rd blpop list1{t} 0
|
|
wait_for_blocked_client
|
|
r lpush list1{t} b
|
|
assert_equal [$rd read] {list1{t} b}
|
|
assert_match {*p50.000000=*,p99.000000=*,p99.900000=*} [latency_percentiles_usec blpop]
|
|
$rd close
|
|
}
|
|
|
|
test {latencystats: measure latency} {
|
|
r config resetstat
|
|
r CONFIG SET latency-tracking yes
|
|
r CONFIG SET latency-tracking-info-percentiles "50.0"
|
|
r DEBUG sleep 0.05
|
|
r SET k v
|
|
set latencystatline_debug [latency_percentiles_usec debug]
|
|
set latencystatline_set [latency_percentiles_usec set]
|
|
regexp "p50.000000=(.+\..+)" $latencystatline_debug -> p50_debug
|
|
regexp "p50.000000=(.+\..+)" $latencystatline_set -> p50_set
|
|
assert {$p50_debug >= 50000}
|
|
assert {$p50_set >= 0}
|
|
assert {$p50_debug >= $p50_set}
|
|
} {} {needs:debug}
|
|
|
|
test {errorstats: failed call authentication error} {
|
|
r config resetstat
|
|
assert_match {} [errorstat ERR]
|
|
assert_equal [s total_error_replies] 0
|
|
catch {r auth k} e
|
|
assert_match {ERR AUTH*} $e
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth]
|
|
assert_equal [s total_error_replies] 1
|
|
r config resetstat
|
|
assert_match {} [errorstat ERR]
|
|
}
|
|
|
|
test {errorstats: failed call within MULTI/EXEC} {
|
|
r config resetstat
|
|
assert_match {} [errorstat ERR]
|
|
assert_equal [s total_error_replies] 0
|
|
r multi
|
|
r set a b
|
|
r auth a
|
|
catch {r exec} e
|
|
assert_match {ERR AUTH*} $e
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat set]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat auth]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat exec]
|
|
assert_equal [s total_error_replies] 1
|
|
|
|
# MULTI/EXEC command errors should still be pinpointed to him
|
|
catch {r exec} e
|
|
assert_match {ERR EXEC without MULTI} $e
|
|
assert_match {*calls=2,*,rejected_calls=0,failed_calls=1} [cmdstat exec]
|
|
assert_match {*count=2*} [errorstat ERR]
|
|
assert_equal [s total_error_replies] 2
|
|
}
|
|
|
|
test {errorstats: failed call within LUA} {
|
|
r config resetstat
|
|
assert_match {} [errorstat ERR]
|
|
assert_equal [s total_error_replies] 0
|
|
catch {r eval {redis.pcall('XGROUP', 'CREATECONSUMER', 's1', 'mygroup', 'consumer') return } 0} e
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat xgroup\\|createconsumer]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat eval]
|
|
|
|
# EVAL command errors should still be pinpointed to him
|
|
catch {r eval a} e
|
|
assert_match {ERR wrong*} $e
|
|
assert_match {*calls=1,*,rejected_calls=1,failed_calls=0} [cmdstat eval]
|
|
assert_match {*count=2*} [errorstat ERR]
|
|
assert_equal [s total_error_replies] 2
|
|
}
|
|
|
|
test {errorstats: failed call NOSCRIPT error} {
|
|
r config resetstat
|
|
assert_equal [s total_error_replies] 0
|
|
assert_match {} [errorstat NOSCRIPT]
|
|
catch {r evalsha NotValidShaSUM 0} e
|
|
assert_match {NOSCRIPT*} $e
|
|
assert_match {*count=1*} [errorstat NOSCRIPT]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat evalsha]
|
|
assert_equal [s total_error_replies] 1
|
|
r config resetstat
|
|
assert_match {} [errorstat NOSCRIPT]
|
|
}
|
|
|
|
test {errorstats: failed call NOGROUP error} {
|
|
r config resetstat
|
|
assert_match {} [errorstat NOGROUP]
|
|
r del mystream
|
|
r XADD mystream * f v
|
|
catch {r XGROUP CREATECONSUMER mystream mygroup consumer} e
|
|
assert_match {NOGROUP*} $e
|
|
assert_match {*count=1*} [errorstat NOGROUP]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat xgroup\\|createconsumer]
|
|
r config resetstat
|
|
assert_match {} [errorstat NOGROUP]
|
|
}
|
|
|
|
test {errorstats: rejected call unknown command} {
|
|
r config resetstat
|
|
assert_equal [s total_error_replies] 0
|
|
assert_match {} [errorstat ERR]
|
|
catch {r asdf} e
|
|
assert_match {ERR unknown*} $e
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
assert_equal [s total_error_replies] 1
|
|
r config resetstat
|
|
assert_match {} [errorstat ERR]
|
|
}
|
|
|
|
test {errorstats: rejected call within MULTI/EXEC} {
|
|
r config resetstat
|
|
assert_equal [s total_error_replies] 0
|
|
assert_match {} [errorstat ERR]
|
|
r multi
|
|
catch {r set} e
|
|
assert_match {ERR wrong number of arguments*} $e
|
|
catch {r exec} e
|
|
assert_match {EXECABORT*} $e
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
assert_match {*count=1*} [errorstat EXECABORT]
|
|
assert_equal [s total_error_replies] 2
|
|
assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=0} [cmdstat multi]
|
|
assert_match {*calls=1,*,rejected_calls=0,failed_calls=1} [cmdstat exec]
|
|
assert_equal [s total_error_replies] 2
|
|
r config resetstat
|
|
assert_match {} [errorstat ERR]
|
|
}
|
|
|
|
test {errorstats: rejected call due to wrong arity} {
|
|
r config resetstat
|
|
assert_equal [s total_error_replies] 0
|
|
assert_match {} [errorstat ERR]
|
|
catch {r set k} e
|
|
assert_match {ERR wrong number of arguments*} $e
|
|
assert_match {*count=1*} [errorstat ERR]
|
|
assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
# ensure that after a rejected command, valid ones are counted properly
|
|
r set k1 v1
|
|
r set k2 v2
|
|
assert_match {calls=2,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
assert_equal [s total_error_replies] 1
|
|
}
|
|
|
|
test {errorstats: rejected call by OOM error} {
|
|
r config resetstat
|
|
assert_equal [s total_error_replies] 0
|
|
assert_match {} [errorstat OOM]
|
|
r config set maxmemory 1
|
|
catch {r set a b} e
|
|
assert_match {OOM*} $e
|
|
assert_match {*count=1*} [errorstat OOM]
|
|
assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
assert_equal [s total_error_replies] 1
|
|
r config resetstat
|
|
assert_match {} [errorstat OOM]
|
|
}
|
|
|
|
test {errorstats: rejected call by authorization error} {
|
|
r config resetstat
|
|
assert_equal [s total_error_replies] 0
|
|
assert_match {} [errorstat NOPERM]
|
|
r ACL SETUSER alice on >p1pp0 ~cached:* +get +info +config
|
|
r auth alice p1pp0
|
|
catch {r set a b} e
|
|
assert_match {NOPERM*} $e
|
|
assert_match {*count=1*} [errorstat NOPERM]
|
|
assert_match {*calls=0,*,rejected_calls=1,failed_calls=0} [cmdstat set]
|
|
assert_equal [s total_error_replies] 1
|
|
r config resetstat
|
|
assert_match {} [errorstat NOPERM]
|
|
}
|
|
}
|
|
}
|