redict/tests/unit/moduleapi/blockonbackground.tcl
filipe oliveira 5dd15443ac
Added INFO LATENCYSTATS section: latency by percentile distribution/latency by cumulative distribution of latencies (#9462)
# Short description

The Redis extended latency stats track per command latencies and enables:
- exporting the per-command percentile distribution via the `INFO LATENCYSTATS` command.
  **( percentile distribution is not mergeable between cluster nodes ).**
- exporting the per-command cumulative latency distributions via the `LATENCY HISTOGRAM` command.
  Using the cumulative distribution of latencies we can merge several stats from different cluster nodes
  to calculate aggregate metrics .

By default, the extended latency monitoring is enabled since the overhead of keeping track of the
command latency is very small.
 
If you don't want to track extended latency metrics, you can easily disable it at runtime using the command:
 - `CONFIG SET latency-tracking no`

By default, the exported latency percentiles are the p50, p99, and p999.
You can alter them at runtime using the command:
- `CONFIG SET latency-tracking-info-percentiles "0.0 50.0 100.0"`


## Some details:
- The total size per histogram should sit around 40 KiB. We only allocate those 40KiB when a command
  was called for the first time.
- With regards to the WRITE overhead As seen below, there is no measurable overhead on the achievable
  ops/sec or full latency spectrum on the client. Including also the measured redis-benchmark for unstable
  vs this branch. 
- We track from 1 nanosecond to 1 second ( everything above 1 second is considered +Inf )

## `INFO LATENCYSTATS` exposition format

   - Format: `latency_percentiles_usec_<CMDNAME>:p0=XX,p50....` 

## `LATENCY HISTOGRAM [command ...]` exposition format

Return a cumulative distribution of latencies in the format of a histogram for the specified command names.

The histogram is composed of a map of time buckets:
- Each representing a latency range, between 1 nanosecond and roughly 1 second.
- Each bucket covers twice the previous bucket's range.
- Empty buckets are not printed.
- Everything above 1 sec is considered +Inf.
- At max there will be log2(1000000000)=30 buckets

We reply a map for each command in the format:
`<command name> : { `calls`: <total command calls> , `histogram` : { <bucket 1> : latency , < bucket 2> : latency, ...  } }`

Co-authored-by: Oran Agra <oran@redislabs.com>
2022-01-05 14:01:05 +02:00

127 lines
4.3 KiB
Tcl

set testmodule [file normalize tests/modules/blockonbackground.so]
source tests/support/util.tcl
proc latency_percentiles_usec {cmd} {
return [latencyrstat_percentiles $cmd r]
}
start_server {tags {"modules"}} {
r module load $testmodule
test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time} {
r slowlog reset
r config set slowlog-log-slower-than 200000
if {!$::no_latency} {
assert_equal [r slowlog len] 0
}
r block.debug 0 10000
if {!$::no_latency} {
assert_equal [r slowlog len] 0
}
r config resetstat
r config set latency-tracking yes
r config set latency-tracking-info-percentiles "50.0"
r block.debug 200 10000
if {!$::no_latency} {
assert_equal [r slowlog len] 1
}
set cmdstatline [cmdrstat block.debug r]
set latencystatline_debug [latency_percentiles_usec block.debug]
regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline -> usec usec_per_call
regexp "p50.000000=(.+\..+)" $latencystatline_debug -> p50
assert {$usec >= 100000}
assert {$usec_per_call >= 100000}
assert {$p50 >= 100000}
}
test { blocked clients time tracking - check blocked command that uses RedisModule_BlockedClientMeasureTimeStart() is tracking background time even in timeout } {
r slowlog reset
r config set slowlog-log-slower-than 200000
if {!$::no_latency} {
assert_equal [r slowlog len] 0
}
r block.debug 0 20000
if {!$::no_latency} {
assert_equal [r slowlog len] 0
}
r config resetstat
r block.debug 20000 500
if {!$::no_latency} {
assert_equal [r slowlog len] 1
}
set cmdstatline [cmdrstat block.debug r]
regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call
assert {$usec >= 250000}
assert {$usec_per_call >= 250000}
}
test { blocked clients time tracking - check blocked command with multiple calls RedisModule_BlockedClientMeasureTimeStart() is tracking the total background time } {
r slowlog reset
r config set slowlog-log-slower-than 200000
if {!$::no_latency} {
assert_equal [r slowlog len] 0
}
r block.double_debug 0
if {!$::no_latency} {
assert_equal [r slowlog len] 0
}
r config resetstat
r block.double_debug 100
if {!$::no_latency} {
assert_equal [r slowlog len] 1
}
set cmdstatline [cmdrstat block.double_debug r]
regexp "calls=1,usec=(.*?),usec_per_call=(.*?),rejected_calls=0,failed_calls=0" $cmdstatline usec usec_per_call
assert {$usec >= 60000}
assert {$usec_per_call >= 60000}
}
test { blocked clients time tracking - check blocked command without calling RedisModule_BlockedClientMeasureTimeStart() is not reporting background time } {
r slowlog reset
r config set slowlog-log-slower-than 200000
if {!$::no_latency} {
assert_equal [r slowlog len] 0
}
r block.debug_no_track 200 1000
# ensure slowlog is still empty
if {!$::no_latency} {
assert_equal [r slowlog len] 0
}
}
test "client unblock works only for modules with timeout support" {
set rd [redis_deferring_client]
$rd client id
set id [$rd read]
# Block with a timeout function - may unblock
$rd block.block 20000
wait_for_condition 50 100 {
[r block.is_blocked] == 1
} else {
fail "Module did not block"
}
assert_equal 1 [r client unblock $id]
assert_match {*Timed out*} [$rd read]
# Block without a timeout function - cannot unblock
$rd block.block 0
wait_for_condition 50 100 {
[r block.is_blocked] == 1
} else {
fail "Module did not block"
}
assert_equal 0 [r client unblock $id]
assert_equal "OK" [r block.release foobar]
assert_equal "foobar" [$rd read]
}
}