From 9b15dd288e2a053cc84f3e0088a0275dc7c17486 Mon Sep 17 00:00:00 2001 From: ranshid <88133677+ranshid@users.noreply.github.com> Date: Tue, 1 Mar 2022 14:40:29 +0200 Subject: [PATCH] Introduce debug command to disable reply buffer resizing (#10360) In order to resolve some flaky tests which hard rely on examine memory footprint. we introduce the following fixes: # Fix in client-eviction test - by @yoav-steinberg Sometime the libc allocator can use different size client struct allocations. this may cause unexpected memory calculations to fail the test. # Introduce new DEBUG command for disabling reply buffer resizing In order to eliminate reply buffer resizing during specific tests. we introduced the ability to disable (and enable) the resizing cron job Co-authored-by: yoav-steinberg yoav@redislabs.com --- src/debug.c | 25 +++++++++++++++++-------- src/server.c | 5 +++++ src/server.h | 1 + tests/unit/client-eviction.tcl | 34 +++++++++++++++++++++------------- tests/unit/replybufsize.tcl | 8 ++++---- 5 files changed, 48 insertions(+), 25 deletions(-) diff --git a/src/debug.c b/src/debug.c index b95daaa36..2f57e7de3 100644 --- a/src/debug.c +++ b/src/debug.c @@ -482,10 +482,12 @@ void debugCommand(client *c) { " Show low level client eviction pools info (maxmemory-clients).", "PAUSE-CRON <0|1>", " Stop periodic cron job processing.", -"REPLYBUFFER-PEAK-RESET-TIME ", +"REPLYBUFFER PEAK-RESET-TIME ", " Sets the time (in milliseconds) to wait between client reply buffer peak resets.", " In case NEVER is provided the last observed peak will never be reset", " In case RESET is provided the peak reset time will be restored to the default value", +"REPLYBUFFER RESIZING <0|1>", +" Enable or disable the replay buffer resize cron job", NULL }; addReplyHelp(c, help); @@ -962,14 +964,21 @@ NULL { server.pause_cron = atoi(c->argv[2]->ptr); addReply(c,shared.ok); - } else if (!strcasecmp(c->argv[1]->ptr,"replybuffer-peak-reset-time") && c->argc == 3 ) { - if (!strcasecmp(c->argv[2]->ptr, "never")) { - server.reply_buffer_peak_reset_time = -1; - } else if(!strcasecmp(c->argv[2]->ptr, "reset")) { - server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME; + } else if (!strcasecmp(c->argv[1]->ptr,"replybuffer") && c->argc == 4 ) { + if(!strcasecmp(c->argv[2]->ptr, "peak-reset-time")) { + if (!strcasecmp(c->argv[3]->ptr, "never")) { + server.reply_buffer_peak_reset_time = -1; + } else if(!strcasecmp(c->argv[3]->ptr, "reset")) { + server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME; + } else { + if (getLongFromObjectOrReply(c, c->argv[3], &server.reply_buffer_peak_reset_time, NULL) != C_OK) + return; + } + } else if(!strcasecmp(c->argv[2]->ptr,"resizing")) { + server.reply_buffer_resizing_enabled = atoi(c->argv[3]->ptr); } else { - if (getLongFromObjectOrReply(c, c->argv[2], &server.reply_buffer_peak_reset_time, NULL) != C_OK) - return; + addReplySubcommandSyntaxError(c); + return; } addReply(c, shared.ok); } else { diff --git a/src/server.c b/src/server.c index 3fa912937..f170ea7a2 100644 --- a/src/server.c +++ b/src/server.c @@ -744,6 +744,10 @@ int clientsCronResizeOutputBuffer(client *c, mstime_t now_ms) { const size_t buffer_target_shrink_size = c->buf_usable_size/2; const size_t buffer_target_expand_size = c->buf_usable_size*2; + /* in case the resizing is disabled return immediately */ + if(!server.reply_buffer_resizing_enabled) + return 0; + if (buffer_target_shrink_size >= PROTO_REPLY_MIN_BYTES && c->buf_peak < buffer_target_shrink_size ) { @@ -2429,6 +2433,7 @@ void initServer(void) { server.thp_enabled = 0; server.cluster_drop_packet_filter = -1; server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME; + server.reply_buffer_resizing_enabled = 1; resetReplicationBuffer(); if ((server.tls_port || server.tls_replication || server.tls_cluster) diff --git a/src/server.h b/src/server.h index df21838cd..7f979d22b 100644 --- a/src/server.h +++ b/src/server.h @@ -1914,6 +1914,7 @@ struct redisServer { int cluster_allow_pubsubshard_when_down; /* Is pubsubshard allowed when the cluster is down, doesn't affect pubsub global. */ long reply_buffer_peak_reset_time; /* The amount of time (in milliseconds) to wait between reply buffer peak resets */ + int reply_buffer_resizing_enabled; /* Is reply buffer resizing enabled (1 by default) */ }; #define MAX_KEYS_BUFFER 256 diff --git a/tests/unit/client-eviction.tcl b/tests/unit/client-eviction.tcl index 949ac8f3d..469828473 100644 --- a/tests/unit/client-eviction.tcl +++ b/tests/unit/client-eviction.tcl @@ -395,13 +395,14 @@ start_server {} { test "evict clients only until below limit" { set client_count 10 set client_mem [mb 1] - r debug replybuffer-peak-reset-time never + r debug replybuffer resizing 0 r config set maxmemory-clients 0 r client setname control r client no-evict on # Make multiple clients consume together roughly 1mb less than maxmemory_clients set total_client_mem 0 + set max_client_mem 0 set rrs {} for {set j 0} {$j < $client_count} {incr j} { set rr [redis_client] @@ -414,20 +415,27 @@ start_server {} { } else { fail "Failed to fill qbuf for test" } - incr total_client_mem [client_field client$j tot-mem] + # In theory all these clients should use the same amount of memory (~1mb). But in practice + # some allocators (libc) can return different allocation sizes for the same malloc argument causing + # some clients to use slightly more memory than others. We find the largest client and make sure + # all clients are roughly the same size (+-1%). Then we can safely set the client eviction limit and + # expect consistent results in the test. + set cmem [client_field client$j tot-mem] + if {$max_client_mem > 0} { + set size_ratio [expr $max_client_mem.0/$cmem.0] + assert_range $size_ratio 0.99 1.01 + } + if {$cmem > $max_client_mem} { + set max_client_mem $cmem + } } - set client_actual_mem [expr $total_client_mem / $client_count] - - # Make sure client_acutal_mem is more or equal to what we intended - assert {$client_actual_mem >= $client_mem} - # Make sure all clients are still connected set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]] assert {$connected_clients == $client_count} # Set maxmemory-clients to accommodate half our clients (taking into account the control client) - set maxmemory_clients [expr ($client_actual_mem * $client_count) / 2 + [client_field control tot-mem]] + set maxmemory_clients [expr ($max_client_mem * $client_count) / 2 + [client_field control tot-mem]] r config set maxmemory-clients $maxmemory_clients # Make sure total used memory is below maxmemory_clients @@ -438,8 +446,8 @@ start_server {} { set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]] assert {$connected_clients == [expr $client_count / 2]} - # Restore the peak reset time to default - r debug replybuffer-peak-reset-time reset + # Restore the reply buffer resize to default + r debug replybuffer resizing 1 foreach rr $rrs {$rr close} } {} {needs:debug} @@ -454,7 +462,7 @@ start_server {} { r client setname control r client no-evict on r config set maxmemory-clients 0 - r debug replybuffer-peak-reset-time never + r debug replybuffer resizing 0 # Run over all sizes and create some clients using up that size set total_client_mem 0 @@ -505,8 +513,8 @@ start_server {} { } } - # Restore the peak reset time to default - r debug replybuffer-peak-reset-time reset + # Restore the reply buffer resize to default + r debug replybuffer resizing 1 foreach rr $rrs {$rr close} } {} {needs:debug} diff --git a/tests/unit/replybufsize.tcl b/tests/unit/replybufsize.tcl index 9377a8fd3..933189eb3 100644 --- a/tests/unit/replybufsize.tcl +++ b/tests/unit/replybufsize.tcl @@ -3,7 +3,7 @@ proc get_reply_buffer_size {cname} { set clients [split [string trim [r client list]] "\r\n"] set c [lsearch -inline $clients *name=$cname*] if {![regexp rbs=(\[a-zA-Z0-9-\]+) $c - rbufsize]} { - error "field rbus not found in $c" + error "field rbs not found in $c" } return $rbufsize } @@ -12,7 +12,7 @@ start_server {tags {"replybufsize"}} { test {verify reply buffer limits} { # In order to reduce test time we can set the peak reset time very low - r debug replybuffer-peak-reset-time 100 + r debug replybuffer peak-reset-time 100 # Create a simple idle test client variable tc [redis_client] @@ -29,7 +29,7 @@ start_server {tags {"replybufsize"}} { r set bigval [string repeat x 32768] # In order to reduce test time we can set the peak reset time very low - r debug replybuffer-peak-reset-time never + r debug replybuffer peak-reset-time never wait_for_condition 10 100 { [$tc get bigval ; get_reply_buffer_size test_client] >= 16384 && [get_reply_buffer_size test_client] < 32768 @@ -39,7 +39,7 @@ start_server {tags {"replybufsize"}} { } # Restore the peak reset time to default - r debug replybuffer-peak-reset-time reset + r debug replybuffer peak-reset-time reset $tc close } {0} {needs:debug}