mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-22 08:08:53 -05:00
Add oom-score-adj configuration option to control Linux OOM killer. (#1690)
Add Linux kernel OOM killer control option. This adds the ability to control the Linux OOM killer oom_score_adj parameter for all Redis processes, depending on the process role (i.e. master, replica, background child). An oom-score-adj global boolean flag controls this feature. In addition, specific values can be configured using oom-score-adj-values if additional tuning is required.
This commit is contained in:
parent
f3df3ec134
commit
2530dc0ebd
26
redis.conf
26
redis.conf
@ -1049,6 +1049,32 @@ lazyfree-lazy-user-del no
|
||||
# --threads option to match the number of Redis threads, otherwise you'll not
|
||||
# be able to notice the improvements.
|
||||
|
||||
############################ KERNEL OOM CONTROL ##############################
|
||||
|
||||
# On Linux, it is possible to hint the kernel OOM killer on what processes
|
||||
# should be killed first when out of memory.
|
||||
#
|
||||
# Enabling this feature makes Redis actively control the oom_score_adj value
|
||||
# for all its processes, depending on their role. The default scores will
|
||||
# attempt to have background child processes killed before all others, and
|
||||
# replicas killed before masters.
|
||||
|
||||
oom-score-adj no
|
||||
|
||||
# When oom-score-adj is used, this directive controls the specific values used
|
||||
# for master, replica and background child processes. Values range -1000 to
|
||||
# 1000 (higher means more likely to be killed).
|
||||
#
|
||||
# Unprivileged processes (not root, and without CAP_SYS_RESOURCE capabilities)
|
||||
# can freely increase their value, but not decrease it below its initial
|
||||
# settings.
|
||||
#
|
||||
# Values are used relative to the initial value of oom_score_adj when the server
|
||||
# starts. Because typically the initial value is 0, they will often match the
|
||||
# absolute values.
|
||||
|
||||
oom-score-adj-values 0 200 800
|
||||
|
||||
############################## APPEND ONLY MODE ###############################
|
||||
|
||||
# By default Redis asynchronously dumps the dataset on disk. This mode is
|
||||
|
119
src/config.c
119
src/config.c
@ -111,6 +111,9 @@ clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT] = {
|
||||
{1024*1024*32, 1024*1024*8, 60} /* pubsub */
|
||||
};
|
||||
|
||||
/* OOM Score defaults */
|
||||
int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT] = { 0, 200, 800 };
|
||||
|
||||
/* Generic config infrastructure function pointers
|
||||
* int is_valid_fn(val, err)
|
||||
* Return 1 when val is valid, and 0 when invalid.
|
||||
@ -286,6 +289,59 @@ void queueLoadModule(sds path, sds *argv, int argc) {
|
||||
listAddNodeTail(server.loadmodule_queue,loadmod);
|
||||
}
|
||||
|
||||
/* Parse an array of CONFIG_OOM_COUNT sds strings, validate and populate
|
||||
* server.oom_score_adj_values if valid.
|
||||
*/
|
||||
|
||||
static int updateOOMScoreAdjValues(sds *args, char **err) {
|
||||
int i;
|
||||
int values[CONFIG_OOM_COUNT];
|
||||
|
||||
for (i = 0; i < CONFIG_OOM_COUNT; i++) {
|
||||
char *eptr;
|
||||
long long val = strtoll(args[i], &eptr, 10);
|
||||
|
||||
if (*eptr != '\0' || val < -1000 || val > 1000) {
|
||||
if (err) *err = "Invalid oom-score-adj-values, elements must be between -1000 and 1000.";
|
||||
return C_ERR;
|
||||
}
|
||||
|
||||
values[i] = val;
|
||||
}
|
||||
|
||||
/* Verify that the values make sense. If they don't omit a warning but
|
||||
* keep the configuration, which may still be valid for privileged processes.
|
||||
*/
|
||||
|
||||
if (values[CONFIG_OOM_REPLICA] < values[CONFIG_OOM_MASTER] ||
|
||||
values[CONFIG_OOM_BGCHILD] < values[CONFIG_OOM_REPLICA]) {
|
||||
serverLog(LOG_WARNING,
|
||||
"The oom-score-adj-values configuration may not work for non-privileged processes! "
|
||||
"Please consult the documentation.");
|
||||
}
|
||||
|
||||
/* Store values, retain previous config for rollback in case we fail. */
|
||||
int old_values[CONFIG_OOM_COUNT];
|
||||
for (i = 0; i < CONFIG_OOM_COUNT; i++) {
|
||||
old_values[i] = server.oom_score_adj_values[i];
|
||||
server.oom_score_adj_values[i] = values[i];
|
||||
}
|
||||
|
||||
/* Update */
|
||||
if (setOOMScoreAdj(-1) == C_ERR) {
|
||||
/* Roll back */
|
||||
for (i = 0; i < CONFIG_OOM_COUNT; i++)
|
||||
server.oom_score_adj_values[i] = old_values[i];
|
||||
|
||||
if (err)
|
||||
*err = "Failed to apply oom-score-adj-values configuration, check server logs.";
|
||||
|
||||
return C_ERR;
|
||||
}
|
||||
|
||||
return C_OK;
|
||||
}
|
||||
|
||||
void initConfigValues() {
|
||||
for (standardConfig *config = configs; config->name != NULL; config++) {
|
||||
config->interface.init(config->data);
|
||||
@ -479,6 +535,8 @@ void loadServerConfigFromString(char *config) {
|
||||
server.client_obuf_limits[class].hard_limit_bytes = hard;
|
||||
server.client_obuf_limits[class].soft_limit_bytes = soft;
|
||||
server.client_obuf_limits[class].soft_limit_seconds = soft_seconds;
|
||||
} else if (!strcasecmp(argv[0],"oom-score-adj-values") && argc == 1 + CONFIG_OOM_COUNT) {
|
||||
if (updateOOMScoreAdjValues(&argv[1], &err) == C_ERR) goto loaderr;
|
||||
} else if (!strcasecmp(argv[0],"notify-keyspace-events") && argc == 2) {
|
||||
int flags = keyspaceEventsStringToFlags(argv[1]);
|
||||
|
||||
@ -728,6 +786,17 @@ void configSetCommand(client *c) {
|
||||
server.client_obuf_limits[class].soft_limit_seconds = soft_seconds;
|
||||
}
|
||||
sdsfreesplitres(v,vlen);
|
||||
} config_set_special_field("oom-score-adj-values") {
|
||||
int vlen;
|
||||
int success = 1;
|
||||
|
||||
sds *v = sdssplitlen(o->ptr, sdslen(o->ptr), " ", 1, &vlen);
|
||||
if (vlen != CONFIG_OOM_COUNT || updateOOMScoreAdjValues(v, &errstr) == C_ERR)
|
||||
success = 0;
|
||||
|
||||
sdsfreesplitres(v, vlen);
|
||||
if (!success)
|
||||
goto badfmt;
|
||||
} config_set_special_field("notify-keyspace-events") {
|
||||
int flags = keyspaceEventsStringToFlags(o->ptr);
|
||||
|
||||
@ -923,6 +992,22 @@ void configGetCommand(client *c) {
|
||||
matches++;
|
||||
}
|
||||
|
||||
if (stringmatch(pattern,"oom-score-adj-values",0)) {
|
||||
sds buf = sdsempty();
|
||||
int j;
|
||||
|
||||
for (j = 0; j < CONFIG_OOM_COUNT; j++) {
|
||||
buf = sdscatprintf(buf,"%d", server.oom_score_adj_values[j]);
|
||||
if (j != CONFIG_OOM_COUNT-1)
|
||||
buf = sdscatlen(buf," ",1);
|
||||
}
|
||||
|
||||
addReplyBulkCString(c,"oom-score-adj-values");
|
||||
addReplyBulkCString(c,buf);
|
||||
sdsfree(buf);
|
||||
matches++;
|
||||
}
|
||||
|
||||
setDeferredMapLen(c,replylen,matches);
|
||||
}
|
||||
|
||||
@ -1330,6 +1415,25 @@ void rewriteConfigClientoutputbufferlimitOption(struct rewriteConfigState *state
|
||||
}
|
||||
}
|
||||
|
||||
/* Rewrite the oom-score-adj-values option. */
|
||||
void rewriteConfigOOMScoreAdjValuesOption(struct rewriteConfigState *state) {
|
||||
int force = 0;
|
||||
int j;
|
||||
char *option = "oom-score-adj-values";
|
||||
sds line;
|
||||
|
||||
line = sdsempty();
|
||||
for (j = 0; j < CONFIG_OOM_COUNT; j++) {
|
||||
if (server.oom_score_adj_values[j] != configOOMScoreAdjValuesDefaults[j])
|
||||
force = 1;
|
||||
|
||||
line = sdscatprintf(line, "%d", server.oom_score_adj_values[j]);
|
||||
if (j+1 != CONFIG_OOM_COUNT)
|
||||
line = sdscatlen(line, " ", 1);
|
||||
}
|
||||
rewriteConfigRewriteLine(state,option,line,force);
|
||||
}
|
||||
|
||||
/* Rewrite the bind option. */
|
||||
void rewriteConfigBindOption(struct rewriteConfigState *state) {
|
||||
int force = 1;
|
||||
@ -1528,6 +1632,7 @@ int rewriteConfig(char *path) {
|
||||
rewriteConfigStringOption(state,"cluster-config-file",server.cluster_configfile,CONFIG_DEFAULT_CLUSTER_CONFIG_FILE);
|
||||
rewriteConfigNotifykeyspaceeventsOption(state);
|
||||
rewriteConfigClientoutputbufferlimitOption(state);
|
||||
rewriteConfigOOMScoreAdjValuesOption(state);
|
||||
|
||||
/* Rewrite Sentinel config if in Sentinel mode. */
|
||||
if (server.sentinel_mode) rewriteConfigSentinelOption(state);
|
||||
@ -2082,6 +2187,19 @@ static int updateMaxclients(long long val, long long prev, char **err) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int updateOOMScoreAdj(int val, int prev, char **err) {
|
||||
UNUSED(prev);
|
||||
|
||||
if (val) {
|
||||
if (setOOMScoreAdj(-1) == C_ERR) {
|
||||
*err = "Failed to set current oom_score_adj. Check server logs.";
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
#ifdef USE_OPENSSL
|
||||
static int updateTlsCfg(char *val, char *prev, char **err) {
|
||||
UNUSED(val);
|
||||
@ -2146,6 +2264,7 @@ standardConfig configs[] = {
|
||||
createBoolConfig("crash-log-enabled", NULL, MODIFIABLE_CONFIG, server.crashlog_enabled, 1, NULL, updateSighandlerEnabled),
|
||||
createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, server.memcheck_enabled, 1, NULL, NULL),
|
||||
createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG, server.use_exit_on_panic, 0, NULL, NULL),
|
||||
createBoolConfig("oom-score-adj", NULL, MODIFIABLE_CONFIG, server.oom_score_adj, 0, NULL, updateOOMScoreAdj),
|
||||
|
||||
/* String Configs */
|
||||
createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL),
|
||||
|
@ -54,6 +54,7 @@
|
||||
#define HAVE_PROC_MAPS 1
|
||||
#define HAVE_PROC_SMAPS 1
|
||||
#define HAVE_PROC_SOMAXCONN 1
|
||||
#define HAVE_PROC_OOM_SCORE_ADJ 1
|
||||
#endif
|
||||
|
||||
/* Test for task_info() */
|
||||
|
@ -2497,6 +2497,9 @@ void replicationSetMaster(char *ip, int port) {
|
||||
server.masterhost = sdsnew(ip);
|
||||
server.masterport = port;
|
||||
|
||||
/* Update oom_score_adj */
|
||||
setOOMScoreAdj(-1);
|
||||
|
||||
/* Force our slaves to resync with us as well. They may hopefully be able
|
||||
* to partially resync with us, but we can notify the replid change. */
|
||||
disconnectSlaves();
|
||||
@ -2564,6 +2567,9 @@ void replicationUnsetMaster(void) {
|
||||
* master switch. */
|
||||
server.slaveseldb = -1;
|
||||
|
||||
/* Update oom_score_adj */
|
||||
setOOMScoreAdj(-1);
|
||||
|
||||
/* Once we turn from slave to master, we consider the starting time without
|
||||
* slaves (that is used to count the replication backlog time to live) as
|
||||
* starting from now. Otherwise the backlog will be freed after a
|
||||
|
60
src/server.c
60
src/server.c
@ -2422,6 +2422,10 @@ void initServerConfig(void) {
|
||||
for (j = 0; j < CLIENT_TYPE_OBUF_COUNT; j++)
|
||||
server.client_obuf_limits[j] = clientBufferLimitsDefaults[j];
|
||||
|
||||
/* Linux OOM Score config */
|
||||
for (j = 0; j < CONFIG_OOM_COUNT; j++)
|
||||
server.oom_score_adj_values[j] = configOOMScoreAdjValuesDefaults[j];
|
||||
|
||||
/* Double constants initialization */
|
||||
R_Zero = 0.0;
|
||||
R_PosInf = 1.0/R_Zero;
|
||||
@ -2527,6 +2531,58 @@ int restartServer(int flags, mstime_t delay) {
|
||||
return C_ERR; /* Never reached. */
|
||||
}
|
||||
|
||||
/* Record the process's current oom_score_adj in server.oom_score_adj_base.
 *
 * Only does anything when HAVE_PROC_OOM_SCORE_ADJ is defined. If the proc
 * file cannot be opened or nothing can be read from it, the field is left
 * unchanged. */
static void readOOMScoreAdj(void) {
#ifdef HAVE_PROC_OOM_SCORE_ADJ
    char buf[64];
    int fd = open("/proc/self/oom_score_adj", O_RDONLY);

    if (fd < 0) return;

    /* read() does not NUL-terminate, and atoi() needs a proper C string:
     * keep one byte free and terminate explicitly. */
    ssize_t nread = read(fd, buf, sizeof(buf) - 1);
    if (nread > 0) {
        buf[nread] = '\0';
        server.oom_score_adj_base = atoi(buf);
    }
    close(fd);
#endif
}
|
||||
|
||||
/* This function will configure the current process's oom_score_adj according
|
||||
* to user specified configuration. This is currently implemented on Linux
|
||||
* only.
|
||||
*
|
||||
* A process_class value of -1 implies OOM_CONFIG_MASTER or OOM_CONFIG_REPLICA,
|
||||
* depending on current role.
|
||||
*/
|
||||
int setOOMScoreAdj(int process_class) {
|
||||
int fd;
|
||||
int val;
|
||||
char buf[64];
|
||||
|
||||
if (!server.oom_score_adj) return C_OK;
|
||||
if (process_class == -1)
|
||||
process_class = (server.masterhost ? CONFIG_OOM_REPLICA : CONFIG_OOM_MASTER);
|
||||
|
||||
serverAssert(process_class >= 0 && process_class < CONFIG_OOM_COUNT);
|
||||
|
||||
#ifdef HAVE_PROC_OOM_SCORE_ADJ
|
||||
val = server.oom_score_adj_base + server.oom_score_adj_values[process_class];
|
||||
if (val > 1000) val = 1000;
|
||||
if (val < -1000) val = -1000;
|
||||
|
||||
snprintf(buf, sizeof(buf) - 1, "%d\n", val);
|
||||
|
||||
fd = open("/proc/self/oom_score_adj", O_WRONLY);
|
||||
if (fd < 0 || write(fd, buf, strlen(buf)) < 0) {
|
||||
serverLog(LOG_WARNING, "Unable to write oom_score_adj: %s", strerror(errno));
|
||||
if (fd != -1) close(fd);
|
||||
return C_ERR;
|
||||
}
|
||||
|
||||
close(fd);
|
||||
return C_OK;
|
||||
#else
|
||||
/* Unsupported */
|
||||
return C_ERR;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* This function will try to raise the max number of open files accordingly to
|
||||
* the configured max number of clients. It also reserves a number of file
|
||||
* descriptors (CONFIG_MIN_RESERVED_FDS) for extra operations of
|
||||
@ -4866,6 +4922,7 @@ int redisFork() {
|
||||
long long start = ustime();
|
||||
if ((childpid = fork()) == 0) {
|
||||
/* Child */
|
||||
setOOMScoreAdj(CONFIG_OOM_BGCHILD);
|
||||
closeListeningSockets(0);
|
||||
setupChildSignalHandlers();
|
||||
} else {
|
||||
@ -5197,6 +5254,7 @@ int main(int argc, char **argv) {
|
||||
server.supervised = redisIsSupervised(server.supervised_mode);
|
||||
int background = server.daemonize && !server.supervised;
|
||||
if (background) daemonize();
|
||||
readOOMScoreAdj();
|
||||
|
||||
initServer();
|
||||
if (background || server.pidfile) createPidFile();
|
||||
@ -5250,6 +5308,8 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
|
||||
redisSetCpuAffinity(server.server_cpulist);
|
||||
setOOMScoreAdj(-1);
|
||||
|
||||
aeMain(server.el);
|
||||
aeDeleteEventLoop(server.el);
|
||||
return 0;
|
||||
|
12
src/server.h
12
src/server.h
@ -150,6 +150,14 @@ typedef long long ustime_t; /* microsecond time type. */
|
||||
* in order to make sure of not over provisioning more than 128 fds. */
|
||||
#define CONFIG_FDSET_INCR (CONFIG_MIN_RESERVED_FDS+96)
|
||||
|
||||
/* OOM Score Adjustment classes. */
|
||||
#define CONFIG_OOM_MASTER 0
|
||||
#define CONFIG_OOM_REPLICA 1
|
||||
#define CONFIG_OOM_BGCHILD 2
|
||||
#define CONFIG_OOM_COUNT 3
|
||||
|
||||
extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
|
||||
|
||||
/* Hash table parameters */
|
||||
#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
|
||||
|
||||
@ -1350,6 +1358,9 @@ struct redisServer {
|
||||
int lfu_log_factor; /* LFU logarithmic counter factor. */
|
||||
int lfu_decay_time; /* LFU counter decay factor. */
|
||||
long long proto_max_bulk_len; /* Protocol bulk length maximum size. */
|
||||
int oom_score_adj_base; /* Base oom_score_adj value, as observed on startup */
|
||||
int oom_score_adj_values[CONFIG_OOM_COUNT]; /* Linux oom_score_adj configuration */
|
||||
int oom_score_adj; /* If true, oom_score_adj is managed */
|
||||
/* Blocked clients */
|
||||
unsigned int blocked_clients; /* # of clients executing a blocking cmd.*/
|
||||
unsigned int blocked_clients_by_type[BLOCKED_NUM];
|
||||
@ -2016,6 +2027,7 @@ const char *evictPolicyToString(void);
|
||||
struct redisMemOverhead *getMemoryOverheadData(void);
|
||||
void freeMemoryOverheadData(struct redisMemOverhead *mh);
|
||||
void checkChildrenDone(void);
|
||||
int setOOMScoreAdj(int process_class);
|
||||
|
||||
#define RESTART_SERVER_NONE 0
|
||||
#define RESTART_SERVER_GRACEFULLY (1<<0) /* Do proper shutdown. */
|
||||
|
@ -68,6 +68,7 @@ set ::all_tests {
|
||||
unit/pendingquerybuf
|
||||
unit/tls
|
||||
unit/tracking
|
||||
unit/oom-score-adj
|
||||
}
|
||||
# Index to the next test to run in the ::all_tests list.
|
||||
set ::next_test 0
|
||||
|
81
tests/unit/oom-score-adj.tcl
Normal file
81
tests/unit/oom-score-adj.tcl
Normal file
@ -0,0 +1,81 @@
|
||||
set system_name [string tolower [exec uname -s]]
|
||||
set user_id [exec id -u]
|
||||
|
||||
if {$system_name eq {linux}} {
|
||||
start_server {tags {"oom-score-adj"}} {
|
||||
# Return the first line of /proc/<pid>/oom_score_adj.
# With no argument, the pid of the server under test (srv 0 pid) is used.
proc get_oom_score_adj {{pid ""}} {
    if {$pid eq ""} {
        set pid [srv 0 pid]
    }

    set chan [open "/proc/$pid/oom_score_adj" "r"]
    set score [gets $chan]
    close $chan

    return $score
}
|
||||
|
||||
# Return the trimmed output of `ps` listing the pids whose parent is the
# server under test.
proc get_child_pid {} {
    set parent [srv 0 pid]

    set pipe [open "|ps --ppid $parent -o pid -h" "r"]
    set output [read $pipe]
    close $pipe

    return [string trim $output]
}
|
||||
|
||||
test {CONFIG SET oom-score-adj works as expected} {
    # Value observed before the feature is turned on; every expectation
    # below is relative to it.
    set base [get_oom_score_adj]

    # Configure per-class values, then enable the feature: the server is
    # expected to pick up the first (master) value.
    r config set oom-score-adj-values "10 20 30"
    r config set oom-score-adj yes
    assert {[get_oom_score_adj] == [expr {$base + 10}]}

    # Changing the value of the current class applies immediately.
    r config set oom-score-adj-values "15 20 30"
    assert {[get_oom_score_adj] == [expr {$base + 15}]}

    # Becoming a replica switches to the second value, and going back to
    # master restores the first one.
    r replicaof localhost 1
    assert {[get_oom_score_adj] == [expr {$base + 20}]}
    r replicaof no one
    assert {[get_oom_score_adj] == [expr {$base + 15}]}

    # A background save child gets the third value. The save delay keeps
    # the child around long enough to be inspected.
    r set key-a value-a
    r config set rdb-key-save-delay 100000
    r bgsave

    set child_pid [get_child_pid]
    assert {[get_oom_score_adj $child_pid] == [expr {$base + 30}]}
}
|
||||
|
||||
# The failure paths can only be exercised when running unprivileged.
if {$user_id != 0} {
    test {CONFIG SET oom-score-adj handles configuration failures} {
        # Stage a bad configuration while the feature is disabled.
        r config set oom-score-adj no
        r config set oom-score-adj-values "-1000 -1000 -1000"

        # Enabling the feature must be rejected...
        catch {r config set oom-score-adj yes} err
        assert_match {*Failed to set*} $err

        # ...and the flag must still read back as disabled.
        assert {[r config get oom-score-adj] == "oom-score-adj no"}

        # Switch to a configuration that can be applied and enable it.
        r config set oom-score-adj-values "0 100 100"
        r config set oom-score-adj yes

        # With the feature on, setting the bad values must be rejected...
        catch {r config set oom-score-adj-values "-1000 -1000 -1000"} err
        assert_match {*Failed*} $err

        # ...and the previously accepted values must still be in place.
        assert {[r config get oom-score-adj-values] == {oom-score-adj-values {0 100 100}}}
    }
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user