diff --git a/redis.conf b/redis.conf index d4e3e47f0..8440b4750 100644 --- a/redis.conf +++ b/redis.conf @@ -269,6 +269,16 @@ logfile "" # Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. # syslog-facility local0 +# To disable the built in crash log, which will possibly produce cleaner core +# dumps when they are needed, uncomment the following: +# +# crash-log-enabled no + +# To disable the fast memory check that's run as part of the crash log, which +# will possibly let redis terminate sooner, uncomment the following: +# +# crash-memcheck-enabled no + # Set the number of databases. The default database is DB 0, you can select # a different one on a per-connection basis using SELECT where # dbid is a number between 0 and 'databases'-1 diff --git a/src/config.c b/src/config.c index a68fddf1d..52a859a77 100644 --- a/src/config.c +++ b/src/config.c @@ -2043,6 +2043,16 @@ static int updateAppendonly(int val, int prev, char **err) { return 1; } +static int updateSighandlerEnabled(int val, int prev, char **err) { + UNUSED(err); + UNUSED(prev); + if (val) + setupSignalHandlers(); + else + removeSignalHandlers(); + return 1; +} + static int updateMaxclients(long long val, long long prev, char **err) { /* Try to check if the OS is capable of supporting so many FDs. */ if (val > prev) { @@ -2132,7 +2142,9 @@ standardConfig configs[] = { createBoolConfig("cluster-enabled", NULL, IMMUTABLE_CONFIG, server.cluster_enabled, 0, NULL, NULL), createBoolConfig("appendonly", NULL, MODIFIABLE_CONFIG, server.aof_enabled, 0, NULL, updateAppendonly), createBoolConfig("cluster-allow-reads-when-down", NULL, MODIFIABLE_CONFIG, server.cluster_allow_reads_when_down, 0, NULL, NULL), - + createBoolConfig("crash-log-enabled", NULL, MODIFIABLE_CONFIG, server.crashlog_enabled, 1, NULL, updateSighandlerEnabled), + createBoolConfig("crash-memcheck-enabled", NULL, MODIFIABLE_CONFIG, server.memcheck_enabled, 1, NULL, NULL), + createBoolConfig("use-exit-on-panic", NULL, MODIFIABLE_CONFIG, server.use_exit_on_panic, 0, NULL, NULL), /* String Configs */ createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL), diff --git a/src/debug.c b/src/debug.c index 0bea69876..3b40e685b 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,5 +1,6 @@ /* - * Copyright (c) 2009-2012, Salvatore Sanfilippo + * Copyright (c) 2009-2020, Salvatore Sanfilippo + * Copyright (c) 2020, Redis Labs, Inc * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,10 +31,13 @@ #include "server.h" #include "sha1.h" /* SHA1 is used for DEBUG DIGEST */ #include "crc64.h" +#include "bio.h" #include #include #include +#include +#include #ifdef HAVE_BACKTRACE #include @@ -42,9 +46,6 @@ #else typedef ucontext_t sigcontext_t; #endif -#include -#include "bio.h" -#include #endif /* HAVE_BACKTRACE */ #ifdef __CYGWIN__ @@ -53,6 +54,15 @@ typedef ucontext_t sigcontext_t; #endif #endif +/* Globals */ +static int bug_report_start = 0; /* True if bug report header was already logged. */ + +/* Forward declarations */ +void bugReportStart(void); +void printCrashReport(void); +void bugReportEnd(int killViaSignal, int sig); +void logStackTrace(void *eip, int uplevel); + /* ================================= Debugging ============================== */ /* Compute the sha1 of string at 's' with 'len' bytes long. @@ -814,13 +824,14 @@ void _serverAssert(const char *estr, const char *file, int line) { bugReportStart(); serverLog(LL_WARNING,"=== ASSERTION FAILED ==="); serverLog(LL_WARNING,"==> %s:%d '%s' is not true",file,line,estr); + + if (server.crashlog_enabled) { #ifdef HAVE_BACKTRACE - server.assert_failed = estr; - server.assert_file = file; - server.assert_line = line; - serverLog(LL_WARNING,"(forcing SIGSEGV to print the bug report.)"); + logStackTrace(NULL, 1); #endif - *((char*)-1) = 'x'; + printCrashReport(); + } + bugReportEnd(0, 0); } void _serverAssertPrintClientInfo(const client *c) { @@ -897,18 +908,21 @@ void _serverPanic(const char *file, int line, const char *msg, ...) { serverLog(LL_WARNING,"------------------------------------------------"); serverLog(LL_WARNING,"!!! Software Failure. Press left mouse button to continue"); serverLog(LL_WARNING,"Guru Meditation: %s #%s:%d",fmtmsg,file,line); + + if (server.crashlog_enabled) { #ifdef HAVE_BACKTRACE - serverLog(LL_WARNING,"(forcing SIGSEGV in order to print the stack trace)"); + logStackTrace(NULL, 1); #endif - serverLog(LL_WARNING,"------------------------------------------------"); - *((char*)-1) = 'x'; + printCrashReport(); + } + bugReportEnd(0, 0); } void bugReportStart(void) { - if (server.bug_report_start == 0) { + if (bug_report_start == 0) { serverLogRaw(LL_WARNING|LL_RAW, "\n\n=== REDIS BUG REPORT START: Cut & paste starting from here ===\n"); - server.bug_report_start = 1; + bug_report_start = 1; } } @@ -980,6 +994,7 @@ void logStackContent(void **sp) { } } +/* Log dump of processor registers */ void logRegisters(ucontext_t *uc) { serverLog(LL_WARNING|LL_RAW, "\n------ REGISTERS ------\n"); @@ -1352,6 +1367,8 @@ void logRegisters(ucontext_t *uc) { #endif } +#endif /* HAVE_BACKTRACE */ + /* Return a file descriptor to write directly to the Redis log with the * write(2) syscall, that can be used in critical sections of the code * where the rest of Redis can't be trusted (for example during the memory @@ -1372,33 +1389,66 @@ void closeDirectLogFiledes(int fd) { if (!log_to_stdout) close(fd); } +#ifdef HAVE_BACKTRACE + /* Logs the stack trace using the backtrace() call. This function is designed - * to be called from signal handlers safely. */ -void logStackTrace(ucontext_t *uc) { - void *trace[101]; + * to be called from signal handlers safely. + * The eip argument is optional (can take NULL). + * The uplevel argument indicates how many of the calling functions to skip. + */ +void logStackTrace(void *eip, int uplevel) { + void *trace[100]; int trace_size = 0, fd = openDirectLogFiledes(); + char *msg; + uplevel++; /* skip this function */ if (fd == -1) return; /* If we can't log there is anything to do. */ - /* Generate the stack trace */ - trace_size = backtrace(trace+1, 100); + /* Get the stack trace first! */ + trace_size = backtrace(trace, 100); - if (getMcontextEip(uc) != NULL) { - char *msg1 = "EIP:\n"; - char *msg2 = "\nBacktrace:\n"; - if (write(fd,msg1,strlen(msg1)) == -1) {/* Avoid warning. */}; - trace[0] = getMcontextEip(uc); - backtrace_symbols_fd(trace, 1, fd); - if (write(fd,msg2,strlen(msg2)) == -1) {/* Avoid warning. */}; + msg = "\n------ STACK TRACE ------\n"; + if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; + + if (eip) { + /* Write EIP to the log file*/ + msg = "EIP:\n"; + if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; + backtrace_symbols_fd(&eip, 1, fd); } /* Write symbols to log file */ - backtrace_symbols_fd(trace+1, trace_size, fd); + msg = "\nBacktrace:\n"; + if (write(fd,msg,strlen(msg)) == -1) {/* Avoid warning. */}; + backtrace_symbols_fd(trace+uplevel, trace_size-uplevel, fd); /* Cleanup */ closeDirectLogFiledes(fd); } +#endif /* HAVE_BACKTRACE */ + +/* Log global server info */ +void logServerInfo(void) { + sds infostring, clients; + serverLogRaw(LL_WARNING|LL_RAW, "\n------ INFO OUTPUT ------\n"); + infostring = genRedisInfoString("all"); + serverLogRaw(LL_WARNING|LL_RAW, infostring); + serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n"); + clients = getAllClientsInfoString(-1); + serverLogRaw(LL_WARNING|LL_RAW, clients); + sdsfree(infostring); + sdsfree(clients); +} + +/* Log modules info. Something we wanna do last since we fear it may crash. */ +void logModulesInfo(void) { + serverLogRaw(LL_WARNING|LL_RAW, "\n------ MODULES INFO OUTPUT ------\n"); + sds infostring = modulesCollectInfo(sdsempty(), NULL, 1, 0); + serverLogRaw(LL_WARNING|LL_RAW, infostring); + sdsfree(infostring); +} + /* Log information about the "current" client, that is, the client that is * currently being served by Redis. May be NULL if Redis is not serving a * client right now. */ @@ -1505,6 +1555,23 @@ int memtest_test_linux_anonymous_maps(void) { } #endif +void doFastMemoryTest(void) { +#if defined(HAVE_PROC_MAPS) + if (server.memcheck_enabled) { + /* Test memory */ + serverLogRaw(LL_WARNING|LL_RAW, "\n------ FAST MEMORY TEST ------\n"); + bioKillThreads(); + if (memtest_test_linux_anonymous_maps()) { + serverLogRaw(LL_WARNING|LL_RAW, + "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!\n"); + } else { + serverLogRaw(LL_WARNING|LL_RAW, + "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible.\n"); + } + } +#endif +} + /* Scans the (assumed) x86 code starting at addr, for a max of `len` * bytes, searching for E8 (callq) opcodes, and dumping the symbols * and the call offset if they appear to be valid. */ @@ -1531,95 +1598,86 @@ void dumpX86Calls(void *addr, size_t len) { } } +void dumpCodeAroundEIP(void *eip) { + Dl_info info; + if (dladdr(eip, &info) != 0) { + serverLog(LL_WARNING|LL_RAW, + "\n------ DUMPING CODE AROUND EIP ------\n" + "Symbol: %s (base: %p)\n" + "Module: %s (base %p)\n" + "$ xxd -r -p /tmp/dump.hex /tmp/dump.bin\n" + "$ objdump --adjust-vma=%p -D -b binary -m i386:x86-64 /tmp/dump.bin\n" + "------\n", + info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase, + info.dli_saddr); + size_t len = (long)eip - (long)info.dli_saddr; + unsigned long sz = sysconf(_SC_PAGESIZE); + if (len < 1<<13) { /* we don't have functions over 8k (verified) */ + /* Find the address of the next page, which is our "safety" + * limit when dumping. Then try to dump just 128 bytes more + * than EIP if there is room, or stop sooner. */ + unsigned long next = ((unsigned long)eip + sz) & ~(sz-1); + unsigned long end = (unsigned long)eip + 128; + if (end > next) end = next; + len = end - (unsigned long)info.dli_saddr; + serverLogHexDump(LL_WARNING, "dump of function", + info.dli_saddr ,len); + dumpX86Calls(info.dli_saddr,len); + } + } +} + void sigsegvHandler(int sig, siginfo_t *info, void *secret) { - ucontext_t *uc = (ucontext_t*) secret; - void *eip = getMcontextEip(uc); - sds infostring, clients; - struct sigaction act; + UNUSED(secret); UNUSED(info); bugReportStart(); serverLog(LL_WARNING, "Redis %s crashed by signal: %d", REDIS_VERSION, sig); - if (eip != NULL) { - serverLog(LL_WARNING, - "Crashed running the instruction at: %p", eip); - } if (sig == SIGSEGV || sig == SIGBUS) { serverLog(LL_WARNING, "Accessing address: %p", (void*)info->si_addr); } - serverLog(LL_WARNING, - "Failed assertion: %s (%s:%d)", server.assert_failed, - server.assert_file, server.assert_line); - /* Log the stack trace */ - serverLogRaw(LL_WARNING|LL_RAW, "\n------ STACK TRACE ------\n"); - logStackTrace(uc); +#ifdef HAVE_BACKTRACE + ucontext_t *uc = (ucontext_t*) secret; + void *eip = getMcontextEip(uc); + if (eip != NULL) { + serverLog(LL_WARNING, + "Crashed running the instruction at: %p", eip); + } + logStackTrace(getMcontextEip(uc), 1); + + logRegisters(uc); +#endif + + printCrashReport(); + +#ifdef HAVE_BACKTRACE + if (eip != NULL) + dumpCodeAroundEIP(eip); +#endif + + bugReportEnd(1, sig); +} + +void printCrashReport(void) { /* Log INFO and CLIENT LIST */ - serverLogRaw(LL_WARNING|LL_RAW, "\n------ INFO OUTPUT ------\n"); - infostring = genRedisInfoString("all"); - serverLogRaw(LL_WARNING|LL_RAW, infostring); - serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n"); - clients = getAllClientsInfoString(-1); - serverLogRaw(LL_WARNING|LL_RAW, clients); - sdsfree(infostring); - sdsfree(clients); + logServerInfo(); /* Log the current client */ logCurrentClient(); - /* Log dump of processor registers */ - logRegisters(uc); + /* Log modules info. Something we wanna do last since we fear it may crash. */ + logModulesInfo(); - /* Log Modules INFO */ - serverLogRaw(LL_WARNING|LL_RAW, "\n------ MODULES INFO OUTPUT ------\n"); - infostring = modulesCollectInfo(sdsempty(), NULL, 1, 0); - serverLogRaw(LL_WARNING|LL_RAW, infostring); - sdsfree(infostring); + /* Run memory test in case the crash was triggered by memory corruption. */ + doFastMemoryTest(); +} -#if defined(HAVE_PROC_MAPS) - /* Test memory */ - serverLogRaw(LL_WARNING|LL_RAW, "\n------ FAST MEMORY TEST ------\n"); - bioKillThreads(); - if (memtest_test_linux_anonymous_maps()) { - serverLogRaw(LL_WARNING|LL_RAW, - "!!! MEMORY ERROR DETECTED! Check your memory ASAP !!!\n"); - } else { - serverLogRaw(LL_WARNING|LL_RAW, - "Fast memory test PASSED, however your memory can still be broken. Please run a memory test for several hours if possible.\n"); - } -#endif - - if (eip != NULL) { - Dl_info info; - if (dladdr(eip, &info) != 0) { - serverLog(LL_WARNING|LL_RAW, - "\n------ DUMPING CODE AROUND EIP ------\n" - "Symbol: %s (base: %p)\n" - "Module: %s (base %p)\n" - "$ xxd -r -p /tmp/dump.hex /tmp/dump.bin\n" - "$ objdump --adjust-vma=%p -D -b binary -m i386:x86-64 /tmp/dump.bin\n" - "------\n", - info.dli_sname, info.dli_saddr, info.dli_fname, info.dli_fbase, - info.dli_saddr); - size_t len = (long)eip - (long)info.dli_saddr; - unsigned long sz = sysconf(_SC_PAGESIZE); - if (len < 1<<13) { /* we don't have functions over 8k (verified) */ - /* Find the address of the next page, which is our "safety" - * limit when dumping. Then try to dump just 128 bytes more - * than EIP if there is room, or stop sooner. */ - unsigned long next = ((unsigned long)eip + sz) & ~(sz-1); - unsigned long end = (unsigned long)eip + 128; - if (end > next) end = next; - len = end - (unsigned long)info.dli_saddr; - serverLogHexDump(LL_WARNING, "dump of function", - info.dli_saddr ,len); - dumpX86Calls(info.dli_saddr,len); - } - } - } +void bugReportEnd(int killViaSignal, int sig) { + struct sigaction act; serverLogRaw(LL_WARNING|LL_RAW, "\n=== REDIS BUG REPORT END. Make sure to include from START to END. ===\n\n" @@ -1631,6 +1689,12 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { /* free(messages); Don't call free() with possibly corrupted memory. */ if (server.daemonize && server.supervised == 0) unlink(server.pidfile); + if (!killViaSignal) { + if (server.use_exit_on_panic) + exit(1); + abort(); + } + /* Make sure we exit with the right signal at the end. So for instance * the core will be dumped if enabled. */ sigemptyset (&act.sa_mask); @@ -1639,7 +1703,6 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) { sigaction (sig, &act, NULL); kill(getpid(),sig); } -#endif /* HAVE_BACKTRACE */ /* ==================== Logging functions for debugging ===================== */ @@ -1679,7 +1742,7 @@ void watchdogSignalHandler(int sig, siginfo_t *info, void *secret) { serverLogFromHandler(LL_WARNING,"\n--- WATCHDOG TIMER EXPIRED ---"); #ifdef HAVE_BACKTRACE - logStackTrace(uc); + logStackTrace(getMcontextEip(uc), 1); #else serverLogFromHandler(LL_WARNING,"Sorry: no support for backtrace()."); #endif diff --git a/src/server.c b/src/server.c index 3b45bea59..45db1cbcf 100644 --- a/src/server.c +++ b/src/server.c @@ -2442,10 +2442,6 @@ void initServerConfig(void) { server.rpoplpushCommand = lookupCommandByCString("rpoplpush"); /* Debugging */ - server.assert_failed = ""; - server.assert_file = ""; - server.assert_line = 0; - server.bug_report_start = 0; server.watchdog_period = 0; /* By default we want scripts to be always replicated by effects @@ -4812,16 +4808,27 @@ void setupSignalHandlers(void) { sigaction(SIGTERM, &act, NULL); sigaction(SIGINT, &act, NULL); -#ifdef HAVE_BACKTRACE sigemptyset(&act.sa_mask); act.sa_flags = SA_NODEFER | SA_RESETHAND | SA_SIGINFO; act.sa_sigaction = sigsegvHandler; + if(server.crashlog_enabled) { + sigaction(SIGSEGV, &act, NULL); + sigaction(SIGBUS, &act, NULL); + sigaction(SIGFPE, &act, NULL); + sigaction(SIGILL, &act, NULL); + } + return; +} + +void removeSignalHandlers(void) { + struct sigaction act; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_NODEFER | SA_RESETHAND; + act.sa_handler = SIG_DFL; sigaction(SIGSEGV, &act, NULL); sigaction(SIGBUS, &act, NULL); sigaction(SIGFPE, &act, NULL); sigaction(SIGILL, &act, NULL); -#endif - return; } /* This is the signal handler for children process. It is currently useful diff --git a/src/server.h b/src/server.h index 018dd4607..efb7d2ee8 100644 --- a/src/server.h +++ b/src/server.h @@ -1274,6 +1274,11 @@ struct redisServer { int syslog_enabled; /* Is syslog enabled? */ char *syslog_ident; /* Syslog ident */ int syslog_facility; /* Syslog facility */ + int crashlog_enabled; /* Enable signal handler for crashlog. + * disable for clean core dumps. */ + int memcheck_enabled; /* Enable memory check on crash. */ + int use_exit_on_panic; /* Use exit() on panic and assert rather than + * abort(). useful for Valgrind. */ /* Replication (master) */ char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */ char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/ @@ -1439,10 +1444,6 @@ struct redisServer { old "requirepass" directive for backward compatibility with Redis <= 5. */ /* Assert & bug reporting */ - const char *assert_failed; - const char *assert_file; - int assert_line; - int bug_report_start; /* True if bug report header was already logged. */ int watchdog_period; /* Software watchdog period in ms. 0 = off */ /* System hardware info */ size_t system_memory_size; /* Total memory in system as reported by OS */ @@ -1975,6 +1976,7 @@ int freeMemoryIfNeeded(void); int freeMemoryIfNeededAndSafe(void); int processCommand(client *c); void setupSignalHandlers(void); +void removeSignalHandlers(void); struct redisCommand *lookupCommand(sds name); struct redisCommand *lookupCommandByCString(char *s); struct redisCommand *lookupCommandOrOriginal(sds name); @@ -2432,7 +2434,6 @@ void *realloc(void *ptr, size_t size) __attribute__ ((deprecated)); void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line); void _serverAssert(const char *estr, const char *file, int line); void _serverPanic(const char *file, int line, const char *msg, ...); -void bugReportStart(void); void serverLogObjectDebugInfo(const robj *o); void sigsegvHandler(int sig, siginfo_t *info, void *secret); sds genRedisInfoString(const char *section);