From 2a232dfa9a9e00a5abcb4a0f02bf5db55590f17e Mon Sep 17 00:00:00 2001 From: antirez Date: Tue, 8 Jul 2014 11:31:46 +0200 Subject: [PATCH] LATENCY DOCTOR: initial draft and events summary output. --- src/latency.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++- src/latency.h | 1 + src/redis-cli.c | 4 ++- 3 files changed, 86 insertions(+), 2 deletions(-) diff --git a/src/latency.c b/src/latency.c index 185003c7d..6fdf204a1 100644 --- a/src/latency.c +++ b/src/latency.c @@ -140,6 +140,7 @@ void analyzeLatencyForEvent(char *event, struct latencyStats *ls) { ls->max = 0; ls->mad = 0; ls->samples = 0; + ls->period = 0; if (!ts) return; /* First pass, populate everything but the MAD. */ @@ -156,8 +157,20 @@ void analyzeLatencyForEvent(char *event, struct latencyStats *ls) { ls->max = ts->samples[j].latency; } sum += ts->samples[j].latency; + + /* Track the oldest event time in ls->period. */ + if (ls->period == 0 || ts->samples[j].time < ls->period) + ls->period = ts->samples[j].time; + } + + /* So far avg is actually the sum of the latencies, and period is + * the oldest event time. We need to make the first an average and + * the second a range of seconds. */ + if (ls->samples) { + ls->avg = sum / ls->samples; + ls->period = time(NULL) - ls->period; + if (ls->period == 0) ls->period = 1; } - if (ls->samples) ls->avg = sum / ls->samples; /* Second pass, compute MAD. */ sum = 0; @@ -172,6 +185,68 @@ void analyzeLatencyForEvent(char *event, struct latencyStats *ls) { if (ls->samples) ls->mad = sum / ls->samples; } +/* Create a human readable report of latency events for this Redis instance. */ +sds createLatencyReport(void) { + sds report = sdsempty(); + + /* Return ASAP if the latency engine is disabled and it looks like it + * was never enabled so far. */ + if (dictSize(server.latency_events) == 0 && + server.latency_monitor_threshold == 0) + { + report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may use \"CONFIG SET latency-monitor-threshold .\" in order to enable it. If we weren't in a deep space mission I'd suggest to take a look at http://redis.io/topics/latency-monitor."); + return report; + } + + /* Show all the events stats and add for each event some event-related + * comment depending on the values. */ + dictIterator *di; + dictEntry *de; + int eventnum = 0; + + di = dictGetIterator(server.latency_events); + while((de = dictNext(di)) != NULL) { + char *event = dictGetKey(de); + struct latencyTimeSeries *ts = dictGetVal(de); + struct latencyStats ls; + int nontrivial, severe; + + if (ts == NULL) continue; + eventnum++; + if (eventnum == 1) { + report = sdscat(report,"Dave, I have observed latency spikes in this Redis instance. You don't mind talking about it, do you Dave?\n\n"); + } + analyzeLatencyForEvent(event,&ls); + + report = sdscatprintf(report, + "%d. %s: %d latency spikes (average %lums, mean deviation %lums, period %lu sec). Worst all time event %lums.", + eventnum, event, + ls.samples, + (unsigned long) ls.avg, + (unsigned long) ls.mad, + (unsigned long) ls.period/ls.samples, + (unsigned long) ts->max); + + /* Add some event specific information. */ + nontrivial = ts->max > 100; + severe = ts->max > 500; + + /* Fork */ + if (!strcasecmp(event,"fork")) { + double fork_bandwidth = (double) zmalloc_used_memory() * 1000000 / server.stat_fork_time / (1024*1024); /* GB per second. */ + } + + report = sdscatlen(report,"\n",1); + } + dictReleaseIterator(di); + + if (eventnum == 0) { + report = sdscat(report,"Dave, no latency spike was observed during the lifetime of this Redis instance, not in the slightest bit. I honestly think you ought to sit down calmly, take a stress pill, and think things over.\n"); + } + + return report; +} + /* ---------------------- Latency command implementation -------------------- */ /* latencyCommand() helper to produce a time-delay reply for all the samples @@ -293,6 +368,12 @@ void latencyCommand(redisClient *c) { } else if (!strcasecmp(c->argv[1]->ptr,"latest") && c->argc == 2) { /* LATENCY LATEST */ latencyCommandReplyWithLatestEvents(c); + } else if (!strcasecmp(c->argv[1]->ptr,"doctor") && c->argc == 2) { + /* LATENCY DOCTOR */ + sds report = createLatencyReport(); + + addReplyBulkCBuffer(c,report,sdslen(report)); + sdsfree(report); } else if (!strcasecmp(c->argv[1]->ptr,"reset") && c->argc >= 2) { /* LATENCY RESET */ if (c->argc == 2) { diff --git a/src/latency.h b/src/latency.h index f28fce375..6ddbe0410 100644 --- a/src/latency.h +++ b/src/latency.h @@ -58,6 +58,7 @@ struct latencyStats { uint32_t max; /* Max of current samples. */ uint32_t mad; /* Mean absolute deviation. */ uint32_t samples; /* Number of non-zero samples. */ + time_t period; /* Number of seconds since first event and now. */ }; void latencyMonitorInit(void); diff --git a/src/redis-cli.c b/src/redis-cli.c index 782958b26..907e62b74 100644 --- a/src/redis-cli.c +++ b/src/redis-cli.c @@ -601,7 +601,9 @@ static int cliSendCommand(int argc, char **argv, int repeat) { (argc == 2 && !strcasecmp(command,"client") && !strcasecmp(argv[1],"list")) || (argc == 3 && !strcasecmp(command,"latency") && - !strcasecmp(argv[1],"graph"))) + !strcasecmp(argv[1],"graph")) || + (argc == 2 && !strcasecmp(command,"latency") && + !strcasecmp(argv[1],"doctor"))) { output_raw = 1; }