From 271733f4f83552acc52a8baba4ae3fa7bd6b4ba0 Mon Sep 17 00:00:00 2001 From: antirez Date: Sat, 15 Apr 2017 10:08:39 +0200 Subject: [PATCH] Cluster: discard pong times in the future. However, we allow for 500 milliseconds of tolerance, in order to avoid frequently discarding semantically valid info (the node is up) because of the natural desync of a few milliseconds among servers, even when NTP is used. Note that in any case we should ping the node from time to time and discover whether it's actually down from our point of view, since no update is accepted while we have an active ping on the node. Related to #3929. --- src/cluster.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/cluster.c b/src/cluster.c index b23160b90..d5ad85fe7 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -1365,7 +1365,14 @@ void clusterProcessGossipSection(clusterMsg *hdr, clusterLink *link) { { mstime_t pongtime = ntohl(g->pong_received); pongtime *= 1000; /* Convert back to milliseconds. */ - if (pongtime > node->pong_received) { + + /* Replace the pong time with the received one only if + * it's greater than our view but is not in the future + * (with 500 milliseconds tolerance) from the POV of our + * clock. */ + if (pongtime <= (server.mstime+500) && + pongtime > node->pong_received) + { node->pong_received = pongtime; } }