From 5c9f6d4f55c321aa22a4a51f9046f762fbb1655d Mon Sep 17 00:00:00 2001 From: antirez Date: Fri, 3 May 2013 12:37:45 +0200 Subject: [PATCH] Cluster: link reconnection on delayed PONG reply. When the PONG delay exceeds half the cluster node timeout, the link gets disconnected (and later automatically reconnected) in order to ensure that it's not just a dead connection issue. However, this operation is only performed if the link is old enough, in order to avoid disconnecting the same link again and again (and, among other problems, never receiving the PONG because of that). Note: when the link is reconnected, the 'ping_sent' field is not updated even if a new ping is sent using the new connection, so we can still reliably detect a node ping timeout. --- src/cluster.c | 6 +++--- src/redis.h | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/cluster.c b/src/cluster.c index 24c933740..426002980 100644 --- a/src/cluster.c +++ b/src/cluster.c @@ -268,6 +268,7 @@ void clusterInit(void) { clusterLink *createClusterLink(clusterNode *node) { clusterLink *link = zmalloc(sizeof(*link)); + link->ctime = time(NULL); link->sndbuf = sdsempty(); link->rcvbuf = sdsempty(); link->node = node; @@ -1596,21 +1597,20 @@ void clusterCron(void) { (REDIS_NODE_MYSELF|REDIS_NODE_NOADDR|REDIS_NODE_HANDSHAKE)) continue; -#if 0 /* If we are waiting for the PONG more than half the cluster * timeout, reconnect the link: maybe there is a connection * issue even if the node is alive. */ if (node->link && /* is connected */ + time(NULL) - node->link->ctime > + server.cluster_node_timeout && /* was not already reconnected */ node->ping_sent && /* we already sent a ping */ node->pong_received < node->ping_sent && /* still waiting pong */ /* and we are waiting for the pong more than timeout/2 */ now - node->ping_sent > server.cluster_node_timeout/2) { /* Disconnect the link, it will be reconnected automatically. 
*/ - printf("DISCONNECT!\n"); freeClusterLink(node->link); } -#endif /* If we have currently no active ping in this instance, and the * received PONG is older than half the cluster timeout, send diff --git a/src/redis.h b/src/redis.h index 16e92c369..e0fe50ffb 100644 --- a/src/redis.h +++ b/src/redis.h @@ -538,6 +538,7 @@ struct clusterNode; /* clusterLink encapsulates everything needed to talk with a remote node. */ typedef struct clusterLink { + time_t ctime; /* Link creation time */ int fd; /* TCP socket file descriptor */ sds sndbuf; /* Packet send buffer */ sds rcvbuf; /* Packet reception buffer */