From 747be463d2f825c1e0b620ef2b120a0695121d8a Mon Sep 17 00:00:00 2001 From: antirez Date: Thu, 10 Oct 2019 10:23:34 +0200 Subject: [PATCH] Cluster: fix memory leak of cached master. This is what happened: 1. Instance starts, is a slave in the cluster configuration, but actually server.masterhost is not set, so technically the instance is acting like a master. 2. loadDataFromDisk() calls replicationCacheMasterUsingMyself() even if the instance is a master, in the case it is logically a slave and the cluster is enabled. So now we have a cached master even if the instance is practically configured as a master (from the POV of server.masterhost value and so forth). 3. clusterCron() sees that the instance requires to replicate from its master, because logically it is a slave, so it calls replicationSetMaster() that will in turn call replicationCacheMasterUsingMyself(): before this commit, this call would overwrite the old cached master, creating a memory leak. --- src/replication.c | 5 ++++- src/server.c | 8 +++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/replication.c b/src/replication.c index 76090a9e5..081212761 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2182,7 +2182,10 @@ void replicationSetMaster(char *ip, int port) { cancelReplicationHandshake(); /* Before destroying our master state, create a cached master using * our own parameters, to later PSYNC with the new master. */ - if (was_master) replicationCacheMasterUsingMyself(); + if (was_master) { + replicationDiscardCachedMaster(); + replicationCacheMasterUsingMyself(); + } server.repl_state = REPL_STATE_CONNECT; } diff --git a/src/server.c b/src/server.c index e089865f8..9392ffb8e 100644 --- a/src/server.c +++ b/src/server.c @@ -4719,12 +4719,14 @@ void loadDataFromDisk(void) { (float)(ustime()-start)/1000000); /* Restore the replication ID / offset from the RDB file. */ - if ((server.masterhost || (server.cluster_enabled && nodeIsSlave(server.cluster->myself)))&& + if ((server.masterhost || + (server.cluster_enabled && + nodeIsSlave(server.cluster->myself))) && rsi.repl_id_is_set && rsi.repl_offset != -1 && /* Note that older implementations may save a repl_stream_db - * of -1 inside the RDB file in a wrong way, see more information - * in function rdbPopulateSaveInfo. */ + * of -1 inside the RDB file in a wrong way, see more + * information in function rdbPopulateSaveInfo. */ rsi.repl_stream_db != -1) { memcpy(server.replid,rsi.repl_id,sizeof(server.replid));