PSYNC2: reorganize comments related to recent fixes.

Related to PR #4412 and issue #4407.
2025-01-22 16:18:28 -05:00 · 2017-11-24 11:08:22 +01:00 · 2017-11-24 11:08:22 +01:00 · 4d063bb6ba
commit 4d063bb6ba
parent 9d86ae4597
2 changed files with 24 additions and 26 deletions
--- a/src/rdb.c
+++ b/src/rdb.c
@ -2039,19 +2039,16 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
     * only when repl_backlog is not NULL. If the repl_backlog is NULL,
     * it means that the instance isn't in any replication chains. In this
     * scenario the replication info is useless, because when a slave
-     * connect to us, the NULL repl_backlog will trigger a full synchronization,
-     * at the same time we will use a new replid and clear replid2.
-     * And remember that after free backlog if we reach repl_backlog_time_limit,
-     * we will use a new replid and clear replid2 too. So there is only one
-     * scenario which can make repl_stream_db be -1, that is the instance is
-     * a master, and it have repl_backlog, but server.slaveseldb is -1. */
+     * connects to us, the NULL repl_backlog will trigger a full
+     * synchronization, at the same time we will use a new replid and clear
+     * replid2. */
    if (!server.masterhost && server.repl_backlog) {
-        rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb;
        /* Note that when server.slaveseldb is -1, it means that this master
         * didn't apply any write commands after a full synchronization.
         * So we can let repl_stream_db be 0, this allows a restarted slave
         * to reload replication ID/offset, it's safe because the next write
         * command must generate a SELECT statement. */
+        rsi->repl_stream_db = server.slaveseldb == -1 ? 0 : server.slaveseldb;
        return rsi;
    }

@ -2061,16 +2058,12 @@ rdbSaveInfo *rdbPopulateSaveInfo(rdbSaveInfo *rsi) {
        rsi->repl_stream_db = server.master->db->id;
        return rsi;
    }
-    /* It is useful to persist cached_master's db id inside RDB file.
-     * When a slave lost master's connection, server.master will be
-     * cached as server.cached_master, after that a slave can not
-     * increment the master_repl_offset because slave only apply data
-     * from connected master, so the cached_master can hold right
-     * replication info. But please note that this action is safe
-     * only after we fix the free backlog problem, because when a master
-     * turn to be a slave, it will use itself as the server.cached_master,
-     * that is dangerous if we didn't use a new replication ID after
-     * free backlog. */
+
+    /* If we have a cached master we can use it in order to populate the
+     * replication selected DB info inside the RDB file: the slave can
+     * increment the master_repl_offset only from data arriving from the
+     * master, so if we are disconnected the offset in the cached master
+     * is valid. */
    if (server.cached_master) {
        rsi->repl_stream_db = server.cached_master->db->id;
        return rsi;
--- a/src/replication.c
+++ b/src/replication.c
@ -2614,15 +2614,20 @@ void replicationCron(void) {

        if (idle > server.repl_backlog_time_limit) {
            /* When we free the backlog, we always use a new
-             * replication ID and clear the ID2. Since without
-             * backlog we can not increment master_repl_offset
-             * even do write commands, that may lead to inconsistency
-             * when we try to connect a "slave-before" master
-             * (if this master is our slave before, our replid
-             * equals the master's replid2). As the master have our
-             * history, so we can match the master's replid2 and
-             * second_replid_offset, that make partial sync work,
-             * but the data is inconsistent. */
+             * replication ID and clear the ID2. This is needed
+             * because when there is no backlog, the master_repl_offset
+             * is not updated, but we would still retain our replication
+             * ID, leading to the following problem:
+             *
+             * 1. We are a master instance.
+             * 2. Our slave is promoted to master. It's repl-id-2 will
+             *    be the same as our repl-id.
+             * 3. We, yet as master, receive some updates, that will not
+             *    increment the master_repl_offset.
+             * 4. Later we are turned into a slave, connecto to the new
+             *    master that will accept our PSYNC request by second
+             *    replication ID, but there will be data inconsistency
+             *    because we received writes. */
            changeReplicationId();
            clearReplicationId2();
            freeReplicationBacklog();