From 81b18fa3a0926b60a59083eee144cbf3d0e2fd64 Mon Sep 17 00:00:00 2001 From: antirez Date: Mon, 8 Jul 2019 18:32:47 +0200 Subject: [PATCH] Diskless replica: a few aesthetic changes to replication.c. --- src/replication.c | 126 ++++++++++++++++++++++++++++++++-------------- src/rio.c | 12 +++-- 2 files changed, 96 insertions(+), 42 deletions(-) diff --git a/src/replication.c b/src/replication.c index e2bac08bd..a7c1c0d6a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1127,7 +1127,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { ssize_t nread, readlen, nwritten; int use_diskless_load; redisDb *diskless_load_backup = NULL; - int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : EMPTYDB_NO_FLAGS; + int empty_db_flags = server.repl_slave_lazy_flush ? EMPTYDB_ASYNC : + EMPTYDB_NO_FLAGS; int i; off_t left; UNUSED(el); @@ -1199,8 +1200,8 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { use_diskless_load = useDisklessLoad(); if (!use_diskless_load) { - - /* read the data from the socket, store it to a file and search for the EOF */ + /* Read the data from the socket, store it to a file and search + * for the EOF. */ if (usemark) { readlen = sizeof(buf); } else { @@ -1222,20 +1223,28 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { int eof_reached = 0; if (usemark) { - /* Update the last bytes array, and check if it matches our delimiter.*/ + /* Update the last bytes array, and check if it matches our + * delimiter. */ if (nread >= CONFIG_RUN_ID_SIZE) { - memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE,CONFIG_RUN_ID_SIZE); + memcpy(lastbytes,buf+nread-CONFIG_RUN_ID_SIZE, + CONFIG_RUN_ID_SIZE); } else { int rem = CONFIG_RUN_ID_SIZE-nread; memmove(lastbytes,lastbytes+nread,rem); memcpy(lastbytes+rem,buf,nread); } - if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) eof_reached = 1; + if (memcmp(lastbytes,eofmark,CONFIG_RUN_ID_SIZE) == 0) + eof_reached = 1; } + /* Update the last I/O time for the replication transfer (used in + * order to detect timeouts during replication), and write what we + * got from the socket to the dump file on disk. */ server.repl_transfer_lastio = server.unixtime; if ((nwritten = write(server.repl_transfer_fd,buf,nread)) != nread) { - serverLog(LL_WARNING,"Write error or short write writing to the DB dump file needed for MASTER <-> REPLICA synchronization: %s", + serverLog(LL_WARNING, + "Write error or short write writing to the DB dump file " + "needed for MASTER <-> REPLICA synchronization: %s", (nwritten == -1) ? strerror(errno) : "short write"); goto error; } @@ -1246,14 +1255,16 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { if (ftruncate(server.repl_transfer_fd, server.repl_transfer_read - CONFIG_RUN_ID_SIZE) == -1) { - serverLog(LL_WARNING,"Error truncating the RDB file received from the master for SYNC: %s", strerror(errno)); + serverLog(LL_WARNING, + "Error truncating the RDB file received from the master " + "for SYNC: %s", strerror(errno)); goto error; } } - /* Sync data on disk from time to time, otherwise at the end of the transfer - * we may suffer a big delay as the memory buffers are copied into the - * actual disk. */ + /* Sync data on disk from time to time, otherwise at the end of the + * transfer we may suffer a big delay as the memory buffers are copied + * into the actual disk. */ if (server.repl_transfer_read >= server.repl_transfer_last_fsync_off + REPL_MAX_WRITTEN_BEFORE_FSYNC) { @@ -1269,19 +1280,34 @@ void readSyncBulkPayload(aeEventLoop *el, int fd, void *privdata, int mask) { if (server.repl_transfer_read == server.repl_transfer_size) eof_reached = 1; } - if (!eof_reached) - return; + + /* If the transfer is yet not complete, we need to read more, so + * return ASAP and wait for the handler to be called again. */ + if (!eof_reached) return; } - /* We reach here when the slave is using diskless replication, - * or when we are done reading from the socket to the rdb file. */ + /* We reach this point in one of the following cases: + * + * 1. The replica is using diskless replication, that is, it reads data + * directly from the socket to the Redis memory, without using + * a temporary RDB file on disk. In that case we just block and + * read everything from the socket. + * + * 2. Or when we are done reading from the socket to the RDB file, in + * such case we want just to read the RDB file in memory. */ serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Flushing old data"); - /* We need to stop any AOFRW fork before flusing and parsing - * RDB, otherwise we'll create a copy-on-write disaster. */ + + /* We need to stop any AOF rewriting child before flusing and parsing + * the RDB, otherwise we'll create a copy-on-write disaster. */ if (server.aof_state != AOF_OFF) stopAppendOnly(); signalFlushedDb(-1); - if (use_diskless_load && server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { - /* create a backup of the current db */ + + /* When diskless RDB loading is used by replicas, it may be configured + * in order to save the current DB instead of throwing it away, + * so that we can restore it in case of failed transfer. */ + if (use_diskless_load && + server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) + { diskless_load_backup = zmalloc(sizeof(redisDb)*server.dbnum); for (i=0; i REPLICA synchronization: %s", + serverLog(LL_WARNING, + "Failed trying to rename the temp DB into %s in " + "MASTER <-> REPLICA synchronization: %s", server.rdb_filename, strerror(errno)); cancelReplicationHandshake(); return; } if (rdbLoad(server.rdb_filename,&rsi) != C_OK) { - serverLog(LL_WARNING,"Failed trying to load the MASTER synchronization DB from disk"); + serverLog(LL_WARNING, + "Failed trying to load the MASTER synchronization " + "DB from disk"); cancelReplicationHandshake(); /* Note that there's no point in restarting the AOF on sync failure, - it'll be restarted when sync succeeds or slave promoted. */ + it'll be restarted when sync succeeds or replica promoted. */ return; } + + /* Cleanup. */ zfree(server.repl_transfer_tmpfile); close(server.repl_transfer_fd); server.repl_transfer_fd = -1; server.repl_transfer_tmpfile = NULL; } + /* Final setup of the connected slave <- master link */ replicationCreateMasterClient(server.repl_transfer_s,rsi.repl_stream_db); server.repl_state = REPL_STATE_CONNECTED; server.repl_down_since = 0; + /* After a full resynchroniziation we use the replication ID and * offset of the master. The secondary ID / offset are cleared since * we are starting a new history. */ memcpy(server.replid,server.master->replid,sizeof(server.replid)); server.master_repl_offset = server.master->reploff; clearReplicationId2(); + /* Let's create the replication backlog if needed. Slaves need to * accumulate the backlog regardless of the fact they have sub-slaves * or not, in order to behave correctly if they are promoted to * masters after a failover. */ if (server.repl_backlog == NULL) createReplicationBacklog(); - serverLog(LL_NOTICE, "MASTER <-> REPLICA sync: Finished with success"); + /* Restart the AOF subsystem now that we finished the sync. This * will trigger an AOF rewrite, and when done will start appending * to the new file. */ diff --git a/src/rio.c b/src/rio.c index 993768b56..9327c17a8 100644 --- a/src/rio.c +++ b/src/rio.c @@ -173,13 +173,13 @@ static size_t rioFdRead(rio *r, void *buf, size_t len) { /* if the buffer is too small for the entire request: realloc */ if (sdslen(r->io.fd.buf) + sdsavail(r->io.fd.buf) < len) r->io.fd.buf = sdsMakeRoomFor(r->io.fd.buf, len - sdslen(r->io.fd.buf)); - + /* if the remaining unused buffer is not large enough: memmove so that we can read the rest */ if (len > avail && sdsavail(r->io.fd.buf) < len - avail) { sdsrange(r->io.fd.buf, r->io.fd.pos, -1); r->io.fd.pos = 0; } - + /* if we don't already have all the data in the sds, read more */ while (len > sdslen(r->io.fd.buf) - r->io.fd.pos) { size_t buffered = sdslen(r->io.fd.buf) - r->io.fd.pos; @@ -251,8 +251,10 @@ void rioInitWithFd(rio *r, int fd, size_t read_limit) { /* release the rio stream. * optionally returns the unread buffered data. */ -void rioFreeFd(rio *r, sds* out_remainingBufferedData) { - if(out_remainingBufferedData && (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) { +void rioFreeFd(rio *r, sds *out_remainingBufferedData) { + if (out_remainingBufferedData && + (size_t)r->io.fd.pos < sdslen(r->io.fd.buf)) + { if (r->io.fd.pos > 0) sdsrange(r->io.fd.buf, r->io.fd.pos, -1); *out_remainingBufferedData = r->io.fd.buf; @@ -264,7 +266,7 @@ void rioFreeFd(rio *r, sds* out_remainingBufferedData) { r->io.fd.buf = NULL; } -/* ------------------- File descriptors set implementation ------------------- */ +/* ------------------- File descriptors set implementation ------------------ */ /* Returns 1 or 0 for success/failure. * The function returns success as long as we are able to correctly write