From 970de3e9c0f4e4b0ac56d1710e3b66f4a0cd06a5 Mon Sep 17 00:00:00 2001 From: antirez Date: Wed, 5 Feb 2014 16:38:10 +0100 Subject: [PATCH] Check for EAGAIN in sendBulkToSlave(). Sometimes an OS X master talking to a Linux server over a slow link hit a strange error: OS X invoked the writable handler for the socket, but apparently there was actually no room in the socket buffer to accept the write. The write(2) call returned an EAGAIN error that was not checked, so every write(2) failure (a -1 return) was treated as a connection reset, which was unfortunate since the bulk transfer then had to start again. Also, more errors in the same code path are now logged at the WARNING level. --- src/replication.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/replication.c b/src/replication.c index 113efbcb1..9c1e1f01a 100644 --- a/src/replication.c +++ b/src/replication.c @@ -613,9 +613,11 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) { return; } if ((nwritten = write(fd,buf,buflen)) == -1) { - redisLog(REDIS_VERBOSE,"Write error sending DB to slave: %s", - strerror(errno)); - freeClient(slave); + if (errno != EAGAIN) { + redisLog(REDIS_WARNING,"Write error sending DB to slave: %s", + strerror(errno)); + freeClient(slave); + } return; } slave->repldboff += nwritten; @@ -627,6 +629,7 @@ void sendBulkToSlave(aeEventLoop *el, int fd, void *privdata, int mask) { slave->repl_ack_time = server.unixtime; if (aeCreateFileEvent(server.el, slave->fd, AE_WRITABLE, sendReplyToClient, slave) == AE_ERR) { + redisLog(REDIS_WARNING,"Unable to register writable event for slave bulk transfer: %s", strerror(errno)); freeClient(slave); return; }