From ccf8a651f363f62c415e352bc4072e3756e0f197 Mon Sep 17 00:00:00 2001 From: menwen Date: Thu, 4 Nov 2021 15:09:28 +0800 Subject: [PATCH] Retry when a blocked connection system call is interrupted by a signal (#9629) When repl-diskless-load is enabled, the connection is set to the blocking state. The connection may be interrupted by a signal during a system call. This would have resulted in a disconnection and possibly a reconnection loop. Co-authored-by: Oran Agra --- src/connection.h | 12 +++++++++- src/rio.c | 2 ++ tests/integration/replication.tcl | 38 +++++++++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 1 deletion(-) diff --git a/src/connection.h b/src/connection.h index 60f1f70cb..ac2ebc16c 100644 --- a/src/connection.h +++ b/src/connection.h @@ -31,6 +31,8 @@ #ifndef __REDIS_CONNECTION_H #define __REDIS_CONNECTION_H +#include + #define CONN_INFO_LEN 32 struct aeEventLoop; @@ -149,7 +151,11 @@ static inline int connWrite(connection *conn, const void *data, size_t data_len) * connGetState() to see if the connection state is still CONN_STATE_CONNECTED. */ static inline int connRead(connection *conn, void *buf, size_t buf_len) { - return conn->type->read(conn, buf, buf_len); + int ret = conn->type->read(conn, buf, buf_len); + if (ret == -1 && conn->last_errno == EINTR) { + conn->state = CONN_STATE_CONNECTED; + } + return ret; } /* Register a write handler, to be called when the connection is writable. @@ -203,6 +209,10 @@ static inline int connGetType(connection *conn) { return conn->type->get_type(conn); } +static inline int connLastErrorRetryable(connection *conn) { + return conn->last_errno == EINTR; +} + connection *connCreateSocket(); connection *connCreateAcceptedSocket(int fd); diff --git a/src/rio.c b/src/rio.c index 70817347b..caeaaf3db 100644 --- a/src/rio.c +++ b/src/rio.c @@ -245,6 +245,7 @@ static size_t rioConnRead(rio *r, void *buf, size_t len) { (char*)r->io.conn.buf + sdslen(r->io.conn.buf), toread); if (retval <= 0) { + if (connLastErrorRetryable(r->io.conn.conn)) continue; if (errno == EWOULDBLOCK) errno = ETIMEDOUT; return 0; } @@ -352,6 +353,7 @@ static size_t rioFdWrite(rio *r, const void *buf, size_t len) { while(nwritten != len) { retval = write(r->io.fd.fd,p+nwritten,len-nwritten); if (retval <= 0) { + if (retval == -1 && errno == EINTR) continue; /* With blocking io, which is the sole user of this * rio target, EWOULDBLOCK is returned only because of * the SO_SNDTIMEO socket option, so we translate the error diff --git a/tests/integration/replication.tcl b/tests/integration/replication.tcl index 804d83a1d..13206d634 100644 --- a/tests/integration/replication.tcl +++ b/tests/integration/replication.tcl @@ -996,3 +996,41 @@ start_server {tags {"repl external:skip"}} { } } } + +test {replica can handle EINTR if use diskless load} { + start_server {tags {"repl"}} { + set replica [srv 0 client] + set replica_log [srv 0 stdout] + start_server {} { + set master [srv 0 client] + set master_host [srv 0 host] + set master_port [srv 0 port] + + $master debug populate 100 master 100000 + $master config set rdbcompression no + $master config set repl-diskless-sync yes + $master config set repl-diskless-sync-delay 0 + $replica config set repl-diskless-load on-empty-db + # Construct EINTR error by using the built in watchdog + $replica config set watchdog-period 200 + # Block replica in read() + $master config set rdb-key-save-delay 10000 + # set speedy shutdown + $master config set save "" + # Start the replication process... + $replica replicaof $master_host $master_port + + # Wait for the replica to start reading the rdb + set res [wait_for_log_messages -1 {"*Loading DB in memory*"} 0 200 10] + set loglines [lindex $res 1] + + # Wait till we see the watchgod log line AFTER the loading started + wait_for_log_messages -1 {"*WATCHDOG TIMER EXPIRED*"} $loglines 200 10 + + # Make sure we're still loading, and that there was just one full sync attempt + assert ![log_file_matches [srv -1 stdout] "*Reconnecting to MASTER*"] + assert_equal 1 [s 0 sync_full] + assert_equal 1 [s -1 loading] + } + } +} {} {external:skip}