Retry when a blocked connection system call is interrupted by a signal (#9629)

When repl-diskless-load is enabled, the connection is set to the blocking state.
The connection may be interrupted by a signal during a system call.
This would have resulted in a disconnection and possibly a reconnection loop.

Co-authored-by: Oran Agra <oran@redislabs.com>
This commit is contained in:
menwen 2021-11-04 15:09:28 +08:00 committed by GitHub
parent d04f306931
commit ccf8a651f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 51 additions and 1 deletions

View File

@ -31,6 +31,8 @@
#ifndef __REDIS_CONNECTION_H #ifndef __REDIS_CONNECTION_H
#define __REDIS_CONNECTION_H #define __REDIS_CONNECTION_H
#include <sys/errno.h>
#define CONN_INFO_LEN 32 #define CONN_INFO_LEN 32
struct aeEventLoop; struct aeEventLoop;
@ -149,7 +151,11 @@ static inline int connWrite(connection *conn, const void *data, size_t data_len)
* connGetState() to see if the connection state is still CONN_STATE_CONNECTED. * connGetState() to see if the connection state is still CONN_STATE_CONNECTED.
*/ */
static inline int connRead(connection *conn, void *buf, size_t buf_len) {
    /* Delegate the actual read to the connection type's read hook. */
    int nread = conn->type->read(conn, buf, buf_len);

    /* A -1 result with EINTR recorded as the last error means the call was
     * interrupted by a signal rather than failing for good: put the state
     * back to CONN_STATE_CONNECTED so the caller is able to retry. */
    int interrupted = (nread == -1) && (conn->last_errno == EINTR);
    if (interrupted) conn->state = CONN_STATE_CONNECTED;

    return nread;
}
/* Register a write handler, to be called when the connection is writable. /* Register a write handler, to be called when the connection is writable.
@ -203,6 +209,10 @@ static inline int connGetType(connection *conn) {
return conn->type->get_type(conn); return conn->type->get_type(conn);
} }
/* Return non-zero when the last error recorded on the connection is one the
 * caller may simply retry. At the moment the only such error is EINTR, i.e.
 * a system call interrupted by a signal. Returns 0 otherwise. */
static inline int connLastErrorRetryable(connection *conn) {
    if (conn->last_errno == EINTR) {
        return 1;
    }
    return 0;
}
connection *connCreateSocket(); connection *connCreateSocket();
connection *connCreateAcceptedSocket(int fd); connection *connCreateAcceptedSocket(int fd);

View File

@ -245,6 +245,7 @@ static size_t rioConnRead(rio *r, void *buf, size_t len) {
(char*)r->io.conn.buf + sdslen(r->io.conn.buf), (char*)r->io.conn.buf + sdslen(r->io.conn.buf),
toread); toread);
if (retval <= 0) { if (retval <= 0) {
if (connLastErrorRetryable(r->io.conn.conn)) continue;
if (errno == EWOULDBLOCK) errno = ETIMEDOUT; if (errno == EWOULDBLOCK) errno = ETIMEDOUT;
return 0; return 0;
} }
@ -352,6 +353,7 @@ static size_t rioFdWrite(rio *r, const void *buf, size_t len) {
while(nwritten != len) { while(nwritten != len) {
retval = write(r->io.fd.fd,p+nwritten,len-nwritten); retval = write(r->io.fd.fd,p+nwritten,len-nwritten);
if (retval <= 0) { if (retval <= 0) {
if (retval == -1 && errno == EINTR) continue;
/* With blocking io, which is the sole user of this /* With blocking io, which is the sole user of this
* rio target, EWOULDBLOCK is returned only because of * rio target, EWOULDBLOCK is returned only because of
* the SO_SNDTIMEO socket option, so we translate the error * the SO_SNDTIMEO socket option, so we translate the error

View File

@ -996,3 +996,41 @@ start_server {tags {"repl external:skip"}} {
} }
} }
} }
# Regression test: with repl-diskless-load enabled, a signal that interrupts
# the replica while it is reading the RDB from the master must not abort the
# sync or cause the replica to reconnect to the master.
test {replica can handle EINTR if use diskless load} {
start_server {tags {"repl"}} {
# The outer server plays the replica role.
set replica [srv 0 client]
set replica_log [srv 0 stdout]
start_server {} {
# The inner server plays the master role. From here on the replica is
# addressed with server index -1 and the master with index 0.
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
# Give the master a dataset to transfer (presumably 100 keys with the
# prefix "master" and 100000 byte values - confirm against DEBUG POPULATE).
$master debug populate 100 master 100000
$master config set rdbcompression no
$master config set repl-diskless-sync yes
$master config set repl-diskless-sync-delay 0
$replica config set repl-diskless-load on-empty-db
# Provoke an EINTR error on the replica by using the built-in watchdog
$replica config set watchdog-period 200
# Slow down RDB generation on the master so the replica stays blocked in read()
$master config set rdb-key-save-delay 10000
# Disable save points so the master shuts down quickly
$master config set save ""
# Start the replication process...
$replica replicaof $master_host $master_port
# Wait for the replica to start reading the rdb
set res [wait_for_log_messages -1 {"*Loading DB in memory*"} 0 200 10]
# Remember where the loading message was found, so the next search starts
# from that point in the log.
set loglines [lindex $res 1]
# Wait till we see the watchdog log line AFTER the loading started
wait_for_log_messages -1 {"*WATCHDOG TIMER EXPIRED*"} $loglines 200 10
# Make sure we're still loading, and that there was just one full sync attempt
assert ![log_file_matches [srv -1 stdout] "*Reconnecting to MASTER*"]
assert_equal 1 [s 0 sync_full]
assert_equal 1 [s -1 loading]
}
}
} {} {external:skip}