From a0e19e3cf153ffbd8b1c7e72249d22b92f71532f Mon Sep 17 00:00:00 2001 From: Wang Yuan Date: Sun, 11 Apr 2021 13:14:31 +0800 Subject: [PATCH] Fix wrong check for aof fsync and handle aof fsync errno (#8751) The bio aof fsync fd may be closed by main thread (AOFRW done handler) and even possibly reused for another socket, pipe, or file. This can can an EBADF or EINVAL fsync error, which will lead to -MISCONF errors failing all writes. We just ignore these errno because aof fsync did not really fail. We handle errno when fsyncing aof in bio, so we could know the real reason when users get -MISCONF Errors writing to the AOF file error Issue created with #8419 --- src/bio.c | 8 +++++++- src/server.c | 9 ++++++--- src/server.h | 3 ++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/bio.c b/src/bio.c index 1a3e7a602..ff18c4fe7 100644 --- a/src/bio.c +++ b/src/bio.c @@ -220,10 +220,16 @@ void *bioProcessBackgroundJobs(void *arg) { if (type == BIO_CLOSE_FILE) { close(job->fd); } else if (type == BIO_AOF_FSYNC) { - if (redis_fsync(job->fd) == -1) { + /* The fd may be closed by main thread and reused for another + * socket, pipe, or file. We just ignore these errno because + * aof fsync did not really fail. */ + if (redis_fsync(job->fd) == -1 && + errno != EBADF && errno != EINVAL) + { int last_status; atomicGet(server.aof_bio_fsync_status,last_status); atomicSet(server.aof_bio_fsync_status,C_ERR); + atomicSet(server.aof_bio_fsync_errno,errno); if (last_status == C_OK) { serverLog(LL_WARNING, "Fail to fsync the AOF file: %s",strerror(errno)); diff --git a/src/server.c b/src/server.c index e2e9c8062..b28cc24bb 100644 --- a/src/server.c +++ b/src/server.c @@ -4390,11 +4390,14 @@ int writeCommandsDeniedByDiskError(void) { { return DISK_ERROR_TYPE_RDB; } else if (server.aof_state != AOF_OFF) { + if (server.aof_last_write_status == C_ERR) { + return DISK_ERROR_TYPE_AOF; + } + /* AOF fsync error. */ int aof_bio_fsync_status; atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status); - if (server.aof_last_write_status == C_ERR || - aof_bio_fsync_status == C_ERR) - { + if (aof_bio_fsync_status == C_ERR) { + atomicGet(server.aof_bio_fsync_errno,server.aof_last_write_errno); return DISK_ERROR_TYPE_AOF; } } diff --git a/src/server.h b/src/server.h index 2739d9824..22eea0ab8 100644 --- a/src/server.h +++ b/src/server.h @@ -1359,10 +1359,11 @@ struct redisServer { int aof_rewrite_incremental_fsync;/* fsync incrementally while aof rewriting? */ int rdb_save_incremental_fsync; /* fsync incrementally while rdb saving? */ int aof_last_write_status; /* C_OK or C_ERR */ - int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */ + int aof_last_write_errno; /* Valid if aof write/fsync status is ERR */ int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */ int aof_use_rdb_preamble; /* Use RDB preamble on AOF rewrites. */ redisAtomic int aof_bio_fsync_status; /* Status of AOF fsync in bio job. */ + redisAtomic int aof_bio_fsync_errno; /* Errno of AOF fsync in bio job. */ /* AOF pipes used to communicate between parent and child during rewrite. */ int aof_pipe_write_data_to_child; int aof_pipe_read_data_from_parent;