From 99a425d0f3b7b00896cb855d5de4ae93be1fe3f0 Mon Sep 17 00:00:00 2001 From: Tian Date: Tue, 21 Jun 2022 00:17:23 +0800 Subject: [PATCH] Fsync directory while persisting AOF manifest, RDB file, and config file (#10737) The current process to persist files is `write` the data, `fsync` and `rename` the file, but a underlying problem is that the rename may be lost when a sudden crash like power outage and the directory hasn't been persisted. The article [Ensuring data reaches disk](https://lwn.net/Articles/457667/) mentions a safe way to update file should be: 1. create a new temp file (on the same file system!) 2. write data to the temp file 3. fsync() the temp file 4. rename the temp file to the appropriate name 5. fsync() the containing directory This commit handles CONFIG REWRITE, AOF manifest, and RDB file (both for persistence, and the one the replica gets from the master). It doesn't handle (yet), ACL SAVE and Cluster configs, since these don't yet follow this pattern. --- src/aof.c | 10 ++++++++++ src/config.c | 5 +++++ src/rdb.c | 11 +++++++---- src/replication.c | 10 ++++++++++ src/util.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ src/util.h | 1 + 6 files changed, 82 insertions(+), 4 deletions(-) diff --git a/src/aof.c b/src/aof.c index 823759f24..55d9990bc 100644 --- a/src/aof.c +++ b/src/aof.c @@ -572,6 +572,16 @@ int writeAofManifestFile(sds buf) { tmp_am_name, am_name, strerror(errno)); ret = C_ERR; + goto cleanup; + } + + /* Also sync the AOF directory as new AOF files may be added in the directory */ + if (fsyncFileDir(am_filepath) == -1) { + serverLog(LL_WARNING, "Fail to fsync AOF directory %s: %s.", + am_filepath, strerror(errno)); + + ret = C_ERR; + goto cleanup; } cleanup: diff --git a/src/config.c b/src/config.c index c714b1994..115446c67 100644 --- a/src/config.c +++ b/src/config.c @@ -1665,6 +1665,7 @@ int rewriteConfigOverwriteFile(char *configfile, sds content) { const char *tmp_suffix = ".XXXXXX"; size_t offset = 0; ssize_t written_bytes = 0; + int old_errno; int tmp_path_len = snprintf(tmp_conffile, sizeof(tmp_conffile), "%s%s", configfile, tmp_suffix); if (tmp_path_len <= 0 || (unsigned int)tmp_path_len >= sizeof(tmp_conffile)) { @@ -1701,14 +1702,18 @@ int rewriteConfigOverwriteFile(char *configfile, sds content) { serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno)); else if (rename(tmp_conffile, configfile) == -1) serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno)); + else if (fsyncFileDir(configfile) == -1) + serverLog(LL_WARNING, "Could not sync config file dir (%s)", strerror(errno)); else { retval = 0; serverLog(LL_DEBUG, "Rewritten config file (%s) successfully", configfile); } cleanup: + old_errno = errno; close(fd); if (retval) unlink(tmp_conffile); + errno = old_errno; return retval; } diff --git a/src/rdb.c b/src/rdb.c index 141677c30..ddbdb8b1b 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -1397,6 +1397,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) { FILE *fp = NULL; rio rdb; int error = 0; + char *err_op; /* For a detailed log */ snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid()); fp = fopen(tmpfile,"w"); @@ -1420,13 +1421,14 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) { if (rdbSaveRio(req,&rdb,&error,RDBFLAGS_NONE,rsi) == C_ERR) { errno = error; + err_op = "rdbSaveRio"; goto werr; } /* Make sure data will not remain on the OS's output buffers */ - if (fflush(fp)) goto werr; - if (fsync(fileno(fp))) goto werr; - if (fclose(fp)) { fp = NULL; goto werr; } + if (fflush(fp)) { err_op = "fflush"; goto werr; } + if (fsync(fileno(fp))) { err_op = "fsync"; goto werr; } + if (fclose(fp)) { fp = NULL; err_op = "fclose"; goto werr; } fp = NULL; /* Use RENAME to make sure the DB file is changed atomically only @@ -1445,6 +1447,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) { stopSaving(0); return C_ERR; } + if (fsyncFileDir(filename) == -1) { err_op = "fsyncFileDir"; goto werr; } serverLog(LL_NOTICE,"DB saved on disk"); server.dirty = 0; @@ -1454,7 +1457,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) { return C_OK; werr: - serverLog(LL_WARNING,"Write error saving DB on disk: %s", strerror(errno)); + serverLog(LL_WARNING,"Write error saving DB on disk(%s): %s", err_op, strerror(errno)); if (fp) fclose(fp); unlink(tmpfile); stopSaving(0); diff --git a/src/replication.c b/src/replication.c index 8a40a8197..b929b0460 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2151,6 +2151,16 @@ void readSyncBulkPayload(connection *conn) { /* Close old rdb asynchronously. */ if (old_rdb_fd != -1) bioCreateCloseJob(old_rdb_fd); + /* Sync the directory to ensure rename is persisted */ + if (fsyncFileDir(server.rdb_filename) == -1) { + serverLog(LL_WARNING, + "Failed trying to sync DB directory %s in " + "MASTER <-> REPLICA synchronization: %s", + server.rdb_filename, strerror(errno)); + cancelReplicationHandshake(1); + return; + } + if (rdbLoad(server.rdb_filename,&rsi,RDBFLAGS_REPLICATION) != C_OK) { serverLog(LL_WARNING, "Failed trying to load the MASTER synchronization " diff --git a/src/util.c b/src/util.c index 1e083fc54..4b534e9a6 100644 --- a/src/util.c +++ b/src/util.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "util.h" #include "sha256.h" @@ -923,6 +924,54 @@ sds makePath(char *path, char *filename) { return sdscatfmt(sdsempty(), "%s/%s", path, filename); } +/* Given the filename, sync the corresponding directory. + * + * Usually a portable and safe pattern to overwrite existing files would be like: + * 1. create a new temp file (on the same file system!) + * 2. write data to the temp file + * 3. fsync() the temp file + * 4. rename the temp file to the appropriate name + * 5. fsync() the containing directory */ +int fsyncFileDir(const char *filename) { +#ifdef _AIX + /* AIX is unable to fsync a directory */ + return 0; +#endif + char temp_filename[PATH_MAX + 1]; + char *dname; + int dir_fd; + + if (strlen(filename) > PATH_MAX) { + errno = ENAMETOOLONG; + return -1; + } + + /* In the glibc implementation dirname may modify their argument. */ + memcpy(temp_filename, filename, strlen(filename) + 1); + dname = dirname(temp_filename); + + dir_fd = open(dname, O_RDONLY); + if (dir_fd == -1) { + /* Some OSs don't allow us to open directories at all, just + * ignore the error in that case */ + if (errno == EISDIR) { + return 0; + } + return -1; + } + /* Some OSs don't allow us to fsync directories at all, so we can ignore + * those errors. */ + if (redis_fsync(dir_fd) == -1 && !(errno == EBADF || errno == EINVAL)) { + int save_errno = errno; + close(dir_fd); + errno = save_errno; + return -1; + } + + close(dir_fd); + return 0; +} + #ifdef REDIS_TEST #include diff --git a/src/util.h b/src/util.h index 0515f1a83..3cf6c3e98 100644 --- a/src/util.h +++ b/src/util.h @@ -85,6 +85,7 @@ int dirExists(char *dname); int dirRemove(char *dname); int fileExist(char *filename); sds makePath(char *path, char *filename); +int fsyncFileDir(const char *filename); #ifdef REDIS_TEST int utilTest(int argc, char **argv, int flags);