Fsync directory while persisting AOF manifest, RDB file, and config file (#10737)

The current process to persist files is `write` the data, `fsync` and `rename` the file,
but a underlying problem is that the rename may be lost when a sudden crash like
power outage and the directory hasn't been persisted.

The article [Ensuring data reaches disk](https://lwn.net/Articles/457667/) mentions
a safe way to update file should be:

1. create a new temp file (on the same file system!)
2. write data to the temp file
3. fsync() the temp file
4. rename the temp file to the appropriate name
5. fsync() the containing directory

This commit handles CONFIG REWRITE, AOF manifest, and RDB file (both for persistence,
and the one the replica gets from the master).
It doesn't handle (yet), ACL SAVE and Cluster configs, since these don't yet follow this pattern.
This commit is contained in:
Tian 2022-06-21 00:17:23 +08:00 committed by GitHub
parent 4c72a09b78
commit 99a425d0f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 82 additions and 4 deletions

View File

@ -572,6 +572,16 @@ int writeAofManifestFile(sds buf) {
tmp_am_name, am_name, strerror(errno));
ret = C_ERR;
goto cleanup;
}
/* Also sync the AOF directory as new AOF files may be added in the directory */
if (fsyncFileDir(am_filepath) == -1) {
serverLog(LL_WARNING, "Fail to fsync AOF directory %s: %s.",
am_filepath, strerror(errno));
ret = C_ERR;
goto cleanup;
}
cleanup:

View File

@ -1665,6 +1665,7 @@ int rewriteConfigOverwriteFile(char *configfile, sds content) {
const char *tmp_suffix = ".XXXXXX";
size_t offset = 0;
ssize_t written_bytes = 0;
int old_errno;
int tmp_path_len = snprintf(tmp_conffile, sizeof(tmp_conffile), "%s%s", configfile, tmp_suffix);
if (tmp_path_len <= 0 || (unsigned int)tmp_path_len >= sizeof(tmp_conffile)) {
@ -1701,14 +1702,18 @@ int rewriteConfigOverwriteFile(char *configfile, sds content) {
serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno));
else if (rename(tmp_conffile, configfile) == -1)
serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno));
else if (fsyncFileDir(configfile) == -1)
serverLog(LL_WARNING, "Could not sync config file dir (%s)", strerror(errno));
else {
retval = 0;
serverLog(LL_DEBUG, "Rewritten config file (%s) successfully", configfile);
}
cleanup:
old_errno = errno;
close(fd);
if (retval) unlink(tmp_conffile);
errno = old_errno;
return retval;
}

View File

@ -1397,6 +1397,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) {
FILE *fp = NULL;
rio rdb;
int error = 0;
char *err_op; /* For a detailed log */
snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
fp = fopen(tmpfile,"w");
@ -1420,13 +1421,14 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) {
if (rdbSaveRio(req,&rdb,&error,RDBFLAGS_NONE,rsi) == C_ERR) {
errno = error;
err_op = "rdbSaveRio";
goto werr;
}
/* Make sure data will not remain on the OS's output buffers */
if (fflush(fp)) goto werr;
if (fsync(fileno(fp))) goto werr;
if (fclose(fp)) { fp = NULL; goto werr; }
if (fflush(fp)) { err_op = "fflush"; goto werr; }
if (fsync(fileno(fp))) { err_op = "fsync"; goto werr; }
if (fclose(fp)) { fp = NULL; err_op = "fclose"; goto werr; }
fp = NULL;
/* Use RENAME to make sure the DB file is changed atomically only
@ -1445,6 +1447,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) {
stopSaving(0);
return C_ERR;
}
if (fsyncFileDir(filename) == -1) { err_op = "fsyncFileDir"; goto werr; }
serverLog(LL_NOTICE,"DB saved on disk");
server.dirty = 0;
@ -1454,7 +1457,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) {
return C_OK;
werr:
serverLog(LL_WARNING,"Write error saving DB on disk: %s", strerror(errno));
serverLog(LL_WARNING,"Write error saving DB on disk(%s): %s", err_op, strerror(errno));
if (fp) fclose(fp);
unlink(tmpfile);
stopSaving(0);

View File

@ -2151,6 +2151,16 @@ void readSyncBulkPayload(connection *conn) {
/* Close old rdb asynchronously. */
if (old_rdb_fd != -1) bioCreateCloseJob(old_rdb_fd);
/* Sync the directory to ensure rename is persisted */
if (fsyncFileDir(server.rdb_filename) == -1) {
serverLog(LL_WARNING,
"Failed trying to sync DB directory %s in "
"MASTER <-> REPLICA synchronization: %s",
server.rdb_filename, strerror(errno));
cancelReplicationHandshake(1);
return;
}
if (rdbLoad(server.rdb_filename,&rsi,RDBFLAGS_REPLICATION) != C_OK) {
serverLog(LL_WARNING,
"Failed trying to load the MASTER synchronization "

View File

@ -43,6 +43,7 @@
#include <sys/stat.h>
#include <dirent.h>
#include <fcntl.h>
#include <libgen.h>
#include "util.h"
#include "sha256.h"
@ -923,6 +924,54 @@ sds makePath(char *path, char *filename) {
return sdscatfmt(sdsempty(), "%s/%s", path, filename);
}
/* Given the filename, sync the corresponding directory.
*
* Usually a portable and safe pattern to overwrite existing files would be like:
* 1. create a new temp file (on the same file system!)
* 2. write data to the temp file
* 3. fsync() the temp file
* 4. rename the temp file to the appropriate name
* 5. fsync() the containing directory */
int fsyncFileDir(const char *filename) {
#ifdef _AIX
/* AIX is unable to fsync a directory */
return 0;
#endif
char temp_filename[PATH_MAX + 1];
char *dname;
int dir_fd;
if (strlen(filename) > PATH_MAX) {
errno = ENAMETOOLONG;
return -1;
}
/* In the glibc implementation dirname may modify their argument. */
memcpy(temp_filename, filename, strlen(filename) + 1);
dname = dirname(temp_filename);
dir_fd = open(dname, O_RDONLY);
if (dir_fd == -1) {
/* Some OSs don't allow us to open directories at all, just
* ignore the error in that case */
if (errno == EISDIR) {
return 0;
}
return -1;
}
/* Some OSs don't allow us to fsync directories at all, so we can ignore
* those errors. */
if (redis_fsync(dir_fd) == -1 && !(errno == EBADF || errno == EINVAL)) {
int save_errno = errno;
close(dir_fd);
errno = save_errno;
return -1;
}
close(dir_fd);
return 0;
}
#ifdef REDIS_TEST
#include <assert.h>

View File

@ -85,6 +85,7 @@ int dirExists(char *dname);
int dirRemove(char *dname);
int fileExist(char *filename);
sds makePath(char *path, char *filename);
int fsyncFileDir(const char *filename);
#ifdef REDIS_TEST
int utilTest(int argc, char **argv, int flags);