mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-23 00:28:26 -05:00
Fsync directory while persisting AOF manifest, RDB file, and config file (#10737)
The current process to persist files is `write` the data, `fsync` and `rename` the file, but a underlying problem is that the rename may be lost when a sudden crash like power outage and the directory hasn't been persisted. The article [Ensuring data reaches disk](https://lwn.net/Articles/457667/) mentions a safe way to update file should be: 1. create a new temp file (on the same file system!) 2. write data to the temp file 3. fsync() the temp file 4. rename the temp file to the appropriate name 5. fsync() the containing directory This commit handles CONFIG REWRITE, AOF manifest, and RDB file (both for persistence, and the one the replica gets from the master). It doesn't handle (yet), ACL SAVE and Cluster configs, since these don't yet follow this pattern.
This commit is contained in:
parent
4c72a09b78
commit
99a425d0f3
10
src/aof.c
10
src/aof.c
@ -572,6 +572,16 @@ int writeAofManifestFile(sds buf) {
|
||||
tmp_am_name, am_name, strerror(errno));
|
||||
|
||||
ret = C_ERR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Also sync the AOF directory as new AOF files may be added in the directory */
|
||||
if (fsyncFileDir(am_filepath) == -1) {
|
||||
serverLog(LL_WARNING, "Fail to fsync AOF directory %s: %s.",
|
||||
am_filepath, strerror(errno));
|
||||
|
||||
ret = C_ERR;
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
cleanup:
|
||||
|
@ -1665,6 +1665,7 @@ int rewriteConfigOverwriteFile(char *configfile, sds content) {
|
||||
const char *tmp_suffix = ".XXXXXX";
|
||||
size_t offset = 0;
|
||||
ssize_t written_bytes = 0;
|
||||
int old_errno;
|
||||
|
||||
int tmp_path_len = snprintf(tmp_conffile, sizeof(tmp_conffile), "%s%s", configfile, tmp_suffix);
|
||||
if (tmp_path_len <= 0 || (unsigned int)tmp_path_len >= sizeof(tmp_conffile)) {
|
||||
@ -1701,14 +1702,18 @@ int rewriteConfigOverwriteFile(char *configfile, sds content) {
|
||||
serverLog(LL_WARNING, "Could not chmod config file (%s)", strerror(errno));
|
||||
else if (rename(tmp_conffile, configfile) == -1)
|
||||
serverLog(LL_WARNING, "Could not rename tmp config file (%s)", strerror(errno));
|
||||
else if (fsyncFileDir(configfile) == -1)
|
||||
serverLog(LL_WARNING, "Could not sync config file dir (%s)", strerror(errno));
|
||||
else {
|
||||
retval = 0;
|
||||
serverLog(LL_DEBUG, "Rewritten config file (%s) successfully", configfile);
|
||||
}
|
||||
|
||||
cleanup:
|
||||
old_errno = errno;
|
||||
close(fd);
|
||||
if (retval) unlink(tmp_conffile);
|
||||
errno = old_errno;
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
11
src/rdb.c
11
src/rdb.c
@ -1397,6 +1397,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) {
|
||||
FILE *fp = NULL;
|
||||
rio rdb;
|
||||
int error = 0;
|
||||
char *err_op; /* For a detailed log */
|
||||
|
||||
snprintf(tmpfile,256,"temp-%d.rdb", (int) getpid());
|
||||
fp = fopen(tmpfile,"w");
|
||||
@ -1420,13 +1421,14 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) {
|
||||
|
||||
if (rdbSaveRio(req,&rdb,&error,RDBFLAGS_NONE,rsi) == C_ERR) {
|
||||
errno = error;
|
||||
err_op = "rdbSaveRio";
|
||||
goto werr;
|
||||
}
|
||||
|
||||
/* Make sure data will not remain on the OS's output buffers */
|
||||
if (fflush(fp)) goto werr;
|
||||
if (fsync(fileno(fp))) goto werr;
|
||||
if (fclose(fp)) { fp = NULL; goto werr; }
|
||||
if (fflush(fp)) { err_op = "fflush"; goto werr; }
|
||||
if (fsync(fileno(fp))) { err_op = "fsync"; goto werr; }
|
||||
if (fclose(fp)) { fp = NULL; err_op = "fclose"; goto werr; }
|
||||
fp = NULL;
|
||||
|
||||
/* Use RENAME to make sure the DB file is changed atomically only
|
||||
@ -1445,6 +1447,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) {
|
||||
stopSaving(0);
|
||||
return C_ERR;
|
||||
}
|
||||
if (fsyncFileDir(filename) == -1) { err_op = "fsyncFileDir"; goto werr; }
|
||||
|
||||
serverLog(LL_NOTICE,"DB saved on disk");
|
||||
server.dirty = 0;
|
||||
@ -1454,7 +1457,7 @@ int rdbSave(int req, char *filename, rdbSaveInfo *rsi) {
|
||||
return C_OK;
|
||||
|
||||
werr:
|
||||
serverLog(LL_WARNING,"Write error saving DB on disk: %s", strerror(errno));
|
||||
serverLog(LL_WARNING,"Write error saving DB on disk(%s): %s", err_op, strerror(errno));
|
||||
if (fp) fclose(fp);
|
||||
unlink(tmpfile);
|
||||
stopSaving(0);
|
||||
|
@ -2151,6 +2151,16 @@ void readSyncBulkPayload(connection *conn) {
|
||||
/* Close old rdb asynchronously. */
|
||||
if (old_rdb_fd != -1) bioCreateCloseJob(old_rdb_fd);
|
||||
|
||||
/* Sync the directory to ensure rename is persisted */
|
||||
if (fsyncFileDir(server.rdb_filename) == -1) {
|
||||
serverLog(LL_WARNING,
|
||||
"Failed trying to sync DB directory %s in "
|
||||
"MASTER <-> REPLICA synchronization: %s",
|
||||
server.rdb_filename, strerror(errno));
|
||||
cancelReplicationHandshake(1);
|
||||
return;
|
||||
}
|
||||
|
||||
if (rdbLoad(server.rdb_filename,&rsi,RDBFLAGS_REPLICATION) != C_OK) {
|
||||
serverLog(LL_WARNING,
|
||||
"Failed trying to load the MASTER synchronization "
|
||||
|
49
src/util.c
49
src/util.c
@ -43,6 +43,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <dirent.h>
|
||||
#include <fcntl.h>
|
||||
#include <libgen.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "sha256.h"
|
||||
@ -923,6 +924,54 @@ sds makePath(char *path, char *filename) {
|
||||
return sdscatfmt(sdsempty(), "%s/%s", path, filename);
|
||||
}
|
||||
|
||||
/* Given the filename, sync the corresponding directory.
|
||||
*
|
||||
* Usually a portable and safe pattern to overwrite existing files would be like:
|
||||
* 1. create a new temp file (on the same file system!)
|
||||
* 2. write data to the temp file
|
||||
* 3. fsync() the temp file
|
||||
* 4. rename the temp file to the appropriate name
|
||||
* 5. fsync() the containing directory */
|
||||
int fsyncFileDir(const char *filename) {
|
||||
#ifdef _AIX
|
||||
/* AIX is unable to fsync a directory */
|
||||
return 0;
|
||||
#endif
|
||||
char temp_filename[PATH_MAX + 1];
|
||||
char *dname;
|
||||
int dir_fd;
|
||||
|
||||
if (strlen(filename) > PATH_MAX) {
|
||||
errno = ENAMETOOLONG;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* In the glibc implementation dirname may modify their argument. */
|
||||
memcpy(temp_filename, filename, strlen(filename) + 1);
|
||||
dname = dirname(temp_filename);
|
||||
|
||||
dir_fd = open(dname, O_RDONLY);
|
||||
if (dir_fd == -1) {
|
||||
/* Some OSs don't allow us to open directories at all, just
|
||||
* ignore the error in that case */
|
||||
if (errno == EISDIR) {
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
/* Some OSs don't allow us to fsync directories at all, so we can ignore
|
||||
* those errors. */
|
||||
if (redis_fsync(dir_fd) == -1 && !(errno == EBADF || errno == EINVAL)) {
|
||||
int save_errno = errno;
|
||||
close(dir_fd);
|
||||
errno = save_errno;
|
||||
return -1;
|
||||
}
|
||||
|
||||
close(dir_fd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef REDIS_TEST
|
||||
#include <assert.h>
|
||||
|
||||
|
@ -85,6 +85,7 @@ int dirExists(char *dname);
|
||||
int dirRemove(char *dname);
|
||||
int fileExist(char *filename);
|
||||
sds makePath(char *path, char *filename);
|
||||
int fsyncFileDir(const char *filename);
|
||||
|
||||
#ifdef REDIS_TEST
|
||||
int utilTest(int argc, char **argv, int flags);
|
||||
|
Loading…
Reference in New Issue
Block a user