Handle remaining fsync errors (#8419)

In `aof.c`, we call fsync when stopping AOF, and now log a warning to let the user know if it fails.
In `cluster.c`, we now return an error; the calling function already handles these write errors.
In `redis-cli.c`, when the user asks to save an RDB file, we now print an error message and exit if fsync fails.
In `rio.c`, we now treat fsync errors the same way we treat write errors.
In `server.c`, we try to fsync the AOF file when shutting down Redis; if that fails, all we can do is log a warning.
In `bio.c`, if fsync of the AOF file fails, we set `aof_bio_fsync_status` to error and reject writes, just as we do after an AOF write error; the INFO command field `aof_last_write_status` is also reported as an error.
This commit is contained in:
Wang Yuan 2021-04-01 17:45:15 +08:00 committed by GitHub
parent 44d8b039e8
commit 1eb85249e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 53 additions and 16 deletions

View File

@ -234,9 +234,12 @@ void killAppendOnlyChild(void) {
void stopAppendOnly(void) {
serverAssert(server.aof_state != AOF_OFF);
flushAppendOnlyFile(1);
redis_fsync(server.aof_fd);
server.aof_fsync_offset = server.aof_current_size;
server.aof_last_fsync = server.unixtime;
if (redis_fsync(server.aof_fd) == -1) {
serverLog(LL_WARNING,"Fail to fsync the AOF file: %s",strerror(errno));
} else {
server.aof_fsync_offset = server.aof_current_size;
server.aof_last_fsync = server.unixtime;
}
close(server.aof_fd);
server.aof_fd = -1;
@ -290,6 +293,15 @@ int startAppendOnly(void) {
server.aof_last_fsync = server.unixtime;
server.aof_fd = newfd;
/* If AOF fsync error in bio job, we just ignore it and log the event. */
int aof_bio_fsync_status;
atomicGet(server.aof_bio_fsync_status, aof_bio_fsync_status);
if (aof_bio_fsync_status == C_ERR) {
serverLog(LL_WARNING,
"AOF reopen, just ignore the AOF fsync error in bio job");
atomicSet(server.aof_bio_fsync_status,C_OK);
}
/* If AOF was in error state, we just ignore it and log the event. */
if (server.aof_last_write_status == C_ERR) {
serverLog(LL_WARNING,"AOF reopen, just ignore the last error.");

View File

@ -220,7 +220,17 @@ void *bioProcessBackgroundJobs(void *arg) {
if (type == BIO_CLOSE_FILE) {
close(job->fd);
} else if (type == BIO_AOF_FSYNC) {
redis_fsync(job->fd);
if (redis_fsync(job->fd) == -1) {
int last_status;
atomicGet(server.aof_bio_fsync_status,last_status);
atomicSet(server.aof_bio_fsync_status,C_ERR);
if (last_status == C_OK) {
serverLog(LL_WARNING,
"Fail to fsync the AOF file: %s",strerror(errno));
}
} else {
atomicSet(server.aof_bio_fsync_status,C_OK);
}
} else if (type == BIO_LAZY_FREE) {
job->free_fn(job->free_args);
} else {

View File

@ -358,7 +358,7 @@ int clusterSaveConfig(int do_fsync) {
if (write(fd,ci,sdslen(ci)) != (ssize_t)sdslen(ci)) goto err;
if (do_fsync) {
server.cluster->todo_before_sleep &= ~CLUSTER_TODO_FSYNC_CONFIG;
fsync(fd);
if (fsync(fd) == -1) goto err;
}
/* Truncate the file if needed to remove the final \n padding that

View File

@ -7202,7 +7202,10 @@ static void getRDB(clusterManagerNode *node) {
redisFree(s); /* Close the connection ASAP as fsync() may take time. */
if (node)
node->context = NULL;
fsync(fd);
if (fsync(fd) == -1) {
fprintf(stderr,"Fail to fsync '%s': %s\n", filename, strerror(errno));
exit(1);
}
close(fd);
if (node) {
sdsfree(filename);

View File

@ -117,7 +117,7 @@ static size_t rioFileWrite(rio *r, const void *buf, size_t len) {
r->io.file.buffered >= r->io.file.autosync)
{
fflush(r->io.file.fp);
redis_fsync(fileno(r->io.file.fp));
if (redis_fsync(fileno(r->io.file.fp)) == -1) return 0;
r->io.file.buffered = 0;
}
return retval;

View File

@ -2655,6 +2655,7 @@ void initServerConfig(void) {
server.aof_rewrite_scheduled = 0;
server.aof_flush_sleep = 0;
server.aof_last_fsync = time(NULL);
atomicSet(server.aof_bio_fsync_status,C_OK);
server.aof_rewrite_time_last = -1;
server.aof_rewrite_time_start = -1;
server.aof_lastbgrewrite_status = C_OK;
@ -4322,7 +4323,10 @@ int prepareForShutdown(int flags) {
/* Append only file: flush buffers and fsync() the AOF at exit */
serverLog(LL_NOTICE,"Calling fsync() on the AOF file.");
flushAppendOnlyFile(1);
redis_fsync(server.aof_fd);
if (redis_fsync(server.aof_fd) == -1) {
serverLog(LL_WARNING,"Fail to fsync the AOF file: %s.",
strerror(errno));
}
}
/* Create a new RDB file before exiting. */
@ -4385,13 +4389,17 @@ int writeCommandsDeniedByDiskError(void) {
server.lastbgsave_status == C_ERR)
{
return DISK_ERROR_TYPE_RDB;
} else if (server.aof_state != AOF_OFF &&
server.aof_last_write_status == C_ERR)
{
return DISK_ERROR_TYPE_AOF;
} else {
return DISK_ERROR_TYPE_NONE;
} else if (server.aof_state != AOF_OFF) {
int aof_bio_fsync_status;
atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status);
if (server.aof_last_write_status == C_ERR ||
aof_bio_fsync_status == C_ERR)
{
return DISK_ERROR_TYPE_AOF;
}
}
return DISK_ERROR_TYPE_NONE;
}
/* The PING command. It works in a different way if the client is in
@ -4851,7 +4859,9 @@ sds genRedisInfoString(const char *section) {
} else if (server.stat_current_save_keys_total) {
fork_perc = ((double)server.stat_current_save_keys_processed / server.stat_current_save_keys_total) * 100;
}
int aof_bio_fsync_status;
atomicGet(server.aof_bio_fsync_status,aof_bio_fsync_status);
info = sdscatprintf(info,
"# Persistence\r\n"
"loading:%d\r\n"
@ -4898,7 +4908,8 @@ sds genRedisInfoString(const char *section) {
(intmax_t)((server.child_type != CHILD_TYPE_AOF) ?
-1 : time(NULL)-server.aof_rewrite_time_start),
(server.aof_lastbgrewrite_status == C_OK) ? "ok" : "err",
(server.aof_last_write_status == C_OK) ? "ok" : "err",
(server.aof_last_write_status == C_OK &&
aof_bio_fsync_status == C_OK) ? "ok" : "err",
server.stat_aof_cow_bytes,
server.child_type == CHILD_TYPE_MODULE,
server.stat_module_cow_bytes);

View File

@ -1362,6 +1362,7 @@ struct redisServer {
int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */
int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */
int aof_use_rdb_preamble; /* Use RDB preamble on AOF rewrites. */
redisAtomic int aof_bio_fsync_status; /* Status of AOF fsync in bio job. */
/* AOF pipes used to communicate between parent and child during rewrite. */
int aof_pipe_write_data_to_child;
int aof_pipe_read_data_from_parent;