mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-22 08:08:53 -05:00
AOF_FSYNC_EVERYSEC higher resolution, change aof_last_fsync and aof_flush_postponed_start to use mstime (#13041)
Currently aof_last_fsync uses a low-resolution unixtime, which is really bad: it checks whether the absolute number of (full) seconds changed by one, so depending on which side of the second barrier it falls, we can get very different results. This PR changes the resolution to use milliseconds instead of complete seconds. In cases where the event loop cycles are short and rapid (e.g. running many fast commands with a short pipeline, or a high `hz` config), this change will not make much difference, since in any case we'll be quick to detect that we're on a "new second", and it's likely that these fsyncs will always be executed close to the second switch barrier. But in cases of rare or slow event loop cycles (e.g. either slow commands, or a very low rate of traffic to redis and a low `hz`), it could easily be that with the old code we'd have over 1.5 seconds between fsyncs in some cases, and less than 0.5 in others. See the discussion in #8612. This PR also handles aof_flush_postponed_start; the damage there is smaller since the threshold is 2 seconds, and not 1. --------- Co-authored-by: Oran Agra <oran@redislabs.com>
This commit is contained in:
parent
dd92dd8fb5
commit
9103ccc398
18
src/aof.c
18
src/aof.c
@ -833,7 +833,7 @@ int openNewIncrAofForAppend(void) {
|
||||
* is already synced at this point so fsync doesn't matter. */
|
||||
if (server.aof_fd != -1) {
|
||||
aof_background_fsync_and_close(server.aof_fd);
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
server.aof_last_fsync = server.mstime;
|
||||
}
|
||||
server.aof_fd = newfd;
|
||||
|
||||
@ -954,7 +954,7 @@ void stopAppendOnly(void) {
|
||||
if (redis_fsync(server.aof_fd) == -1) {
|
||||
serverLog(LL_WARNING,"Fail to fsync the AOF file: %s",strerror(errno));
|
||||
} else {
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
server.aof_last_fsync = server.mstime;
|
||||
}
|
||||
close(server.aof_fd);
|
||||
|
||||
@ -998,7 +998,7 @@ int startAppendOnly(void) {
|
||||
return C_ERR;
|
||||
}
|
||||
}
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
server.aof_last_fsync = server.mstime;
|
||||
/* If AOF fsync error in bio job, we just ignore it and log the event. */
|
||||
int aof_bio_fsync_status;
|
||||
atomicGet(server.aof_bio_fsync_status, aof_bio_fsync_status);
|
||||
@ -1074,7 +1074,7 @@ void flushAppendOnlyFile(int force) {
|
||||
* the data in page cache cannot be flushed in time. */
|
||||
if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
|
||||
server.aof_last_incr_fsync_offset != server.aof_last_incr_size &&
|
||||
server.unixtime > server.aof_last_fsync &&
|
||||
server.mstime - server.aof_last_fsync >= 1000 &&
|
||||
!(sync_in_progress = aofFsyncInProgress())) {
|
||||
goto try_fsync;
|
||||
|
||||
@ -1109,9 +1109,9 @@ void flushAppendOnlyFile(int force) {
|
||||
if (server.aof_flush_postponed_start == 0) {
|
||||
/* No previous write postponing, remember that we are
|
||||
* postponing the flush and return. */
|
||||
server.aof_flush_postponed_start = server.unixtime;
|
||||
server.aof_flush_postponed_start = server.mstime;
|
||||
return;
|
||||
} else if (server.unixtime - server.aof_flush_postponed_start < 2) {
|
||||
} else if (server.mstime - server.aof_flush_postponed_start < 2000) {
|
||||
/* We were already waiting for fsync to finish, but for less
|
||||
* than two seconds this is still ok. Postpone again. */
|
||||
return;
|
||||
@ -1260,15 +1260,15 @@ try_fsync:
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("aof-fsync-always",latency);
|
||||
server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
server.aof_last_fsync = server.mstime;
|
||||
atomicSet(server.fsynced_reploff_pending, server.master_repl_offset);
|
||||
} else if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
|
||||
server.unixtime > server.aof_last_fsync) {
|
||||
server.mstime - server.aof_last_fsync >= 1000) {
|
||||
if (!sync_in_progress) {
|
||||
aof_background_fsync(server.aof_fd);
|
||||
server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
|
||||
}
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
server.aof_last_fsync = server.mstime;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2056,7 +2056,7 @@ void initServerConfig(void) {
|
||||
server.aof_rewrite_base_size = 0;
|
||||
server.aof_rewrite_scheduled = 0;
|
||||
server.aof_flush_sleep = 0;
|
||||
server.aof_last_fsync = time(NULL);
|
||||
server.aof_last_fsync = time(NULL) * 1000;
|
||||
server.aof_cur_timestamp = 0;
|
||||
atomicSet(server.aof_bio_fsync_status,C_OK);
|
||||
server.aof_rewrite_time_last = -1;
|
||||
|
@ -1779,8 +1779,8 @@ struct redisServer {
|
||||
sds aof_buf; /* AOF buffer, written before entering the event loop */
|
||||
int aof_fd; /* File descriptor of currently selected AOF file */
|
||||
int aof_selected_db; /* Currently selected DB in AOF */
|
||||
time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
|
||||
time_t aof_last_fsync; /* UNIX time of last fsync() */
|
||||
mstime_t aof_flush_postponed_start; /* mstime of postponed AOF flush */
|
||||
mstime_t aof_last_fsync; /* mstime of last fsync() */
|
||||
time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
|
||||
time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
|
||||
time_t aof_cur_timestamp; /* Current record timestamp in AOF */
|
||||
|
Loading…
Reference in New Issue
Block a user