AOF_FSYNC_EVERYSEC higher resolution, change aof_last_fsync and aof_flush_postponed_start to use mstime (#13041)

Currently aof_last_fsync is using a low resolution unixtime is really
bad,
it checks if the absolute number of (full) seconds changed by one.
depending on which side of the second barrier it falls, we can get very
different results.

This PR change the resolution to use milliseconds instead of complete
seconds.

In cases where the event loop cycle duration is short and their rapid
(e.g. running
many fast commands with short pipeline, or a high `hz` config), this
change will not
make much difference, since in anyway, we'll be quick to detect that
we're on a "new
second", and it's likely that these fsync will always be executed close
to the second
switch barrier.

But in cases of rare or slow event loops cycles (e.g. either slow
commands, or very
low rate of traffic to redis, and low `hz`), it could easily be that
with the old code,
in some cases we'll have over 1.5 seconds between fsyncs, and in others
less than 0.5.

see discussion in #8612

This PR also handle aof_flush_postponed_start as well, the damage there
is smaller
since the threshold is 2 seconds, and not 1.

---------

Co-authored-by: Oran Agra <oran@redislabs.com>
This commit is contained in:
Binbin 2024-02-18 18:08:29 +08:00 committed by GitHub
parent dd92dd8fb5
commit 9103ccc398
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 12 additions and 12 deletions

View File

@ -833,7 +833,7 @@ int openNewIncrAofForAppend(void) {
* is already synced at this point so fsync doesn't matter. */
if (server.aof_fd != -1) {
aof_background_fsync_and_close(server.aof_fd);
server.aof_last_fsync = server.unixtime;
server.aof_last_fsync = server.mstime;
}
server.aof_fd = newfd;
@ -954,7 +954,7 @@ void stopAppendOnly(void) {
if (redis_fsync(server.aof_fd) == -1) {
serverLog(LL_WARNING,"Fail to fsync the AOF file: %s",strerror(errno));
} else {
server.aof_last_fsync = server.unixtime;
server.aof_last_fsync = server.mstime;
}
close(server.aof_fd);
@ -998,7 +998,7 @@ int startAppendOnly(void) {
return C_ERR;
}
}
server.aof_last_fsync = server.unixtime;
server.aof_last_fsync = server.mstime;
/* If AOF fsync error in bio job, we just ignore it and log the event. */
int aof_bio_fsync_status;
atomicGet(server.aof_bio_fsync_status, aof_bio_fsync_status);
@ -1074,7 +1074,7 @@ void flushAppendOnlyFile(int force) {
* the data in page cache cannot be flushed in time. */
if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
server.aof_last_incr_fsync_offset != server.aof_last_incr_size &&
server.unixtime > server.aof_last_fsync &&
server.mstime - server.aof_last_fsync >= 1000 &&
!(sync_in_progress = aofFsyncInProgress())) {
goto try_fsync;
@ -1109,9 +1109,9 @@ void flushAppendOnlyFile(int force) {
if (server.aof_flush_postponed_start == 0) {
/* No previous write postponing, remember that we are
* postponing the flush and return. */
server.aof_flush_postponed_start = server.unixtime;
server.aof_flush_postponed_start = server.mstime;
return;
} else if (server.unixtime - server.aof_flush_postponed_start < 2) {
} else if (server.mstime - server.aof_flush_postponed_start < 2000) {
/* We were already waiting for fsync to finish, but for less
* than two seconds this is still ok. Postpone again. */
return;
@ -1260,15 +1260,15 @@ try_fsync:
latencyEndMonitor(latency);
latencyAddSampleIfNeeded("aof-fsync-always",latency);
server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
server.aof_last_fsync = server.unixtime;
server.aof_last_fsync = server.mstime;
atomicSet(server.fsynced_reploff_pending, server.master_repl_offset);
} else if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
server.unixtime > server.aof_last_fsync) {
server.mstime - server.aof_last_fsync >= 1000) {
if (!sync_in_progress) {
aof_background_fsync(server.aof_fd);
server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
}
server.aof_last_fsync = server.unixtime;
server.aof_last_fsync = server.mstime;
}
}

View File

@ -2056,7 +2056,7 @@ void initServerConfig(void) {
server.aof_rewrite_base_size = 0;
server.aof_rewrite_scheduled = 0;
server.aof_flush_sleep = 0;
server.aof_last_fsync = time(NULL);
server.aof_last_fsync = time(NULL) * 1000;
server.aof_cur_timestamp = 0;
atomicSet(server.aof_bio_fsync_status,C_OK);
server.aof_rewrite_time_last = -1;

View File

@ -1779,8 +1779,8 @@ struct redisServer {
sds aof_buf; /* AOF buffer, written before entering the event loop */
int aof_fd; /* File descriptor of currently selected AOF file */
int aof_selected_db; /* Currently selected DB in AOF */
time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
time_t aof_last_fsync; /* UNIX time of last fsync() */
mstime_t aof_flush_postponed_start; /* mstime of postponed AOF flush */
mstime_t aof_last_fsync; /* mstime of last fsync() */
time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
time_t aof_cur_timestamp; /* Current record timestamp in AOF */