2024-03-21 09:30:47 -04:00
|
|
|
// Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
|
|
|
|
// SPDX-FileCopyrightText: 2024 Redict Contributors
|
|
|
|
// SPDX-FileCopyrightText: 2024 Salvatore Sanfilippo <antirez at gmail dot com>
|
|
|
|
//
|
|
|
|
// SPDX-License-Identifier: BSD-3-Clause
|
2024-03-21 15:11:44 -04:00
|
|
|
// SPDX-License-Identifier: LGPL-3.0-only
|
2009-03-22 05:30:00 -04:00
|
|
|
|
|
|
|
#ifndef __AE_H__
|
|
|
|
#define __AE_H__
|
|
|
|
|
2020-08-28 04:54:10 -04:00
|
|
|
#include "monotonic.h"
|
2016-01-08 09:05:14 -05:00
|
|
|
|
2009-11-23 12:50:39 -05:00
|
|
|
/* Status codes.
 * NOTE(review): presumably what the int-returning ae* functions report for
 * success and failure; confirm against ae.c. */
#define AE_OK 0
|
|
|
|
#define AE_ERR -1
|
|
|
|
|
ae.c: introduce the concept of read->write barrier.
AOF fsync=always, and certain Redis Cluster bus operations, require
fsyncing data to disk before replying with an acknowledgement.
In such case, in order to implement Group Commits, we want to be sure
that queries that are read in a given cycle of the event loop, are never
served to clients in the same event loop iteration. This way, by using
the event loop "before sleep" callback, we can fsync the information
just one time before returning into the event loop for the next cycle.
This is much more efficient compared to calling fsync() multiple times.
Unfortunately, because of a bug, this was not always guaranteed: the
actual way the events were installed was the sole thing that could
control it. Normally this problem is hard to trigger when AOF is enabled
with fsync=always, because we try to flush the output buffers to the
socket directly in the beforeSleep() function of Redis. However if the
output buffers are full, we actually install a write event, and in such
a case, this bug could happen.
This change to ae.c modifies the event loop implementation to make this
concept explicit. Write events that are registered with:
AE_WRITABLE|AE_BARRIER
are guaranteed to never fire after the readable event was fired for the
same file descriptor in the same event loop iteration. In this way we are
sure that data is persisted to disk before the client performing the
operation receives an acknowledgement.
However note that this semantics does not provide all the guarantees
that one may believe are automatically provided. Take the example of the
blocking list operations in Redis.
With AOF and fsync=always we could have:
Client A doing: BLPOP myqueue 0
Client B doing: RPUSH myqueue a b c
In this scenario, Client A will get the "a" element immediately after
the Client B RPUSH is executed, even before the operation is persisted.
However when Client B gets the acknowledgement, it can be sure that
"b,c" are already safe on disk inside the list.
What to note here is that it cannot be assumed that Client A receiving
the element is a guarantee that the operation succeeded from the point
of view of Client B.
This is due to the fact that the barrier exists within the same socket,
and not between different sockets. However in the case above, the
element "a" was not going to be persisted regardless, so it is a pretty
synthetic argument.
2018-02-23 11:42:24 -05:00
|
|
|
/* File event mask bits. The non-zero values are distinct powers of two so
 * that they can be OR-ed together, e.g. AE_WRITABLE|AE_BARRIER. */
#define AE_NONE 0 /* No events registered. */
|
|
|
|
#define AE_READABLE 1 /* Fire when descriptor is readable. */
|
|
|
|
#define AE_WRITABLE 2 /* Fire when descriptor is writable. */
|
|
|
|
#define AE_BARRIER 4 /* With WRITABLE, never fire the event if the
                        READABLE event already fired in the same event
                        loop iteration. Useful when you want to persist
                        things to disk before sending replies, and want
                        to do that in a group fashion. */
|
2009-11-23 12:50:39 -05:00
|
|
|
|
2020-05-12 07:07:44 -04:00
|
|
|
/* Bit flags, one bit each.
 * NOTE(review): presumably these are the values accepted by the flags
 * argument of aeProcessEvents(); confirm against ae.c. */
#define AE_FILE_EVENTS (1<<0) /* bit 0: file events */
|
|
|
|
#define AE_TIME_EVENTS (1<<1) /* bit 1: time events */
|
2009-11-23 12:50:39 -05:00
|
|
|
#define AE_ALL_EVENTS (AE_FILE_EVENTS|AE_TIME_EVENTS) /* both file and time events */
|
2020-05-12 07:07:44 -04:00
|
|
|
#define AE_DONT_WAIT (1<<2) /* NOTE(review): presumably makes event processing return without blocking; confirm */
|
|
|
|
#define AE_CALL_BEFORE_SLEEP (1<<3) /* NOTE(review): presumably enables the beforesleep hook; confirm */
|
|
|
|
#define AE_CALL_AFTER_SLEEP (1<<4) /* NOTE(review): presumably enables the aftersleep hook; confirm */
|
2009-11-23 12:50:39 -05:00
|
|
|
|
|
|
|
/* Return value for aeTimeProc callbacks.
 * NOTE(review): presumably tells the loop not to reschedule the timer;
 * confirm against ae.c. */
#define AE_NOMORE -1
|
2016-01-08 09:05:14 -05:00
|
|
|
/* NOTE(review): presumably stored in aeTimeEvent.id to mark a time event as
 * deleted until it is unlinked from the list; confirm against ae.c. */
#define AE_DELETED_EVENT_ID -1
|
2009-11-23 12:50:39 -05:00
|
|
|
|
|
|
|
/* Macros */
|
|
|
|
/* Evaluate V and discard the result, typically to silence unused
 * variable/parameter warnings. The argument is parenthesized so that the
 * cast to void applies to the whole expression: without the inner
 * parentheses AE_NOTUSED(a + 1) would expand to ((void) a + 1), which does
 * not compile. */
#define AE_NOTUSED(V) ((void) (V))
|
|
|
|
|
2009-03-22 05:30:00 -04:00
|
|
|
/* Forward declaration: the callback typedefs that follow take a
 * struct aeEventLoop pointer before the struct itself is defined. */
struct aeEventLoop;
|
|
|
|
|
|
|
|
/* Types and data structures */
|
|
|
|
/* Callback type for file events. It receives the event loop, the file
 * descriptor, a clientData pointer and an event mask.
 * NOTE(review): mask is presumably the AE_READABLE/AE_WRITABLE bits that
 * fired and clientData the pointer given at registration; confirm in ae.c. */
typedef void aeFileProc(struct aeEventLoop *eventLoop, int fd, void *clientData, int mask);
|
|
|
|
/* Callback type for time events. It receives the event loop, the time event
 * id and a clientData pointer, and returns an int.
 * NOTE(review): returning AE_NOMORE presumably ends the timer while any
 * other value reschedules it; confirm in ae.c. */
typedef int aeTimeProc(struct aeEventLoop *eventLoop, long long id, void *clientData);
|
|
|
|
/* Finalizer callback type (the type of aeTimeEvent.finalizerProc). It
 * receives the event loop and a clientData pointer.
 * NOTE(review): presumably run when a time event is deleted; confirm in
 * ae.c. */
typedef void aeEventFinalizerProc(struct aeEventLoop *eventLoop, void *clientData);
|
2010-01-28 10:12:04 -05:00
|
|
|
/* Hook type used for both the beforesleep and the aftersleep members of
 * aeEventLoop (see aeSetBeforeSleepProc and aeSetAfterSleepProc). */
typedef void aeBeforeSleepProc(struct aeEventLoop *eventLoop);
|
2009-03-22 05:30:00 -04:00
|
|
|
|
|
|
|
/* File event structure: an event mask, a read callback, a write callback
 * and a user pointer.
 * NOTE(review): presumably one of these exists per file descriptor inside
 * aeEventLoop.events; confirm in ae.c. */
|
|
|
|
typedef struct aeFileEvent {
|
ae.c: introduce the concept of read->write barrier.
AOF fsync=always, and certain Redis Cluster bus operations, require to
fsync data on disk before replying with an acknowledge.
In such case, in order to implement Group Commits, we want to be sure
that queries that are read in a given cycle of the event loop, are never
served to clients in the same event loop iteration. This way, by using
the event loop "before sleep" callback, we can fsync the information
just one time before returning into the event loop for the next cycle.
This is much more efficient compared to calling fsync() multiple times.
Unfortunately because of a bug, this was not always guaranteed: the
actual way the events are installed was the sole thing that could
control. Normally this problem is hard to trigger when AOF is enabled
with fsync=always, because we try to flush the output buffers to the
socekt directly in the beforeSleep() function of Redis. However if the
output buffers are full, we actually install a write event, and in such
a case, this bug could happen.
This change to ae.c modifies the event loop implementation to make this
concept explicit. Write events that are registered with:
AE_WRITABLE|AE_BARRIER
Are guaranteed to never fire after the readable event was fired for the
same file descriptor. In this way we are sure that data is persisted to
disk before the client performing the operation receives an
acknowledged.
However note that this semantics does not provide all the guarantees
that one may believe are automatically provided. Take the example of the
blocking list operations in Redis.
With AOF and fsync=always we could have:
Client A doing: BLPOP myqueue 0
Client B doing: RPUSH myqueue a b c
In this scenario, Client A will get the "a" elements immediately after
the Client B RPUSH will be executed, even before the operation is persisted.
However when Client B will get the acknowledge, it can be sure that
"b,c" are already safe on disk inside the list.
What to note here is that it cannot be assumed that Client A receiving
the element is a guaranteed that the operation succeeded from the point
of view of Client B.
This is due to the fact that the barrier exists within the same socket,
and not between different sockets. However in the case above, the
element "a" was not going to be persisted regardless, so it is a pretty
synthetic argument.
2018-02-23 11:42:24 -05:00
|
|
|
int mask; /* bitwise OR of AE_READABLE, AE_WRITABLE and AE_BARRIER (AE_NONE when nothing is registered) */
|
2009-11-23 12:50:39 -05:00
|
|
|
aeFileProc *rfileProc; /* NOTE(review): presumably the handler for AE_READABLE; confirm in ae.c */
|
|
|
|
aeFileProc *wfileProc; /* NOTE(review): presumably the handler for AE_WRITABLE; confirm in ae.c */
|
2009-03-22 05:30:00 -04:00
|
|
|
void *clientData; /* opaque user pointer; NOTE(review): presumably handed back to the callbacks */
|
|
|
|
} aeFileEvent;
|
|
|
|
|
|
|
|
/* Time event structure: one node of the doubly linked list of timers
 * (linked through prev and next). */
|
|
|
|
typedef struct aeTimeEvent {
|
|
|
|
long long id; /* time event identifier. */
|
2020-08-28 04:54:10 -04:00
|
|
|
monotime when; /* NOTE(review): presumably the monotonic time at which the event is due; confirm in ae.c */
|
2009-03-22 05:30:00 -04:00
|
|
|
aeTimeProc *timeProc; /* timer callback; NOTE(review): presumably invoked when the event is due */
|
|
|
|
aeEventFinalizerProc *finalizerProc; /* finalizer callback called when the time event is deleted; may be NULL */
|
|
|
|
void *clientData; /* opaque user pointer; NOTE(review): presumably passed to timeProc and finalizerProc */
|
Fix ae.c when a timer finalizerProc adds an event.
While this feature is not used by Redis, ae.c implements the ability for
a timer to call a finalizer callback when an timer event is deleted.
This feature was bugged since the start, and because it was never used
we never noticed a problem. However Anthony LaTorre was using the same
library in order to implement a different system: he found a bug that he
describes as follows, and which he fixed with the patch in this commit,
sent me by private email:
--- Anthony email ---
've found one bug in the current implementation of the timed events.
It's possible to lose track of a timed event if an event is added in
the finalizerProc of another event.
For example, suppose you start off with three timed events 1, 2, and
3. Then the linked list looks like:
3 -> 2 -> 1
Then, you run processTimeEvents and events 2 and 3 finish, so now the
list looks like:
-1 -> -1 -> 2
Now, on the next iteration of processTimeEvents it starts by deleting
the first event, and suppose this finalizerProc creates a new event,
so that the list looks like this:
4 -> -1 -> 2
On the next iteration of the while loop, when it gets to the second
event, the variable prev is still set to NULL, so that the head of the
event loop after the next event will be set to 2, i.e. after deleting
the next event the event loop will look like:
2
and the event with id 4 will be lost.
I've attached an example program to illustrate the issue. If you run
it you will see that it prints:
```
foo id = 0
spam!
```
But if you uncomment line 29 and run it again it won't print "spam!".
--- End of email ---
Test.c source code is as follows:
#include "ae.h"
#include <stdio.h>
aeEventLoop *el;
int foo(struct aeEventLoop *el, long long id, void *data)
{
printf("foo id = %lld\n", id);
return AE_NOMORE;
}
int spam(struct aeEventLoop *el, long long id, void *data)
{
printf("spam!\n");
return AE_NOMORE;
}
void bar(struct aeEventLoop *el, void *data)
{
aeCreateTimeEvent(el, 0, spam, NULL, NULL);
}
int main(int argc, char **argv)
{
el = aeCreateEventLoop(100);
//aeCreateTimeEvent(el, 0, foo, NULL, NULL);
aeCreateTimeEvent(el, 0, foo, NULL, bar);
aeMain(el);
return 0;
}
Anthony fixed the problem by using a linked list for the list of timers, and
sent me back this patch after he tested the code in production for some time.
The code looks sane to me, so committing it to Redis.
2018-03-28 08:06:08 -04:00
|
|
|
struct aeTimeEvent *prev; /* previous node in the list of timers */
|
2009-03-22 05:30:00 -04:00
|
|
|
struct aeTimeEvent *next; /* next node in the list of timers */
|
2020-05-14 11:37:24 -04:00
|
|
|
int refcount; /* refcount to prevent timer events from being
               * freed in recursive time event calls. */
|
2009-03-22 05:30:00 -04:00
|
|
|
} aeTimeEvent;
|
|
|
|
|
2009-11-23 12:50:39 -05:00
|
|
|
/* A fired event: a file descriptor together with an event mask.
 * NOTE(review): presumably the polling backend fills aeEventLoop.fired with
 * these after each poll; confirm in ae.c. */
|
|
|
|
typedef struct aeFiredEvent {
|
|
|
|
int fd; /* file descriptor the event refers to */
|
|
|
|
int mask; /* NOTE(review): presumably the AE_READABLE/AE_WRITABLE bits that fired; confirm */
|
|
|
|
} aeFiredEvent;
|
|
|
|
|
2009-03-22 05:30:00 -04:00
|
|
|
/* State of an event based program: the registered file events, the fired
 * events, the list of time events, and the sleep hooks. */
|
|
|
|
typedef struct aeEventLoop {
|
2011-12-15 05:42:40 -05:00
|
|
|
int maxfd; /* highest file descriptor currently registered */
|
|
|
|
int setsize; /* max number of file descriptors tracked */
|
2009-03-22 05:30:00 -04:00
|
|
|
long long timeEventNextId; /* NOTE(review): presumably the id handed to the next created time event; confirm */
|
2011-12-15 05:42:40 -05:00
|
|
|
aeFileEvent *events; /* Registered events */
|
|
|
|
aeFiredEvent *fired; /* Fired events */
|
2009-03-22 05:30:00 -04:00
|
|
|
aeTimeEvent *timeEventHead; /* head of the list of time events (nodes linked via prev/next) */
|
|
|
|
int stop; /* NOTE(review): presumably set by aeStop() to make aeMain() return; confirm */
|
2009-11-23 12:50:39 -05:00
|
|
|
void *apidata; /* This is used for polling API specific data */
|
2010-01-28 10:12:04 -05:00
|
|
|
aeBeforeSleepProc *beforesleep; /* NOTE(review): presumably installed by aeSetBeforeSleepProc(); confirm */
|
2017-05-03 05:26:21 -04:00
|
|
|
aeBeforeSleepProc *aftersleep; /* NOTE(review): presumably installed by aeSetAfterSleepProc(); confirm */
|
2019-08-11 09:07:53 -04:00
|
|
|
int flags; /* NOTE(review): presumably holds AE_DONT_WAIT as toggled by aeSetDontWait(); confirm */
|
2009-03-22 05:30:00 -04:00
|
|
|
} aeEventLoop;
|
|
|
|
|
|
|
|
/* Prototypes */
|
2011-12-15 05:42:40 -05:00
|
|
|
/* Create an event loop.
 * NOTE(review): setsize presumably becomes aeEventLoop.setsize (max number
 * of file descriptors tracked) and NULL is presumably returned on failure;
 * confirm in ae.c. */
aeEventLoop *aeCreateEventLoop(int setsize);
|
2009-03-22 05:30:00 -04:00
|
|
|
/* Destroy an event loop obtained from aeCreateEventLoop().
 * NOTE(review): presumably frees the loop and its event tables; confirm in
 * ae.c. */
void aeDeleteEventLoop(aeEventLoop *eventLoop);
|
|
|
|
/* Ask the event loop to stop.
 * NOTE(review): presumably sets eventLoop->stop so that aeMain() returns;
 * confirm in ae.c. */
void aeStop(aeEventLoop *eventLoop);
|
|
|
|
/* Register proc and clientData for the events in mask on fd. mask is built
 * from AE_READABLE and/or AE_WRITABLE, optionally OR-ed with AE_BARRIER.
 * NOTE(review): presumably returns AE_OK on success and AE_ERR on failure;
 * confirm in ae.c. */
int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
|
2009-11-23 12:50:39 -05:00
|
|
|
aeFileProc *proc, void *clientData);
|
2009-03-22 05:30:00 -04:00
|
|
|
/* Unregister the events in mask for fd.
 * NOTE(review): handling of events that were never registered is not
 * visible here; confirm in ae.c. */
void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask);
|
2011-11-21 10:05:29 -05:00
|
|
|
/* NOTE(review): presumably returns the event mask currently registered for
 * fd; confirm in ae.c. */
int aeGetFileEvents(aeEventLoop *eventLoop, int fd);
|
Add event loop support to the module API (#10001)
Modules can now register sockets/pipes with the Redis main thread event loop and do network operations asynchronously. Previously, modules had to maintain an event loop and another thread for asynchronous network operations.
Also, if a module is calling API functions after doing some network operations, it had to synchronize its event loop thread's access with Redis main thread by locking the GIL, causing contention on the lock. After this commit, no synchronization is needed as module can operate in Redis main thread context. So, this commit may improve the performance for some use cases.
Added three functions to the module API:
* RedisModule_EventLoopAdd(int fd, int mask, RedisModuleEventLoopFunc func, void *user_data)
* RedisModule_EventLoopDel(int fd, int mask)
* RedisModule_EventLoopAddOneShot(RedisModuleEventLoopOneShotFunc func, void *user_data) - This function can be called from other threads to trigger callback on Redis main thread. Callback will be triggered only once. If Redis main thread is sleeping, this call will wake up the Redis main thread.
Event loop callbacks are called by Redis main thread after locking the GIL. Inside callbacks, modules can operate as if they are holding the GIL.
Added REDISMODULE_EVENT_EVENTLOOP event with two subevents:
* REDISMODULE_SUBEVENT_EVENTLOOP_BEFORE_SLEEP
* REDISMODULE_SUBEVENT_EVENTLOOP_AFTER_SLEEP
These events are for modules that want to participate in the before-sleep and after-sleep actions. For example, this can be used to implement batching: read data from the network, then write it all to a file in one go on the BEFORE_SLEEP event.
2022-01-18 06:10:07 -05:00
|
|
|
/* NOTE(review): presumably returns the clientData pointer registered for
 * fd; confirm in ae.c. */
void *aeGetFileClientData(aeEventLoop *eventLoop, int fd);
|
2009-03-22 05:30:00 -04:00
|
|
|
/* Create a time event with callback proc, user pointer clientData and an
 * optional finalizerProc (NULL when no finalizer is needed).
 * NOTE(review): the event presumably fires after the given number of
 * milliseconds and the return value is presumably the new event id;
 * confirm in ae.c. */
long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
|
|
|
|
aeTimeProc *proc, void *clientData,
|
|
|
|
aeEventFinalizerProc *finalizerProc);
|
|
|
|
/* Delete the time event with the given id.
 * NOTE(review): presumably returns AE_OK, or AE_ERR when no such event
 * exists; confirm in ae.c. */
int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id);
|
|
|
|
/* Process pending events.
 * NOTE(review): flags is presumably a combination of AE_FILE_EVENTS,
 * AE_TIME_EVENTS, AE_DONT_WAIT, AE_CALL_BEFORE_SLEEP and
 * AE_CALL_AFTER_SLEEP, and the return value presumably counts the events
 * processed; confirm in ae.c. */
int aeProcessEvents(aeEventLoop *eventLoop, int flags);
|
|
|
|
/* Wait on a single fd; note that no event loop is involved.
 * NOTE(review): presumably blocks for up to the given milliseconds until
 * the AE_READABLE/AE_WRITABLE conditions in mask hold, and returns the mask
 * that became ready; confirm in ae.c. */
int aeWait(int fd, int mask, long long milliseconds);
|
|
|
|
/* Run the event loop.
 * NOTE(review): presumably keeps processing events until aeStop() is
 * called; confirm in ae.c. */
void aeMain(aeEventLoop *eventLoop);
|
2009-11-28 11:06:28 -05:00
|
|
|
/* NOTE(review): presumably returns the name of the polling backend in use;
 * confirm in ae.c. */
char *aeGetApiName(void);
|
2010-01-28 10:12:04 -05:00
|
|
|
/* Install the before-sleep hook of the event loop.
 * NOTE(review): presumably stored in eventLoop->beforesleep; confirm in
 * ae.c. */
void aeSetBeforeSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *beforesleep);
|
2017-05-03 05:26:21 -04:00
|
|
|
/* Install the after-sleep hook of the event loop. It uses the same callback
 * type as the before-sleep hook.
 * NOTE(review): presumably stored in eventLoop->aftersleep; confirm in
 * ae.c. */
void aeSetAfterSleepProc(aeEventLoop *eventLoop, aeBeforeSleepProc *aftersleep);
|
2013-06-28 10:39:49 -04:00
|
|
|
/* NOTE(review): presumably returns eventLoop->setsize (max number of file
 * descriptors tracked); confirm in ae.c. */
int aeGetSetSize(aeEventLoop *eventLoop);
|
|
|
|
/* Change the set size of the event loop to setsize.
 * NOTE(review): presumably returns AE_OK on success and AE_ERR when the
 * resize is not possible; confirm in ae.c. */
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize);
|
2019-10-15 10:21:33 -04:00
|
|
|
/* NOTE(review): presumably sets or clears AE_DONT_WAIT in eventLoop->flags
 * depending on whether noWait is non-zero; confirm in ae.c. */
void aeSetDontWait(aeEventLoop *eventLoop, int noWait);
|
2009-03-22 05:30:00 -04:00
|
|
|
|
|
|
|
#endif
|