2009-03-22 05:30:00 -04:00
|
|
|
/* A simple event-driven programming library. Originally I wrote this code
|
|
|
|
* for the Jim's event-loop (Jim is a Tcl interpreter) but later translated
|
|
|
|
* it in form of a library for easy reuse.
|
|
|
|
*
|
2010-02-19 05:23:57 -05:00
|
|
|
* Copyright (c) 2006-2010, Salvatore Sanfilippo <antirez at gmail dot com>
|
2009-03-22 05:30:00 -04:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
|
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* * Neither the name of Redis nor the names of its contributors may be used
|
|
|
|
* to endorse or promote products derived from this software without
|
|
|
|
* specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <sys/time.h>
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <stdlib.h>
|
2012-01-06 05:56:07 -05:00
|
|
|
#include <poll.h>
|
2012-02-08 16:24:59 -05:00
|
|
|
#include <string.h>
|
2012-10-05 04:10:34 -04:00
|
|
|
#include <time.h>
|
2013-01-03 08:18:03 -05:00
|
|
|
#include <errno.h>
|
2009-03-22 05:30:00 -04:00
|
|
|
|
|
|
|
#include "ae.h"
|
|
|
|
#include "zmalloc.h"
|
2009-11-23 12:50:39 -05:00
|
|
|
#include "config.h"
|
|
|
|
|
|
|
|
/* Include the best multiplexing layer supported by this system.
|
|
|
|
* The following should be ordered by performances, descending. */
|
2012-03-26 20:58:19 -04:00
|
|
|
#ifdef HAVE_EVPORT
|
|
|
|
#include "ae_evport.c"
|
2009-11-23 12:50:39 -05:00
|
|
|
#else
|
2012-03-26 20:58:19 -04:00
|
|
|
#ifdef HAVE_EPOLL
|
|
|
|
#include "ae_epoll.c"
|
2009-11-28 08:46:21 -05:00
|
|
|
#else
|
2012-03-26 20:58:19 -04:00
|
|
|
#ifdef HAVE_KQUEUE
|
|
|
|
#include "ae_kqueue.c"
|
|
|
|
#else
|
|
|
|
#include "ae_select.c"
|
|
|
|
#endif
|
2009-11-28 08:46:21 -05:00
|
|
|
#endif
|
2009-11-23 12:50:39 -05:00
|
|
|
#endif
|
2009-03-22 05:30:00 -04:00
|
|
|
|
2011-12-15 05:42:40 -05:00
|
|
|
aeEventLoop *aeCreateEventLoop(int setsize) {
|
2009-03-22 05:30:00 -04:00
|
|
|
aeEventLoop *eventLoop;
|
2009-11-23 12:50:39 -05:00
|
|
|
int i;
|
2009-03-22 05:30:00 -04:00
|
|
|
|
2012-01-25 04:35:47 -05:00
|
|
|
if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err;
|
2011-12-15 05:42:40 -05:00
|
|
|
eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize);
|
|
|
|
eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize);
|
2012-01-25 04:35:47 -05:00
|
|
|
if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err;
|
2011-12-15 05:42:40 -05:00
|
|
|
eventLoop->setsize = setsize;
|
2012-08-30 03:08:19 -04:00
|
|
|
eventLoop->lastTime = time(NULL);
|
2009-03-22 05:30:00 -04:00
|
|
|
eventLoop->timeEventHead = NULL;
|
|
|
|
eventLoop->timeEventNextId = 0;
|
|
|
|
eventLoop->stop = 0;
|
2009-11-23 12:50:39 -05:00
|
|
|
eventLoop->maxfd = -1;
|
2010-01-28 10:12:04 -05:00
|
|
|
eventLoop->beforesleep = NULL;
|
2017-05-03 05:26:21 -04:00
|
|
|
eventLoop->aftersleep = NULL;
|
2012-01-25 04:35:47 -05:00
|
|
|
if (aeApiCreate(eventLoop) == -1) goto err;
|
2009-11-23 12:50:39 -05:00
|
|
|
/* Events with mask == AE_NONE are not set. So let's initialize the
|
|
|
|
* vector with it. */
|
2011-12-15 05:42:40 -05:00
|
|
|
for (i = 0; i < setsize; i++)
|
2009-11-23 12:50:39 -05:00
|
|
|
eventLoop->events[i].mask = AE_NONE;
|
2009-03-22 05:30:00 -04:00
|
|
|
return eventLoop;
|
2012-01-25 04:35:47 -05:00
|
|
|
|
|
|
|
err:
|
|
|
|
if (eventLoop) {
|
|
|
|
zfree(eventLoop->events);
|
|
|
|
zfree(eventLoop->fired);
|
|
|
|
zfree(eventLoop);
|
|
|
|
}
|
|
|
|
return NULL;
|
2009-03-22 05:30:00 -04:00
|
|
|
}
|
|
|
|
|
2013-06-28 10:39:49 -04:00
|
|
|
/* Return the current set size. */
|
|
|
|
int aeGetSetSize(aeEventLoop *eventLoop) {
|
|
|
|
return eventLoop->setsize;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Resize the maximum set size of the event loop.
|
|
|
|
* If the requested set size is smaller than the current set size, but
|
|
|
|
* there is already a file descriptor in use that is >= the requested
|
|
|
|
* set size minus one, AE_ERR is returned and the operation is not
|
|
|
|
* performed at all.
|
|
|
|
*
|
|
|
|
* Otherwise AE_OK is returned and the operation is successful. */
|
|
|
|
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) {
|
|
|
|
int i;
|
|
|
|
|
|
|
|
if (setsize == eventLoop->setsize) return AE_OK;
|
|
|
|
if (eventLoop->maxfd >= setsize) return AE_ERR;
|
|
|
|
if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR;
|
|
|
|
|
|
|
|
eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize);
|
|
|
|
eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize);
|
|
|
|
eventLoop->setsize = setsize;
|
|
|
|
|
|
|
|
/* Make sure that if we created new slots, they are initialized with
|
|
|
|
* an AE_NONE mask. */
|
|
|
|
for (i = eventLoop->maxfd+1; i < setsize; i++)
|
|
|
|
eventLoop->events[i].mask = AE_NONE;
|
|
|
|
return AE_OK;
|
|
|
|
}
|
|
|
|
|
2009-03-22 05:30:00 -04:00
|
|
|
/* Release an event loop and everything it owns: the multiplexing layer
 * state, the 'events' and 'fired' arrays, and every time event still
 * linked from timeEventHead.
 *
 * Pending time events are simply freed; their finalizerProc is NOT
 * invoked, since the loop is being torn down and a finalizer could try
 * to use it. */
void aeDeleteEventLoop(aeEventLoop *eventLoop) {
    aeTimeEvent *te, *next;

    aeApiFree(eventLoop);
    zfree(eventLoop->events);
    zfree(eventLoop->fired);

    /* Time events are allocated one by one in aeCreateTimeEvent() and are
     * otherwise only released by processTimeEvents(), so whatever is still
     * in the list at this point would leak. */
    for (te = eventLoop->timeEventHead; te != NULL; te = next) {
        next = te->next;
        zfree(te);
    }
    zfree(eventLoop);
}
|
|
|
|
|
|
|
|
/* Set the 'stop' flag of the event loop.
 * NOTE(review): presumably the main loop checks this flag to terminate;
 * the consumer of the flag is not in this part of the file. */
void aeStop(aeEventLoop *eventLoop)
{
    eventLoop->stop = 1;
}
|
|
|
|
|
|
|
|
int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
|
2009-11-23 12:50:39 -05:00
|
|
|
aeFileProc *proc, void *clientData)
|
2009-03-22 05:30:00 -04:00
|
|
|
{
|
2013-01-03 08:18:03 -05:00
|
|
|
if (fd >= eventLoop->setsize) {
|
|
|
|
errno = ERANGE;
|
|
|
|
return AE_ERR;
|
|
|
|
}
|
2009-11-23 12:50:39 -05:00
|
|
|
aeFileEvent *fe = &eventLoop->events[fd];
|
|
|
|
|
|
|
|
if (aeApiAddEvent(eventLoop, fd, mask) == -1)
|
|
|
|
return AE_ERR;
|
|
|
|
fe->mask |= mask;
|
|
|
|
if (mask & AE_READABLE) fe->rfileProc = proc;
|
|
|
|
if (mask & AE_WRITABLE) fe->wfileProc = proc;
|
2009-03-22 05:30:00 -04:00
|
|
|
fe->clientData = clientData;
|
2009-11-23 12:50:39 -05:00
|
|
|
if (fd > eventLoop->maxfd)
|
|
|
|
eventLoop->maxfd = fd;
|
2009-03-22 05:30:00 -04:00
|
|
|
return AE_OK;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Unregister the events in 'mask' for 'fd'.
 *
 * The call is a no-op when 'fd' is outside [0, setsize) or when nothing
 * is registered for the descriptor. If the descriptor ends up with no
 * registered events and it was the highest one in use, maxfd is lowered
 * to the next descriptor that still has events (or -1 if there is none). */
void aeDeleteFileEvent(aeEventLoop *eventLoop, int fd, int mask)
{
    aeFileEvent *fe;

    /* 'fd' is used as an index into events[]: ignore negative values as
     * well as values past the end of the array. */
    if (fd < 0 || fd >= eventLoop->setsize) return;
    fe = &eventLoop->events[fd];
    if (fe->mask == AE_NONE) return;

    /* We want to always remove AE_BARRIER if set when AE_WRITABLE
     * is removed. */
    if (mask & AE_WRITABLE) mask |= AE_BARRIER;

    aeApiDelEvent(eventLoop, fd, mask);
    fe->mask = fe->mask & (~mask);
    if (fd == eventLoop->maxfd && fe->mask == AE_NONE) {
        /* Update the max fd: scan downwards for the first slot that is
         * still in use. 'j' is -1 when the scan finds none. */
        int j;

        for (j = eventLoop->maxfd-1; j >= 0; j--)
            if (eventLoop->events[j].mask != AE_NONE) break;
        eventLoop->maxfd = j;
    }
}
|
|
|
|
|
2011-11-21 10:05:29 -05:00
|
|
|
/* Return the mask of events currently registered for 'fd', or 0 when
 * 'fd' is outside [0, setsize). */
int aeGetFileEvents(aeEventLoop *eventLoop, int fd) {
    /* 'fd' is used as an index into events[]: a negative value must be
     * rejected just like a value past the end of the array. */
    if (fd < 0 || fd >= eventLoop->setsize) return 0;

    return eventLoop->events[fd].mask;
}
|
|
|
|
|
2009-03-22 05:30:00 -04:00
|
|
|
/* Store the current time of day, as reported by gettimeofday(), into
 * '*seconds' (whole seconds) and '*milliseconds' (the sub-second part,
 * truncated to milliseconds). */
static void aeGetTime(long *seconds, long *milliseconds)
{
    struct timeval now;

    gettimeofday(&now, NULL);
    *seconds = (long)now.tv_sec;
    *milliseconds = (long)(now.tv_usec/1000);
}
|
|
|
|
|
|
|
|
/* Compute the time that is 'milliseconds' from now and store it as a
 * seconds part in '*sec' and a milliseconds part in '*ms'. */
static void aeAddMillisecondsToNow(long long milliseconds, long *sec, long *ms) {
    long now_sec, now_ms;
    long target_sec, target_ms;

    aeGetTime(&now_sec, &now_ms);
    target_sec = now_sec + milliseconds/1000;
    target_ms = now_ms + milliseconds%1000;

    /* Carry into the seconds part when the milliseconds reach a full
     * second. */
    if (target_ms >= 1000) {
        target_sec += 1;
        target_ms -= 1000;
    }
    *sec = target_sec;
    *ms = target_ms;
}
|
|
|
|
|
|
|
|
long long aeCreateTimeEvent(aeEventLoop *eventLoop, long long milliseconds,
|
|
|
|
aeTimeProc *proc, void *clientData,
|
|
|
|
aeEventFinalizerProc *finalizerProc)
|
|
|
|
{
|
|
|
|
long long id = eventLoop->timeEventNextId++;
|
|
|
|
aeTimeEvent *te;
|
|
|
|
|
|
|
|
te = zmalloc(sizeof(*te));
|
|
|
|
if (te == NULL) return AE_ERR;
|
|
|
|
te->id = id;
|
|
|
|
aeAddMillisecondsToNow(milliseconds,&te->when_sec,&te->when_ms);
|
|
|
|
te->timeProc = proc;
|
|
|
|
te->finalizerProc = finalizerProc;
|
|
|
|
te->clientData = clientData;
|
Fix ae.c when a timer finalizerProc adds an event.
While this feature is not used by Redis, ae.c implements the ability for
a timer to call a finalizer callback when an timer event is deleted.
This feature was bugged since the start, and because it was never used
we never noticed a problem. However Anthony LaTorre was using the same
library in order to implement a different system: he found a bug that he
describes as follows, and which he fixed with the patch in this commit,
sent me by private email:
--- Anthony email ---
've found one bug in the current implementation of the timed events.
It's possible to lose track of a timed event if an event is added in
the finalizerProc of another event.
For example, suppose you start off with three timed events 1, 2, and
3. Then the linked list looks like:
3 -> 2 -> 1
Then, you run processTimeEvents and events 2 and 3 finish, so now the
list looks like:
-1 -> -1 -> 2
Now, on the next iteration of processTimeEvents it starts by deleting
the first event, and suppose this finalizerProc creates a new event,
so that the list looks like this:
4 -> -1 -> 2
On the next iteration of the while loop, when it gets to the second
event, the variable prev is still set to NULL, so that the head of the
event loop after the next event will be set to 2, i.e. after deleting
the next event the event loop will look like:
2
and the event with id 4 will be lost.
I've attached an example program to illustrate the issue. If you run
it you will see that it prints:
```
foo id = 0
spam!
```
But if you uncomment line 29 and run it again it won't print "spam!".
--- End of email ---
Test.c source code is as follows:
#include "ae.h"
#include <stdio.h>
aeEventLoop *el;
int foo(struct aeEventLoop *el, long long id, void *data)
{
printf("foo id = %lld\n", id);
return AE_NOMORE;
}
int spam(struct aeEventLoop *el, long long id, void *data)
{
printf("spam!\n");
return AE_NOMORE;
}
void bar(struct aeEventLoop *el, void *data)
{
aeCreateTimeEvent(el, 0, spam, NULL, NULL);
}
int main(int argc, char **argv)
{
el = aeCreateEventLoop(100);
//aeCreateTimeEvent(el, 0, foo, NULL, NULL);
aeCreateTimeEvent(el, 0, foo, NULL, bar);
aeMain(el);
return 0;
}
Anthony fixed the problem by using a linked list for the list of timers, and
sent me back this patch after he tested the code in production for some time.
The code looks sane to me, so committing it to Redis.
2018-03-28 08:06:08 -04:00
|
|
|
te->prev = NULL;
|
2009-03-22 05:30:00 -04:00
|
|
|
te->next = eventLoop->timeEventHead;
|
Fix ae.c when a timer finalizerProc adds an event.
While this feature is not used by Redis, ae.c implements the ability for
a timer to call a finalizer callback when an timer event is deleted.
This feature was bugged since the start, and because it was never used
we never noticed a problem. However Anthony LaTorre was using the same
library in order to implement a different system: he found a bug that he
describes as follows, and which he fixed with the patch in this commit,
sent me by private email:
--- Anthony email ---
've found one bug in the current implementation of the timed events.
It's possible to lose track of a timed event if an event is added in
the finalizerProc of another event.
For example, suppose you start off with three timed events 1, 2, and
3. Then the linked list looks like:
3 -> 2 -> 1
Then, you run processTimeEvents and events 2 and 3 finish, so now the
list looks like:
-1 -> -1 -> 2
Now, on the next iteration of processTimeEvents it starts by deleting
the first event, and suppose this finalizerProc creates a new event,
so that the list looks like this:
4 -> -1 -> 2
On the next iteration of the while loop, when it gets to the second
event, the variable prev is still set to NULL, so that the head of the
event loop after the next event will be set to 2, i.e. after deleting
the next event the event loop will look like:
2
and the event with id 4 will be lost.
I've attached an example program to illustrate the issue. If you run
it you will see that it prints:
```
foo id = 0
spam!
```
But if you uncomment line 29 and run it again it won't print "spam!".
--- End of email ---
Test.c source code is as follows:
#include "ae.h"
#include <stdio.h>
aeEventLoop *el;
int foo(struct aeEventLoop *el, long long id, void *data)
{
printf("foo id = %lld\n", id);
return AE_NOMORE;
}
int spam(struct aeEventLoop *el, long long id, void *data)
{
printf("spam!\n");
return AE_NOMORE;
}
void bar(struct aeEventLoop *el, void *data)
{
aeCreateTimeEvent(el, 0, spam, NULL, NULL);
}
int main(int argc, char **argv)
{
el = aeCreateEventLoop(100);
//aeCreateTimeEvent(el, 0, foo, NULL, NULL);
aeCreateTimeEvent(el, 0, foo, NULL, bar);
aeMain(el);
return 0;
}
Anthony fixed the problem by using a linked list for the list of timers, and
sent me back this patch after he tested the code in production for some time.
The code looks sane to me, so committing it to Redis.
2018-03-28 08:06:08 -04:00
|
|
|
if (te->next)
|
|
|
|
te->next->prev = te;
|
2009-03-22 05:30:00 -04:00
|
|
|
eventLoop->timeEventHead = te;
|
|
|
|
return id;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Mark the time event with the given 'id' as deleted.
 *
 * The event is not unlinked here: its id is replaced with
 * AE_DELETED_EVENT_ID and processTimeEvents() removes it later.
 *
 * Returns AE_OK if an event with that id was found, AE_ERR otherwise. */
int aeDeleteTimeEvent(aeEventLoop *eventLoop, long long id)
{
    aeTimeEvent *cur;

    for (cur = eventLoop->timeEventHead; cur != NULL; cur = cur->next) {
        if (cur->id != id) continue;
        cur->id = AE_DELETED_EVENT_ID;
        return AE_OK;
    }
    return AE_ERR; /* NO event with the specified ID found */
}
|
|
|
|
|
|
|
|
/* Search the first timer to fire.
|
|
|
|
* This operation is useful to know how many time the select can be
|
|
|
|
* put in sleep without to delay any event.
|
|
|
|
* If there are no timers NULL is returned.
|
|
|
|
*
|
2009-11-23 06:00:23 -05:00
|
|
|
* Note that's O(N) since time events are unsorted.
|
|
|
|
* Possible optimizations (not needed by Redis so far, but...):
|
|
|
|
* 1) Insert the event in order, so that the nearest is just the head.
|
|
|
|
* Much better but still insertion or deletion of timers is O(N).
|
|
|
|
* 2) Use a skiplist to have this operation as O(1) and insertion as O(log(N)).
|
|
|
|
*/
|
2009-03-22 05:30:00 -04:00
|
|
|
static aeTimeEvent *aeSearchNearestTimer(aeEventLoop *eventLoop)
|
|
|
|
{
|
|
|
|
aeTimeEvent *te = eventLoop->timeEventHead;
|
|
|
|
aeTimeEvent *nearest = NULL;
|
|
|
|
|
|
|
|
while(te) {
|
|
|
|
if (!nearest || te->when_sec < nearest->when_sec ||
|
|
|
|
(te->when_sec == nearest->when_sec &&
|
|
|
|
te->when_ms < nearest->when_ms))
|
|
|
|
nearest = te;
|
|
|
|
te = te->next;
|
|
|
|
}
|
|
|
|
return nearest;
|
|
|
|
}
|
|
|
|
|
2009-11-23 06:00:23 -05:00
|
|
|
/* Process time events */
|
|
|
|
static int processTimeEvents(aeEventLoop *eventLoop) {
|
|
|
|
int processed = 0;
|
Fix ae.c when a timer finalizerProc adds an event.
While this feature is not used by Redis, ae.c implements the ability for
a timer to call a finalizer callback when an timer event is deleted.
This feature was bugged since the start, and because it was never used
we never noticed a problem. However Anthony LaTorre was using the same
library in order to implement a different system: he found a bug that he
describes as follows, and which he fixed with the patch in this commit,
sent me by private email:
--- Anthony email ---
've found one bug in the current implementation of the timed events.
It's possible to lose track of a timed event if an event is added in
the finalizerProc of another event.
For example, suppose you start off with three timed events 1, 2, and
3. Then the linked list looks like:
3 -> 2 -> 1
Then, you run processTimeEvents and events 2 and 3 finish, so now the
list looks like:
-1 -> -1 -> 2
Now, on the next iteration of processTimeEvents it starts by deleting
the first event, and suppose this finalizerProc creates a new event,
so that the list looks like this:
4 -> -1 -> 2
On the next iteration of the while loop, when it gets to the second
event, the variable prev is still set to NULL, so that the head of the
event loop after the next event will be set to 2, i.e. after deleting
the next event the event loop will look like:
2
and the event with id 4 will be lost.
I've attached an example program to illustrate the issue. If you run
it you will see that it prints:
```
foo id = 0
spam!
```
But if you uncomment line 29 and run it again it won't print "spam!".
--- End of email ---
Test.c source code is as follows:
#include "ae.h"
#include <stdio.h>
aeEventLoop *el;
int foo(struct aeEventLoop *el, long long id, void *data)
{
printf("foo id = %lld\n", id);
return AE_NOMORE;
}
int spam(struct aeEventLoop *el, long long id, void *data)
{
printf("spam!\n");
return AE_NOMORE;
}
void bar(struct aeEventLoop *el, void *data)
{
aeCreateTimeEvent(el, 0, spam, NULL, NULL);
}
int main(int argc, char **argv)
{
el = aeCreateEventLoop(100);
//aeCreateTimeEvent(el, 0, foo, NULL, NULL);
aeCreateTimeEvent(el, 0, foo, NULL, bar);
aeMain(el);
return 0;
}
Anthony fixed the problem by using a linked list for the list of timers, and
sent me back this patch after he tested the code in production for some time.
The code looks sane to me, so committing it to Redis.
2018-03-28 08:06:08 -04:00
|
|
|
aeTimeEvent *te;
|
2009-11-23 06:00:23 -05:00
|
|
|
long long maxId;
|
2012-08-30 03:08:19 -04:00
|
|
|
time_t now = time(NULL);
|
|
|
|
|
|
|
|
/* If the system clock is moved to the future, and then set back to the
|
|
|
|
* right value, time events may be delayed in a random way. Often this
|
|
|
|
* means that scheduled operations will not be performed soon enough.
|
|
|
|
*
|
|
|
|
* Here we try to detect system clock skews, and force all the time
|
|
|
|
* events to be processed ASAP when this happens: the idea is that
|
|
|
|
* processing events earlier is less dangerous than delaying them
|
|
|
|
* indefinitely, and practice suggests it is. */
|
|
|
|
if (now < eventLoop->lastTime) {
|
|
|
|
te = eventLoop->timeEventHead;
|
|
|
|
while(te) {
|
|
|
|
te->when_sec = 0;
|
|
|
|
te = te->next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
eventLoop->lastTime = now;
|
2009-11-23 06:00:23 -05:00
|
|
|
|
|
|
|
te = eventLoop->timeEventHead;
|
|
|
|
maxId = eventLoop->timeEventNextId-1;
|
|
|
|
while(te) {
|
|
|
|
long now_sec, now_ms;
|
|
|
|
long long id;
|
|
|
|
|
2016-01-08 09:05:14 -05:00
|
|
|
/* Remove events scheduled for deletion. */
|
|
|
|
if (te->id == AE_DELETED_EVENT_ID) {
|
|
|
|
aeTimeEvent *next = te->next;
|
Fix ae.c when a timer finalizerProc adds an event.
While this feature is not used by Redis, ae.c implements the ability for
a timer to call a finalizer callback when an timer event is deleted.
This feature was bugged since the start, and because it was never used
we never noticed a problem. However Anthony LaTorre was using the same
library in order to implement a different system: he found a bug that he
describes as follows, and which he fixed with the patch in this commit,
sent me by private email:
--- Anthony email ---
've found one bug in the current implementation of the timed events.
It's possible to lose track of a timed event if an event is added in
the finalizerProc of another event.
For example, suppose you start off with three timed events 1, 2, and
3. Then the linked list looks like:
3 -> 2 -> 1
Then, you run processTimeEvents and events 2 and 3 finish, so now the
list looks like:
-1 -> -1 -> 2
Now, on the next iteration of processTimeEvents it starts by deleting
the first event, and suppose this finalizerProc creates a new event,
so that the list looks like this:
4 -> -1 -> 2
On the next iteration of the while loop, when it gets to the second
event, the variable prev is still set to NULL, so that the head of the
event loop after the next event will be set to 2, i.e. after deleting
the next event the event loop will look like:
2
and the event with id 4 will be lost.
I've attached an example program to illustrate the issue. If you run
it you will see that it prints:
```
foo id = 0
spam!
```
But if you uncomment line 29 and run it again it won't print "spam!".
--- End of email ---
Test.c source code is as follows:
#include "ae.h"
#include <stdio.h>
aeEventLoop *el;
int foo(struct aeEventLoop *el, long long id, void *data)
{
printf("foo id = %lld\n", id);
return AE_NOMORE;
}
int spam(struct aeEventLoop *el, long long id, void *data)
{
printf("spam!\n");
return AE_NOMORE;
}
void bar(struct aeEventLoop *el, void *data)
{
aeCreateTimeEvent(el, 0, spam, NULL, NULL);
}
int main(int argc, char **argv)
{
el = aeCreateEventLoop(100);
//aeCreateTimeEvent(el, 0, foo, NULL, NULL);
aeCreateTimeEvent(el, 0, foo, NULL, bar);
aeMain(el);
return 0;
}
Anthony fixed the problem by using a linked list for the list of timers, and
sent me back this patch after he tested the code in production for some time.
The code looks sane to me, so committing it to Redis.
2018-03-28 08:06:08 -04:00
|
|
|
if (te->prev)
|
|
|
|
te->prev->next = te->next;
|
2016-01-08 09:05:14 -05:00
|
|
|
else
|
Fix ae.c when a timer finalizerProc adds an event.
While this feature is not used by Redis, ae.c implements the ability for
a timer to call a finalizer callback when an timer event is deleted.
This feature was bugged since the start, and because it was never used
we never noticed a problem. However Anthony LaTorre was using the same
library in order to implement a different system: he found a bug that he
describes as follows, and which he fixed with the patch in this commit,
sent me by private email:
--- Anthony email ---
've found one bug in the current implementation of the timed events.
It's possible to lose track of a timed event if an event is added in
the finalizerProc of another event.
For example, suppose you start off with three timed events 1, 2, and
3. Then the linked list looks like:
3 -> 2 -> 1
Then, you run processTimeEvents and events 2 and 3 finish, so now the
list looks like:
-1 -> -1 -> 2
Now, on the next iteration of processTimeEvents it starts by deleting
the first event, and suppose this finalizerProc creates a new event,
so that the list looks like this:
4 -> -1 -> 2
On the next iteration of the while loop, when it gets to the second
event, the variable prev is still set to NULL, so that the head of the
event loop after the next event will be set to 2, i.e. after deleting
the next event the event loop will look like:
2
and the event with id 4 will be lost.
I've attached an example program to illustrate the issue. If you run
it you will see that it prints:
```
foo id = 0
spam!
```
But if you uncomment line 29 and run it again it won't print "spam!".
--- End of email ---
Test.c source code is as follows:
#include "ae.h"
#include <stdio.h>
aeEventLoop *el;
int foo(struct aeEventLoop *el, long long id, void *data)
{
printf("foo id = %lld\n", id);
return AE_NOMORE;
}
int spam(struct aeEventLoop *el, long long id, void *data)
{
printf("spam!\n");
return AE_NOMORE;
}
void bar(struct aeEventLoop *el, void *data)
{
aeCreateTimeEvent(el, 0, spam, NULL, NULL);
}
int main(int argc, char **argv)
{
el = aeCreateEventLoop(100);
//aeCreateTimeEvent(el, 0, foo, NULL, NULL);
aeCreateTimeEvent(el, 0, foo, NULL, bar);
aeMain(el);
return 0;
}
Anthony fixed the problem by using a linked list for the list of timers, and
sent me back this patch after he tested the code in production for some time.
The code looks sane to me, so committing it to Redis.
2018-03-28 08:06:08 -04:00
|
|
|
eventLoop->timeEventHead = te->next;
|
|
|
|
if (te->next)
|
|
|
|
te->next->prev = te->prev;
|
2016-01-08 09:05:14 -05:00
|
|
|
if (te->finalizerProc)
|
|
|
|
te->finalizerProc(eventLoop, te->clientData);
|
|
|
|
zfree(te);
|
|
|
|
te = next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2016-04-04 06:23:10 -04:00
|
|
|
/* Make sure we don't process time events created by time events in
|
|
|
|
* this iteration. Note that this check is currently useless: we always
|
|
|
|
* add new timers on the head, however if we change the implementation
|
|
|
|
* detail, this check may be useful again: we keep it here for future
|
|
|
|
* defense. */
|
2009-11-23 06:00:23 -05:00
|
|
|
if (te->id > maxId) {
|
|
|
|
te = te->next;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
aeGetTime(&now_sec, &now_ms);
|
|
|
|
if (now_sec > te->when_sec ||
|
|
|
|
(now_sec == te->when_sec && now_ms >= te->when_ms))
|
|
|
|
{
|
|
|
|
int retval;
|
|
|
|
|
|
|
|
id = te->id;
|
|
|
|
retval = te->timeProc(eventLoop, id, te->clientData);
|
|
|
|
processed++;
|
|
|
|
if (retval != AE_NOMORE) {
|
|
|
|
aeAddMillisecondsToNow(retval,&te->when_sec,&te->when_ms);
|
|
|
|
} else {
|
2016-01-08 09:05:14 -05:00
|
|
|
te->id = AE_DELETED_EVENT_ID;
|
2009-11-23 06:00:23 -05:00
|
|
|
}
|
|
|
|
}
|
2016-01-08 09:05:14 -05:00
|
|
|
te = te->next;
|
2009-11-23 06:00:23 -05:00
|
|
|
}
|
|
|
|
return processed;
|
|
|
|
}
|
|
|
|
|
2009-03-22 05:30:00 -04:00
|
|
|
/* Process every pending time event, then every pending file event
|
|
|
|
* (that may be registered by time event callbacks just processed).
|
|
|
|
* Without special flags the function sleeps until some file event
|
2013-01-16 12:00:20 -05:00
|
|
|
* fires, or when the next time event occurs (if any).
|
2009-03-22 05:30:00 -04:00
|
|
|
*
|
|
|
|
* If flags is 0, the function does nothing and returns.
|
|
|
|
* if flags has AE_ALL_EVENTS set, all the kind of events are processed.
|
|
|
|
* if flags has AE_FILE_EVENTS set, file events are processed.
|
|
|
|
* if flags has AE_TIME_EVENTS set, time events are processed.
|
|
|
|
* if flags has AE_DONT_WAIT set the function returns ASAP until all
|
|
|
|
* the events that's possible to process without to wait are processed.
|
2019-01-06 02:01:25 -05:00
|
|
|
* if flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called.
|
2009-03-22 05:30:00 -04:00
|
|
|
*
|
|
|
|
* The function returns the number of events processed. */
|
|
|
|
int aeProcessEvents(aeEventLoop *eventLoop, int flags)
|
|
|
|
{
|
2009-11-23 12:50:39 -05:00
|
|
|
int processed = 0, numevents;
|
2009-03-22 05:30:00 -04:00
|
|
|
|
|
|
|
/* Nothing to do? return ASAP */
|
|
|
|
if (!(flags & AE_TIME_EVENTS) && !(flags & AE_FILE_EVENTS)) return 0;
|
|
|
|
|
|
|
|
/* Note that we want call select() even if there are no
|
|
|
|
* file events to process as long as we want to process time
|
|
|
|
* events, in order to sleep until the next time event is ready
|
|
|
|
* to fire. */
|
2009-11-23 12:50:39 -05:00
|
|
|
if (eventLoop->maxfd != -1 ||
|
|
|
|
((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
|
|
|
|
int j;
|
2009-03-22 05:30:00 -04:00
|
|
|
aeTimeEvent *shortest = NULL;
|
|
|
|
struct timeval tv, *tvp;
|
|
|
|
|
|
|
|
if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT))
|
|
|
|
shortest = aeSearchNearestTimer(eventLoop);
|
|
|
|
if (shortest) {
|
|
|
|
long now_sec, now_ms;
|
|
|
|
|
|
|
|
aeGetTime(&now_sec, &now_ms);
|
|
|
|
tvp = &tv;
|
2016-04-04 08:08:16 -04:00
|
|
|
|
|
|
|
/* How many milliseconds we need to wait for the next
|
|
|
|
* time event to fire? */
|
|
|
|
long long ms =
|
|
|
|
(shortest->when_sec - now_sec)*1000 +
|
|
|
|
shortest->when_ms - now_ms;
|
|
|
|
|
|
|
|
if (ms > 0) {
|
|
|
|
tvp->tv_sec = ms/1000;
|
|
|
|
tvp->tv_usec = (ms % 1000)*1000;
|
2009-03-22 05:30:00 -04:00
|
|
|
} else {
|
2016-04-04 08:08:16 -04:00
|
|
|
tvp->tv_sec = 0;
|
|
|
|
tvp->tv_usec = 0;
|
2009-03-22 05:30:00 -04:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* If we have to check for events but need to return
|
2013-01-16 12:00:20 -05:00
|
|
|
* ASAP because of AE_DONT_WAIT we need to set the timeout
|
2009-03-22 05:30:00 -04:00
|
|
|
* to zero */
|
|
|
|
if (flags & AE_DONT_WAIT) {
|
|
|
|
tv.tv_sec = tv.tv_usec = 0;
|
|
|
|
tvp = &tv;
|
|
|
|
} else {
|
|
|
|
/* Otherwise we can block */
|
|
|
|
tvp = NULL; /* wait forever */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-05-03 05:26:21 -04:00
|
|
|
/* Call the multiplexing API, will return only on timeout or when
|
|
|
|
* some event fires. */
|
2009-11-23 12:50:39 -05:00
|
|
|
numevents = aeApiPoll(eventLoop, tvp);
|
2017-05-03 05:26:21 -04:00
|
|
|
|
|
|
|
/* After sleep callback. */
|
2017-07-10 18:13:52 -04:00
|
|
|
if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
|
2017-05-03 05:26:21 -04:00
|
|
|
eventLoop->aftersleep(eventLoop);
|
|
|
|
|
2009-11-23 12:50:39 -05:00
|
|
|
for (j = 0; j < numevents; j++) {
|
|
|
|
aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
|
|
|
|
int mask = eventLoop->fired[j].mask;
|
|
|
|
int fd = eventLoop->fired[j].fd;
|
2018-02-27 06:16:26 -05:00
|
|
|
int fired = 0; /* Number of events fired for current fd. */
|
|
|
|
|
|
|
|
/* Normally we execute the readable event first, and the writable
|
|
|
|
* event later. This is useful as sometimes we may be able
|
|
|
|
* to serve the reply of a query immediately after processing the
|
|
|
|
* query.
|
|
|
|
*
|
|
|
|
* However if AE_BARRIER is set in the mask, our application is
|
|
|
|
* asking us to do the reverse: never fire the writable event
|
|
|
|
* after the readable. In such a case, we invert the calls.
|
|
|
|
* This is useful when, for instance, we want to do things
|
|
|
|
* in the beforeSleep() hook, like fsynching a file to disk,
|
|
|
|
* before replying to a client. */
|
|
|
|
int invert = fe->mask & AE_BARRIER;
|
|
|
|
|
2018-07-04 08:04:06 -04:00
|
|
|
/* Note the "fe->mask & mask & ..." code: maybe an already
|
2018-02-27 06:16:26 -05:00
|
|
|
* processed event removed an element that fired and we still
|
|
|
|
* didn't process, so we check if the event is still valid.
|
|
|
|
*
|
|
|
|
* Fire the readable event if the call sequence is not
|
|
|
|
* inverted. */
|
|
|
|
if (!invert && fe->mask & mask & AE_READABLE) {
|
2009-11-23 12:50:39 -05:00
|
|
|
fe->rfileProc(eventLoop,fd,fe->clientData,mask);
|
2018-02-27 06:16:26 -05:00
|
|
|
fired++;
|
2010-01-20 13:38:59 -05:00
|
|
|
}
|
2018-02-27 06:16:26 -05:00
|
|
|
|
|
|
|
/* Fire the writable event. */
|
2010-01-20 13:38:59 -05:00
|
|
|
if (fe->mask & mask & AE_WRITABLE) {
|
2018-02-27 06:16:26 -05:00
|
|
|
if (!fired || fe->wfileProc != fe->rfileProc) {
|
|
|
|
fe->wfileProc(eventLoop,fd,fe->clientData,mask);
|
|
|
|
fired++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* If we have to invert the call, fire the readable event now
|
|
|
|
* after the writable one. */
|
|
|
|
if (invert && fe->mask & mask & AE_READABLE) {
|
|
|
|
if (!fired || fe->wfileProc != fe->rfileProc) {
|
|
|
|
fe->rfileProc(eventLoop,fd,fe->clientData,mask);
|
|
|
|
fired++;
|
ae.c: introduce the concept of read->write barrier.
AOF fsync=always, and certain Redis Cluster bus operations, require to
fsync data on disk before replying with an acknowledge.
In such case, in order to implement Group Commits, we want to be sure
that queries that are read in a given cycle of the event loop, are never
served to clients in the same event loop iteration. This way, by using
the event loop "before sleep" callback, we can fsync the information
just one time before returning into the event loop for the next cycle.
This is much more efficient compared to calling fsync() multiple times.
Unfortunately because of a bug, this was not always guaranteed: the
actual way the events are installed was the sole thing that could
control. Normally this problem is hard to trigger when AOF is enabled
with fsync=always, because we try to flush the output buffers to the
socket directly in the beforeSleep() function of Redis. However if the
output buffers are full, we actually install a write event, and in such
a case, this bug could happen.
This change to ae.c modifies the event loop implementation to make this
concept explicit. Write events that are registered with:
AE_WRITABLE|AE_BARRIER
Are guaranteed to never fire after the readable event was fired for the
same file descriptor. In this way we are sure that data is persisted to
disk before the client performing the operation receives an
acknowledgment.
However note that this semantics does not provide all the guarantees
that one may believe are automatically provided. Take the example of the
blocking list operations in Redis.
With AOF and fsync=always we could have:
Client A doing: BLPOP myqueue 0
Client B doing: RPUSH myqueue a b c
In this scenario, Client A will get the "a" elements immediately after
the Client B RPUSH will be executed, even before the operation is persisted.
However when Client B will get the acknowledge, it can be sure that
"b,c" are already safe on disk inside the list.
What to note here is that it cannot be assumed that Client A receiving
the element is a guarantee that the operation succeeded from the point
of view of Client B.
This is due to the fact that the barrier exists within the same socket,
and not between different sockets. However in the case above, the
element "a" was not going to be persisted regardless, so it is a pretty
synthetic argument.
2018-02-23 11:42:24 -05:00
|
|
|
}
|
2010-01-20 13:38:59 -05:00
|
|
|
}
|
2018-02-27 06:16:26 -05:00
|
|
|
|
2009-11-23 12:50:39 -05:00
|
|
|
processed++;
|
2009-03-22 05:30:00 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Check time events */
|
2009-11-23 06:00:23 -05:00
|
|
|
if (flags & AE_TIME_EVENTS)
|
|
|
|
processed += processTimeEvents(eventLoop);
|
2009-03-22 05:30:00 -04:00
|
|
|
|
|
|
|
return processed; /* return the number of processed file/time events */
|
|
|
|
}
|
|
|
|
|
2013-01-16 12:00:20 -05:00
|
|
|
/* Wait for milliseconds until the given file descriptor becomes
|
2009-03-22 05:30:00 -04:00
|
|
|
* writable/readable/exception */
|
|
|
|
int aeWait(int fd, int mask, long long milliseconds) {
|
2012-01-06 05:56:07 -05:00
|
|
|
struct pollfd pfd;
|
2009-03-22 05:30:00 -04:00
|
|
|
int retmask = 0, retval;
|
|
|
|
|
2012-01-06 05:56:07 -05:00
|
|
|
memset(&pfd, 0, sizeof(pfd));
|
|
|
|
pfd.fd = fd;
|
|
|
|
if (mask & AE_READABLE) pfd.events |= POLLIN;
|
|
|
|
if (mask & AE_WRITABLE) pfd.events |= POLLOUT;
|
|
|
|
|
|
|
|
if ((retval = poll(&pfd, 1, milliseconds))== 1) {
|
|
|
|
if (pfd.revents & POLLIN) retmask |= AE_READABLE;
|
|
|
|
if (pfd.revents & POLLOUT) retmask |= AE_WRITABLE;
|
2018-07-04 08:04:06 -04:00
|
|
|
if (pfd.revents & POLLERR) retmask |= AE_WRITABLE;
|
2012-05-23 05:19:49 -04:00
|
|
|
if (pfd.revents & POLLHUP) retmask |= AE_WRITABLE;
|
2009-03-22 05:30:00 -04:00
|
|
|
return retmask;
|
|
|
|
} else {
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-11-28 11:06:28 -05:00
|
|
|
/* Main loop: clear the 'stop' flag of the event loop, then keep iterating
 * until something sets it again. On each iteration the beforesleep
 * callback is invoked (when one is installed) and aeProcessEvents() is
 * called with AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP. */
void aeMain(aeEventLoop *eventLoop) {
    const int flags = AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP;

    for (eventLoop->stop = 0; !eventLoop->stop; ) {
        if (eventLoop->beforesleep) eventLoop->beforesleep(eventLoop);
        aeProcessEvents(eventLoop, flags);
    }
}
|
2009-11-28 11:06:28 -05:00
|
|
|
|
|
|
|
/* Return the string provided by aeApiName(). */
char *aeGetApiName(void) {
    char *name = aeApiName();

    return name;
}
|
2010-01-28 10:12:04 -05:00
|
|
|
|
|
|
|
/* Store 'beforesleep' in the event loop as its beforesleep callback.
 * aeMain() invokes it, when not NULL, before each aeProcessEvents() call. */
void aeSetBeforeSleepProc(aeEventLoop *eventLoop,
                          aeBeforeSleepProc *beforesleep)
{
    eventLoop->beforesleep = beforesleep;
}
|
2017-05-03 05:26:21 -04:00
|
|
|
|
|
|
|
/* Store 'aftersleep' in the event loop as its aftersleep callback.
 * aeProcessEvents() invokes it, when not NULL, right after aeApiPoll()
 * returns, provided the AE_CALL_AFTER_SLEEP flag was passed. */
void aeSetAfterSleepProc(aeEventLoop *eventLoop,
                         aeBeforeSleepProc *aftersleep)
{
    eventLoop->aftersleep = aftersleep;
}
|