2009-11-23 17:40:27 -05:00
|
|
|
/* Linux epoll(2) based ae.c module
 *
 * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
|
2009-11-23 17:40:27 -05:00
|
|
|
|
|
|
|
#include <limits.h>
#include <sys/epoll.h>
|
|
|
|
|
|
|
|
/* Per-event-loop state of the epoll backend, stored in eventLoop->apidata. */
typedef struct aeApiState {
    int epfd;                   /* Descriptor returned by epoll_create(). */
    struct epoll_event *events; /* Buffer epoll_wait() writes ready events to. */
} aeApiState;
|
|
|
|
|
|
|
|
/* Create the epoll backend state for eventLoop.
 *
 * Allocates an aeApiState plus an event buffer with room for
 * eventLoop->setsize entries, creates the epoll instance and, on success,
 * stores the state in eventLoop->apidata.
 *
 * Returns 0 on success. Returns -1 if an allocation or epoll_create() fails;
 * everything allocated up to that point is released first and
 * eventLoop->apidata is left untouched. */
static int aeApiCreate(aeEventLoop *eventLoop) {
    aeApiState *state = zmalloc(sizeof(aeApiState));

    if (!state) return -1;
    state->events = zmalloc(sizeof(struct epoll_event)*eventLoop->setsize);
    if (!state->events) {
        zfree(state);
        return -1;
    }
    state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */
    if (state->epfd == -1) {
        zfree(state->events);
        zfree(state);
        return -1;
    }
    /* NOTE(review): presumably marks the epoll fd close-on-exec; the helper
     * is defined elsewhere and its result is not checked here. */
    anetCloexec(state->epfd);
    eventLoop->apidata = state;
    return 0;
}
|
|
|
|
|
2013-06-28 10:39:49 -04:00
|
|
|
/* Resize the epoll_wait() result buffer so it can hold setsize events.
 *
 * Always returns 0.
 * NOTE(review): the zrealloc() result is stored without a NULL check, so this
 * relies on zrealloc() not returning NULL on failure -- confirm against the
 * allocator wrapper. */
static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
    aeApiState *state = eventLoop->apidata;

    state->events = zrealloc(state->events, sizeof(struct epoll_event)*setsize);
    return 0;
}
|
|
|
|
|
2009-11-23 17:40:27 -05:00
|
|
|
/* Release the epoll backend state of eventLoop: close the epoll descriptor,
 * then free the event buffer and the state itself. eventLoop->apidata is not
 * reset here and must not be used afterwards. */
static void aeApiFree(aeEventLoop *eventLoop) {
    aeApiState *state = eventLoop->apidata;

    close(state->epfd);
    zfree(state->events);
    zfree(state);
}
|
|
|
|
|
|
|
|
/* Start monitoring fd for the events in mask (AE_READABLE and/or
 * AE_WRITABLE), in addition to whatever eventLoop->events[fd].mask already
 * records for it.
 *
 * Returns 0 on success, -1 if epoll_ctl() fails. */
static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *state = eventLoop->apidata;
    struct epoll_event ee = {0}; /* avoid valgrind warning */
    /* If the fd was already monitored for some event, we need a MOD
     * operation. Otherwise we need an ADD operation. */
    int op = eventLoop->events[fd].mask == AE_NONE ?
            EPOLL_CTL_ADD : EPOLL_CTL_MOD;

    mask |= eventLoop->events[fd].mask; /* Merge old events */
    if (mask & AE_READABLE) ee.events |= EPOLLIN;
    if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
    ee.data.fd = fd; /* aeApiPoll() reads the fd back from here. */
    if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
    return 0;
}
|
|
|
|
|
|
|
|
/* Stop monitoring fd for the events in delmask.
 *
 * The events left over (eventLoop->events[fd].mask without delmask) are
 * re-registered with EPOLL_CTL_MOD; if none are left the fd is removed from
 * the epoll set with EPOLL_CTL_DEL.
 *
 * The function returns nothing, so epoll_ctl() failures are not reported. */
static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int delmask) {
    aeApiState *state = eventLoop->apidata;
    struct epoll_event ee = {0}; /* avoid valgrind warning */
    int mask = eventLoop->events[fd].mask & (~delmask);

    if (mask & AE_READABLE) ee.events |= EPOLLIN;
    if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
    ee.data.fd = fd;
    if (mask != AE_NONE) {
        epoll_ctl(state->epfd,EPOLL_CTL_MOD,fd,&ee);
    } else {
        /* Note, Kernel < 2.6.9 requires a non null event pointer even for
         * EPOLL_CTL_DEL. */
        epoll_ctl(state->epfd,EPOLL_CTL_DEL,fd,&ee);
    }
}
|
|
|
|
|
|
|
|
/* Wait for events on the epoll instance and report them in eventLoop->fired.
 *
 * tvp is the longest time to wait; NULL means wait without a timeout.
 *
 * Returns the number of entries written to eventLoop->fired, starting at
 * index 0. Returns 0 when epoll_wait() reports no events or is interrupted
 * (EINTR). Any other epoll_wait() failure calls panic(). */
static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
    aeApiState *state = eventLoop->apidata;
    int retval, numevents = 0;
    int timeout = -1; /* No tvp: wait without a timeout. */

    if (tvp) {
        /* epoll_wait() only takes whole milliseconds. Round up rather than
         * truncate: with truncation a timer due in e.g. 100ms would sleep
         * for 99ms and then busy loop through the kernel with 0ms timeouts
         * until the right microsecond arrived. */
        long long ms = (long long)tvp->tv_sec*1000 + (tvp->tv_usec + 999)/1000;

        /* The timeout argument is an int: saturate instead of letting a
         * very long wait wrap around, possibly to a negative value. */
        if (ms > INT_MAX) ms = INT_MAX;
        else if (ms < INT_MIN) ms = INT_MIN;
        timeout = (int)ms;
    }

    retval = epoll_wait(state->epfd,state->events,eventLoop->setsize,timeout);
    if (retval > 0) {
        int j;

        numevents = retval;
        for (j = 0; j < numevents; j++) {
            int mask = 0;
            struct epoll_event *e = state->events+j;

            if (e->events & EPOLLIN) mask |= AE_READABLE;
            if (e->events & EPOLLOUT) mask |= AE_WRITABLE;
            /* Errors and hangups are reported as both readable and
             * writable. */
            if (e->events & EPOLLERR) mask |= AE_WRITABLE|AE_READABLE;
            if (e->events & EPOLLHUP) mask |= AE_WRITABLE|AE_READABLE;
            eventLoop->fired[j].fd = e->data.fd;
            eventLoop->fired[j].mask = mask;
        }
    } else if (retval == -1 && errno != EINTR) {
        panic("aeApiPoll: epoll_wait, %s", strerror(errno));
    }

    return numevents;
}
|
2009-11-28 11:06:28 -05:00
|
|
|
|
|
|
|
/* Return the name of this backend, the constant string "epoll". The caller
 * must not modify or free it. */
static char *aeApiName(void) {
    return "epoll";
}
|