mirror of
https://codeberg.org/redict/redict.git
synced 2025-01-22 16:18:28 -05:00
306a5ccd2d
Normally we execute the read event first and then the write event. When the barrier is set, we will do it reverse. However, under `kqueue`, if an `fd` has both read and write events, reading the event using `kevent` will generate two events, which will result in uncontrolled read and write timing. This also means that the guarantees of AOF `appendfsync` = `always` are not met on MacOS without this fix. The main change to this pr is to cache the events already obtained when reading them, so that if the same `fd` occurs again, only the mask in the cache is updated, rather than a new event is generated. This was exposed by the following test failure on MacOS: ``` *** [err]: AOF fsync always barrier issue in tests/integration/aof.tcl Expected 544 != 544 (context: type eval line 26 cmd {assert {$size1 != $size2}} proc ::test) ```
191 lines
6.6 KiB
C
191 lines
6.6 KiB
C
/* Kqueue(2)-based ae.c module
 *
 * Copyright (C) 2009 Harish Mallipeddi - harish.mallipeddi@gmail.com
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Redis nor the names of its contributors may be used
 *     to endorse or promote products derived from this software without
 *     specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
|
|
|
|
|
|
#include <sys/types.h>
|
|
#include <sys/event.h>
|
|
#include <sys/time.h>
|
|
|
|
/* Private state of the kqueue(2) backend, stored in eventLoop->apidata. */
typedef struct aeApiState {
    /* kqueue instance descriptor created in aeApiCreate(). */
    int kqfd;
    /* Output buffer for kevent(); holds up to eventLoop->setsize entries. */
    struct kevent *events;

    /* Events mask for merge read and write event.
     * To reduce memory consumption, we use 2 bits to store the mask
     * of an event, so that 1 byte will store the mask of 4 events. */
    char *eventsMask;
} aeApiState;
|
|
|
|
/* Each fd uses 2 bits of the mask array, so 4 fds fit in one byte.
 * Number of bytes needed to track 'sz' file descriptors (rounded up). */
#define EVENT_MASK_MALLOC_SIZE(sz) (((sz) + 3) / 4)
/* Bit offset of fd's 2-bit slot inside its byte. */
#define EVENT_MASK_OFFSET(fd) (((fd) % 4) * 2)
/* A 2-bit 'mask' value shifted into position for 'fd'. */
#define EVENT_MASK_ENCODE(fd, mask) (((mask) & 0x3) << EVENT_MASK_OFFSET(fd))

/* Return the 2-bit mask currently stored for 'fd'. */
static inline int getEventMask(const char *eventsMask, int fd) {
    int byte = eventsMask[fd/4];
    return (byte >> EVENT_MASK_OFFSET(fd)) & 0x3;
}

/* OR 'mask' into the 2-bit slot of 'fd' (merges with any bits already set). */
static inline void addEventMask(char *eventsMask, int fd, int mask) {
    eventsMask[fd/4] |= EVENT_MASK_ENCODE(fd, mask);
}

/* Clear the 2-bit slot of 'fd'. */
static inline void resetEventMask(char *eventsMask, int fd) {
    eventsMask[fd/4] &= ~EVENT_MASK_ENCODE(fd, 0x3);
}
|
|
|
|
/* Allocate the kqueue backend state and attach it to 'eventLoop'.
 * Returns 0 on success, -1 if allocation or kqueue(2) fails (with all
 * partially-acquired resources released). */
static int aeApiCreate(aeEventLoop *eventLoop) {
    aeApiState *state = zmalloc(sizeof(aeApiState));

    if (state == NULL) goto err;
    state->events = zmalloc(sizeof(struct kevent)*eventLoop->setsize);
    if (state->events == NULL) goto err_free_state;
    state->kqfd = kqueue();
    if (state->kqfd == -1) goto err_free_events;
    /* Mark the kqueue fd close-on-exec (project helper). */
    anetCloexec(state->kqfd);
    /* Two bits per fd; zeroed so no fd has a pending merged event. */
    state->eventsMask = zmalloc(EVENT_MASK_MALLOC_SIZE(eventLoop->setsize));
    memset(state->eventsMask, 0, EVENT_MASK_MALLOC_SIZE(eventLoop->setsize));
    eventLoop->apidata = state;
    return 0;

err_free_events:
    zfree(state->events);
err_free_state:
    zfree(state);
err:
    return -1;
}
|
|
|
|
/* Grow (or shrink) the backend buffers to track 'setsize' fds.
 * The merged-events mask is reset to empty. Always returns 0. */
static int aeApiResize(aeEventLoop *eventLoop, int setsize) {
    aeApiState *state = eventLoop->apidata;
    size_t maskBytes = EVENT_MASK_MALLOC_SIZE(setsize);

    state->events = zrealloc(state->events, sizeof(struct kevent)*setsize);
    state->eventsMask = zrealloc(state->eventsMask, maskBytes);
    memset(state->eventsMask, 0, maskBytes);
    return 0;
}
|
|
|
|
/* Release everything acquired by aeApiCreate(): the kqueue fd and all
 * backend allocations. Does not touch eventLoop->apidata itself. */
static void aeApiFree(aeEventLoop *eventLoop) {
    aeApiState *state = eventLoop->apidata;

    close(state->kqfd);
    zfree(state->eventsMask);
    zfree(state->events);
    zfree(state);
}
|
|
|
|
/* Register one kevent filter (EVFILT_READ or EVFILT_WRITE) for 'fd'.
 * Returns the kevent(2) result: 0 on success, -1 on error. */
static int aeApiAddFilter(int kqfd, int fd, short filter) {
    struct kevent change;

    EV_SET(&change, fd, filter, EV_ADD, 0, 0, NULL);
    return kevent(kqfd, &change, 1, NULL, 0, NULL);
}

/* Subscribe 'fd' for the events in 'mask' (AE_READABLE / AE_WRITABLE).
 * Returns 0 on success, -1 if kevent(2) rejects a registration.
 * NOTE(review): if both bits are requested and the second registration
 * fails, the first remains registered — same as the original behavior. */
static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *state = eventLoop->apidata;

    if (mask & AE_READABLE) {
        if (aeApiAddFilter(state->kqfd, fd, EVFILT_READ) == -1) return -1;
    }
    if (mask & AE_WRITABLE) {
        if (aeApiAddFilter(state->kqfd, fd, EVFILT_WRITE) == -1) return -1;
    }
    return 0;
}
|
|
|
|
/* Unsubscribe 'fd' from the events in 'mask'. kevent(2) errors are
 * deliberately ignored (e.g. deleting a filter that was never added). */
static void aeApiDelEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *state = eventLoop->apidata;
    struct kevent change;

    if (mask & AE_READABLE) {
        EV_SET(&change, fd, EVFILT_READ, EV_DELETE, 0, 0, NULL);
        (void)kevent(state->kqfd, &change, 1, NULL, 0, NULL);
    }
    if (mask & AE_WRITABLE) {
        EV_SET(&change, fd, EVFILT_WRITE, EV_DELETE, 0, 0, NULL);
        (void)kevent(state->kqfd, &change, 1, NULL, 0, NULL);
    }
}
|
|
|
|
/* Wait for events, blocking up to 'tvp' (NULL blocks indefinitely).
 * Fired events are stored in eventLoop->fired with the read and write
 * bits for the same fd merged into a single entry, so ae.c can control
 * read-before-write (or the reverse, under AE_BARRIER) ordering.
 * Returns the number of distinct fds that fired, not the raw kevent count. */
static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
    aeApiState *state = eventLoop->apidata;
    int retval, numevents = 0;

    if (tvp != NULL) {
        /* kevent(2) takes a timespec; convert from the timeval we get. */
        struct timespec timeout;
        timeout.tv_sec = tvp->tv_sec;
        timeout.tv_nsec = tvp->tv_usec * 1000;
        retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize,
                        &timeout);
    } else {
        retval = kevent(state->kqfd, NULL, 0, state->events, eventLoop->setsize,
                        NULL);
    }

    if (retval > 0) {
        int j;

        /* Normally we execute the read event first and then the write event.
         * When the barrier is set, we will do it reverse.
         *
         * However, under kqueue, read and write events would be separate
         * events, which would make it impossible to control the order of
         * reads and writes. So we store the event's mask we've got and merge
         * the same fd events later. */
        for (j = 0; j < retval; j++) {
            struct kevent *e = state->events+j;
            int fd = e->ident;
            int mask = 0;

            if (e->filter == EVFILT_READ) mask = AE_READABLE;
            else if (e->filter == EVFILT_WRITE) mask = AE_WRITABLE;
            /* OR into the per-fd 2-bit slot; a second kevent for the same
             * fd just adds its bit instead of producing a new entry. */
            addEventMask(state->eventsMask, fd, mask);
        }

        /* Re-traversal to merge read and write events, and set the fd's mask to
         * 0 so that events are not added again when the fd is encountered again. */
        numevents = 0;
        for (j = 0; j < retval; j++) {
            struct kevent *e = state->events+j;
            int fd = e->ident;
            int mask = getEventMask(state->eventsMask, fd);

            if (mask) {
                eventLoop->fired[numevents].fd = fd;
                eventLoop->fired[numevents].mask = mask;
                /* Clear the slot so the same fd's second kevent is skipped. */
                resetEventMask(state->eventsMask, fd);
                numevents++;
            }
        }
    } else if (retval == -1 && errno != EINTR) {
        /* Any kevent failure other than an interrupted call is fatal. */
        panic("aeApiPoll: kevent, %s", strerror(errno));
    }

    return numevents;
}
|
|
|
|
/* Name of this multiplexing backend, reported by the INFO machinery. */
static char *aeApiName(void) {
    return "kqueue";
}
|