redict/src/bio.c
antirez aa96122d96 Mask SIGALRM everything but in the main thread.
This is required to ensure that the signal will be delivered to the main
thread when the watchdog timer expires.
2012-03-27 13:48:57 +02:00

218 lines
7.8 KiB
C

/* Background I/O service for Redis.
*
* This file implements operations that we need to perform in the background.
* Currently there is only a single operation, that is a background close(2)
* system call. This is needed as when the process is the last owner of a
* reference to a file closing it means unlinking it, and the deletion of the
* file is slow, blocking the server.
*
* In the future we'll either continue implementing new things we need or
* we'll switch to libeio. However there are probably long term uses for this
* file as we may want to put here Redis specific background tasks (for instance
* it is not impossible that we'll need a non blocking FLUSHDB/FLUSHALL
* implementation).
*
* DESIGN
* ------
*
* The design is trivial, we have a structure representing a job to perform
* and a different thread and job queue for every job type.
* Every thread wait for new jobs in its queue, and process every job
* sequentially.
*
* Jobs of the same type are guaranteed to be processed from the least
* recently inserted to the most recently inserted (older jobs processed
* first).
*
* Currently there is no way for the creator of the job to be notified about
* the completion of the operation, this will only be added when/if needed.
*/
#include "redis.h"
#include "bio.h"
static pthread_mutex_t bio_mutex[REDIS_BIO_NUM_OPS];
static pthread_cond_t bio_condvar[REDIS_BIO_NUM_OPS];
static list *bio_jobs[REDIS_BIO_NUM_OPS];
/* The following array is used to hold the number of pending jobs for every
* OP type. This allows us to export the bioPendingJobsOfType() API that is
* useful when the main thread wants to perform some operation that may involve
* objects shared with the background thread. The main thread will just wait
* that there are no longer jobs of this type to be executed before performing
* the sensible operation. This data is also useful for reporting. */
static unsigned long long bio_pending[REDIS_BIO_NUM_OPS];
/* This structure represents a background Job. It is only used locally to this
* file as the API deos not expose the internals at all. */
struct bio_job {
time_t time; /* Time at which the job was created. */
/* Job specific arguments pointers. If we need to pass more than three
* arguments we can just pass a pointer to a structure or alike. */
void *arg1, *arg2, *arg3;
};
void *bioProcessBackgroundJobs(void *arg);
/* Make sure we have enough stack to perform all the things we do in the
* main thread. */
#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
/* Initialize the background system, spawning the thread. */
void bioInit(void) {
pthread_attr_t attr;
pthread_t thread;
size_t stacksize;
int j;
/* Initialization of state vars and objects */
for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
pthread_mutex_init(&bio_mutex[j],NULL);
pthread_cond_init(&bio_condvar[j],NULL);
bio_jobs[j] = listCreate();
bio_pending[j] = 0;
}
/* Set the stack size as by default it may be small in some system */
pthread_attr_init(&attr);
pthread_attr_getstacksize(&attr,&stacksize);
if (!stacksize) stacksize = 1; /* The world is full of Solaris Fixes */
while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2;
pthread_attr_setstacksize(&attr, stacksize);
/* Ready to spawn our threads. We use the single argument the thread
* function accepts in order to pass the job ID the thread is
* responsible of. */
for (j = 0; j < REDIS_BIO_NUM_OPS; j++) {
void *arg = (void*)(unsigned long) j;
if (pthread_create(&thread,&attr,bioProcessBackgroundJobs,arg) != 0) {
redisLog(REDIS_WARNING,"Fatal: Can't initialize Background Jobs.");
exit(1);
}
}
}
void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
struct bio_job *job = zmalloc(sizeof(*job));
job->time = time(NULL);
job->arg1 = arg1;
job->arg2 = arg2;
job->arg3 = arg3;
pthread_mutex_lock(&bio_mutex[type]);
listAddNodeTail(bio_jobs[type],job);
bio_pending[type]++;
pthread_cond_signal(&bio_condvar[type]);
pthread_mutex_unlock(&bio_mutex[type]);
}
void *bioProcessBackgroundJobs(void *arg) {
struct bio_job *job;
unsigned long type = (unsigned long) arg;
sigset_t sigset;
pthread_detach(pthread_self());
pthread_mutex_lock(&bio_mutex[type]);
/* Block SIGALRM so we are sure that only the main thread will
* receive the watchdog signal. */
sigemptyset(&sigset);
sigaddset(&sigset, SIGALRM);
if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
redisLog(REDIS_WARNING,
"Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno));
while(1) {
listNode *ln;
/* The loop always starts with the lock hold. */
if (listLength(bio_jobs[type]) == 0) {
pthread_cond_wait(&bio_condvar[type],&bio_mutex[type]);
continue;
}
/* Pop the job from the queue. */
ln = listFirst(bio_jobs[type]);
job = ln->value;
/* It is now possible to unlock the background system as we know have
* a stand alone job structure to process.*/
pthread_mutex_unlock(&bio_mutex[type]);
/* Process the job accordingly to its type. */
if (type == REDIS_BIO_CLOSE_FILE) {
close((long)job->arg1);
} else if (type == REDIS_BIO_AOF_FSYNC) {
aof_fsync((long)job->arg1);
} else {
redisPanic("Wrong job type in bioProcessBackgroundJobs().");
}
zfree(job);
/* Lock again before reiterating the loop, if there are no longer
* jobs to process we'll block again in pthread_cond_wait(). */
pthread_mutex_lock(&bio_mutex[type]);
listDelNode(bio_jobs[type],ln);
bio_pending[type]--;
}
}
/* Return the number of pending jobs of the specified type. */
unsigned long long bioPendingJobsOfType(int type) {
unsigned long long val;
pthread_mutex_lock(&bio_mutex[type]);
val = bio_pending[type];
pthread_mutex_unlock(&bio_mutex[type]);
return val;
}
#if 0 /* We don't use the following code for now, and bioWaitPendingJobsLE
probably needs a rewrite using conditional variables instead of the
current implementation. */
/* Wait until the number of pending jobs of the specified type are
* less or equal to the specified number.
*
* This function may block for long time, it should only be used to perform
* the following tasks:
*
* 1) To avoid that the main thread is pushing jobs of a given time so fast
* that the background thread can't process them at the same speed.
* So before creating a new job of a given type the main thread should
* call something like: bioWaitPendingJobsLE(job_type,10000);
* 2) In order to perform special operations that make it necessary to be sure
* no one is touching shared resourced in the background.
*/
void bioWaitPendingJobsLE(int type, unsigned long long num) {
unsigned long long iteration = 0;
/* We poll the jobs queue aggressively to start, and gradually relax
* the polling speed if it is going to take too much time. */
while(1) {
iteration++;
if (iteration > 1000 && iteration <= 10000) {
usleep(100);
} else if (iteration > 10000) {
usleep(1000);
}
if (bioPendingJobsOfType(type) <= num) break;
}
}
/* Return the older job of the specified type. */
time_t bioOlderJobOfType(int type) {
time_t time;
listNode *ln;
struct bio_job *job;
pthread_mutex_lock(&bio_mutex[type]);
ln = listFirst(bio_jobs[type]);
if (ln == NULL) {
pthread_mutex_unlock(&bio_mutex[type]);
return 0;
}
job = ln->value;
time = job->time;
pthread_mutex_unlock(&bio_mutex[type]);
return time;
}
#endif