/* Background I/O service for Redis. * * This file implements operations that we need to perform in the background. * Currently there is only a single operation, that is a background close(2) * system call. This is needed as when the process is the last owner of a * reference to a file closing it means unlinking it, and the deletion of the * file is slow, blocking the server. * * In the future we'll either continue implementing new things we need or * we'll switch to libeio. However there are probably long term uses for this * file as we may want to put here Redis specific background tasks (for instance * it is not impossible that we'll need a non blocking FLUSHDB/FLUSHALL * implementation). * * DESIGN * ------ * * The design is trivial, we have a structure representing a job to perform * and a different thread and job queue for every job type. * Every thread wait for new jobs in its queue, and process every job * sequentially. * * Jobs of the same type are guaranteed to be processed from the least * recently inserted to the most recently inserted (older jobs processed * first). * * Currently there is no way for the creator of the job to be notified about * the completion of the operation, this will only be added when/if needed. */ #include "redis.h" #include "bio.h" static pthread_mutex_t bio_mutex[REDIS_BIO_NUM_OPS]; static pthread_cond_t bio_condvar[REDIS_BIO_NUM_OPS]; static list *bio_jobs[REDIS_BIO_NUM_OPS]; /* The following array is used to hold the number of pending jobs for every * OP type. This allows us to export the bioPendingJobsOfType() API that is * useful when the main thread wants to perform some operation that may involve * objects shared with the background thread. The main thread will just wait * that there are no longer jobs of this type to be executed before performing * the sensible operation. This data is also useful for reporting. */ static unsigned long long bio_pending[REDIS_BIO_NUM_OPS]; /* This structure represents a background Job. It is only used locally to this * file as the API deos not expose the internals at all. */ struct bio_job { time_t time; /* Time at which the job was created. */ /* Job specific arguments pointers. If we need to pass more than three * arguments we can just pass a pointer to a structure or alike. */ void *arg1, *arg2, *arg3; }; void *bioProcessBackgroundJobs(void *arg); /* Make sure we have enough stack to perform all the things we do in the * main thread. */ #define REDIS_THREAD_STACK_SIZE (1024*1024*4) /* Initialize the background system, spawning the thread. */ void bioInit(void) { pthread_attr_t attr; pthread_t thread; size_t stacksize; int j; /* Initialization of state vars and objects */ for (j = 0; j < REDIS_BIO_NUM_OPS; j++) { pthread_mutex_init(&bio_mutex[j],NULL); pthread_cond_init(&bio_condvar[j],NULL); bio_jobs[j] = listCreate(); bio_pending[j] = 0; } /* Set the stack size as by default it may be small in some system */ pthread_attr_init(&attr); pthread_attr_getstacksize(&attr,&stacksize); if (!stacksize) stacksize = 1; /* The world is full of Solaris Fixes */ while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2; pthread_attr_setstacksize(&attr, stacksize); /* Ready to spawn our threads. We use the single argument the thread * function accepts in order to pass the job ID the thread is * responsible of. */ for (j = 0; j < REDIS_BIO_NUM_OPS; j++) { void *arg = (void*)(unsigned long) j; if (pthread_create(&thread,&attr,bioProcessBackgroundJobs,arg) != 0) { redisLog(REDIS_WARNING,"Fatal: Can't initialize Background Jobs."); exit(1); } } } void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) { struct bio_job *job = zmalloc(sizeof(*job)); job->time = time(NULL); job->arg1 = arg1; job->arg2 = arg2; job->arg3 = arg3; pthread_mutex_lock(&bio_mutex[type]); listAddNodeTail(bio_jobs[type],job); bio_pending[type]++; pthread_cond_signal(&bio_condvar[type]); pthread_mutex_unlock(&bio_mutex[type]); } void *bioProcessBackgroundJobs(void *arg) { struct bio_job *job; unsigned long type = (unsigned long) arg; sigset_t sigset; pthread_detach(pthread_self()); pthread_mutex_lock(&bio_mutex[type]); /* Block SIGALRM so we are sure that only the main thread will * receive the watchdog signal. */ sigemptyset(&sigset); sigaddset(&sigset, SIGALRM); if (pthread_sigmask(SIG_BLOCK, &sigset, NULL)) redisLog(REDIS_WARNING, "Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno)); while(1) { listNode *ln; /* The loop always starts with the lock hold. */ if (listLength(bio_jobs[type]) == 0) { pthread_cond_wait(&bio_condvar[type],&bio_mutex[type]); continue; } /* Pop the job from the queue. */ ln = listFirst(bio_jobs[type]); job = ln->value; /* It is now possible to unlock the background system as we know have * a stand alone job structure to process.*/ pthread_mutex_unlock(&bio_mutex[type]); /* Process the job accordingly to its type. */ if (type == REDIS_BIO_CLOSE_FILE) { close((long)job->arg1); } else if (type == REDIS_BIO_AOF_FSYNC) { aof_fsync((long)job->arg1); } else { redisPanic("Wrong job type in bioProcessBackgroundJobs()."); } zfree(job); /* Lock again before reiterating the loop, if there are no longer * jobs to process we'll block again in pthread_cond_wait(). */ pthread_mutex_lock(&bio_mutex[type]); listDelNode(bio_jobs[type],ln); bio_pending[type]--; } } /* Return the number of pending jobs of the specified type. */ unsigned long long bioPendingJobsOfType(int type) { unsigned long long val; pthread_mutex_lock(&bio_mutex[type]); val = bio_pending[type]; pthread_mutex_unlock(&bio_mutex[type]); return val; } #if 0 /* We don't use the following code for now, and bioWaitPendingJobsLE probably needs a rewrite using conditional variables instead of the current implementation. */ /* Wait until the number of pending jobs of the specified type are * less or equal to the specified number. * * This function may block for long time, it should only be used to perform * the following tasks: * * 1) To avoid that the main thread is pushing jobs of a given time so fast * that the background thread can't process them at the same speed. * So before creating a new job of a given type the main thread should * call something like: bioWaitPendingJobsLE(job_type,10000); * 2) In order to perform special operations that make it necessary to be sure * no one is touching shared resourced in the background. */ void bioWaitPendingJobsLE(int type, unsigned long long num) { unsigned long long iteration = 0; /* We poll the jobs queue aggressively to start, and gradually relax * the polling speed if it is going to take too much time. */ while(1) { iteration++; if (iteration > 1000 && iteration <= 10000) { usleep(100); } else if (iteration > 10000) { usleep(1000); } if (bioPendingJobsOfType(type) <= num) break; } } /* Return the older job of the specified type. */ time_t bioOlderJobOfType(int type) { time_t time; listNode *ln; struct bio_job *job; pthread_mutex_lock(&bio_mutex[type]); ln = listFirst(bio_jobs[type]); if (ln == NULL) { pthread_mutex_unlock(&bio_mutex[type]); return 0; } job = ln->value; time = job->time; pthread_mutex_unlock(&bio_mutex[type]); return time; } #endif