redict/src/bio.c

281 lines
11 KiB
C
Raw Normal View History

2011-09-13 09:59:48 -04:00
/* Background I/O service for Redis.
*
* This file implements operations that we need to perform in the background.
* Currently there is only a single operation, that is a background close(2)
* system call. This is needed as when the process is the last owner of a
* reference to a file closing it means unlinking it, and the deletion of the
* file is slow, blocking the server.
*
* In the future we'll either continue implementing new things we need or
* we'll switch to libeio. However there are probably long term uses for this
* file as we may want to put here Redis specific background tasks (for instance
* it is not impossible that we'll need a non blocking FLUSHDB/FLUSHALL
* implementation).
*
* DESIGN
* ------
*
* The design is trivial, we have a structure representing a job to perform
* and a different thread and job queue for every job type.
2018-10-05 12:29:23 -04:00
* Every thread waits for new jobs in its queue, and process every job
* sequentially.
*
2011-09-15 12:23:11 -04:00
* Jobs of the same type are guaranteed to be processed from the least
* recently inserted to the most recently inserted (older jobs processed
* first).
*
2011-09-13 09:59:48 -04:00
* Currently there is no way for the creator of the job to be notified about
* the completion of the operation, this will only be added when/if needed.
*
* ----------------------------------------------------------------------------
*
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
2011-09-13 09:59:48 -04:00
*/
#include "server.h"
2011-09-13 09:59:48 -04:00
#include "bio.h"
2015-07-27 08:55:45 -04:00
static pthread_t bio_threads[BIO_NUM_OPS];
static pthread_mutex_t bio_mutex[BIO_NUM_OPS];
2015-09-23 10:09:16 -04:00
static pthread_cond_t bio_newjob_cond[BIO_NUM_OPS];
static pthread_cond_t bio_step_cond[BIO_NUM_OPS];
2015-07-27 08:55:45 -04:00
static list *bio_jobs[BIO_NUM_OPS];
/* The following array is used to hold the number of pending jobs for every
* OP type. This allows us to export the bioPendingJobsOfType() API that is
* useful when the main thread wants to perform some operation that may involve
* objects shared with the background thread. The main thread will just wait
* that there are no longer jobs of this type to be executed before performing
* the sensible operation. This data is also useful for reporting. */
2015-07-27 08:55:45 -04:00
static unsigned long long bio_pending[BIO_NUM_OPS];
2011-09-13 09:59:48 -04:00
/* This structure represents a background Job. It is only used locally to this
2013-01-16 12:00:20 -05:00
* file as the API does not expose the internals at all. */
2011-09-13 09:59:48 -04:00
struct bio_job {
time_t time; /* Time at which the job was created. */
/* Job specific arguments pointers. If we need to pass more than three
* arguments we can just pass a pointer to a structure or alike. */
void *arg1, *arg2, *arg3;
2011-09-13 10:09:06 -04:00
};
2011-09-13 09:59:48 -04:00
void *bioProcessBackgroundJobs(void *arg);
void lazyfreeFreeObjectFromBioThread(robj *o);
void lazyfreeFreeDatabaseFromBioThread(dict *ht1, dict *ht2);
void lazyfreeFreeSlotsMapFromBioThread(rax *rt);
2011-09-13 09:59:48 -04:00
2011-09-13 10:09:06 -04:00
/* Make sure we have enough stack to perform all the things we do in the
* main thread. */
#define REDIS_THREAD_STACK_SIZE (1024*1024*4)
2011-09-13 09:59:48 -04:00
/* Initialize the background system, spawning the thread. */
void bioInit(void) {
pthread_attr_t attr;
pthread_t thread;
size_t stacksize;
int j;
2011-09-13 09:59:48 -04:00
/* Initialization of state vars and objects */
2015-07-27 08:55:45 -04:00
for (j = 0; j < BIO_NUM_OPS; j++) {
pthread_mutex_init(&bio_mutex[j],NULL);
2015-09-23 10:09:16 -04:00
pthread_cond_init(&bio_newjob_cond[j],NULL);
pthread_cond_init(&bio_step_cond[j],NULL);
bio_jobs[j] = listCreate();
bio_pending[j] = 0;
}
2011-09-13 09:59:48 -04:00
/* Set the stack size as by default it may be small in some system */
pthread_attr_init(&attr);
2011-09-13 10:09:06 -04:00
pthread_attr_getstacksize(&attr,&stacksize);
2011-09-13 09:59:48 -04:00
if (!stacksize) stacksize = 1; /* The world is full of Solaris Fixes */
while (stacksize < REDIS_THREAD_STACK_SIZE) stacksize *= 2;
pthread_attr_setstacksize(&attr, stacksize);
/* Ready to spawn our threads. We use the single argument the thread
* function accepts in order to pass the job ID the thread is
* responsible of. */
2015-07-27 08:55:45 -04:00
for (j = 0; j < BIO_NUM_OPS; j++) {
void *arg = (void*)(unsigned long) j;
if (pthread_create(&thread,&attr,bioProcessBackgroundJobs,arg) != 0) {
2015-07-27 03:41:48 -04:00
serverLog(LL_WARNING,"Fatal: Can't initialize Background Jobs.");
exit(1);
}
bio_threads[j] = thread;
2011-09-13 09:59:48 -04:00
}
}
void bioCreateBackgroundJob(int type, void *arg1, void *arg2, void *arg3) {
2011-09-13 09:59:48 -04:00
struct bio_job *job = zmalloc(sizeof(*job));
job->time = time(NULL);
job->arg1 = arg1;
job->arg2 = arg2;
job->arg3 = arg3;
pthread_mutex_lock(&bio_mutex[type]);
listAddNodeTail(bio_jobs[type],job);
bio_pending[type]++;
2015-09-23 10:09:16 -04:00
pthread_cond_signal(&bio_newjob_cond[type]);
pthread_mutex_unlock(&bio_mutex[type]);
2011-09-13 09:59:48 -04:00
}
void *bioProcessBackgroundJobs(void *arg) {
struct bio_job *job;
unsigned long type = (unsigned long) arg;
sigset_t sigset;
2011-09-13 09:59:48 -04:00
/* Check that the type is within the right interval. */
2015-07-27 08:55:45 -04:00
if (type >= BIO_NUM_OPS) {
2015-07-27 03:41:48 -04:00
serverLog(LL_WARNING,
"Warning: bio thread started with wrong type %lu",type);
return NULL;
}
Threaded IO: set thread name for redis-server Set thread name for each thread of redis-server, this helps us to monitor the utilization and optimise the performance. And suggested-by Salvatore, implement this feature for multi platforms. Currently support linux and bsd, ignore other OS. An exmaple on Linux: # top -d 5 -p `pidof redis-server ` -H PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 3682671 root 20 0 227744 8248 3836 R 99.2 0.0 0:19.53 redis-server 3682677 root 20 0 227744 8248 3836 S 26.4 0.0 0:04.15 io_thd_3 3682675 root 20 0 227744 8248 3836 S 23.6 0.0 0:03.98 io_thd_1 3682676 root 20 0 227744 8248 3836 S 23.6 0.0 0:03.97 io_thd_2 3682672 root 20 0 227744 8248 3836 S 0.2 0.0 0:00.02 bio_close_file 3682673 root 20 0 227744 8248 3836 S 0.2 0.0 0:00.02 bio_aof_fsync 3682674 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 bio_lazy_free 3682678 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682682 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682683 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682684 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682685 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd 3682687 root 20 0 227744 8248 3836 S 0.0 0.0 0:00.00 jemalloc_bg_thd Another exmaple on FreeBSD-12.1: PID USERNAME PRI NICE SIZE RES STATE C TIME WCPU COMMAND 5212 root 100 0 48M 7280K CPU2 2 0:26 99.52% redis-server{redis-server} 5212 root 38 0 48M 7280K umtxn 4 0:06 26.94% redis-server{io_thd_3} 5212 root 36 0 48M 7280K umtxn 6 0:06 26.84% redis-server{io_thd_1} 5212 root 39 0 48M 7280K umtxn 1 0:06 25.30% redis-server{io_thd_2} 5212 root 20 0 48M 7280K uwait 3 0:00 0.00% redis-server{redis-server} 5212 root 21 0 48M 7280K uwait 2 0:00 0.00% redis-server{bio_close_file} 5212 root 21 0 48M 7280K uwait 3 0:00 0.00% redis-server{bio_aof_fsync} 5212 root 21 0 48M 7280K uwait 0 0:00 0.00% redis-server{bio_lazy_free} Signed-off-by: zhenwei pi <pizhenwei@bytedance.com>
2020-04-12 21:58:35 -04:00
switch (type) {
case BIO_CLOSE_FILE:
redis_set_thread_title("bio_close_file");
break;
case BIO_AOF_FSYNC:
redis_set_thread_title("bio_aof_fsync");
break;
case BIO_LAZY_FREE:
redis_set_thread_title("bio_lazy_free");
break;
}
redisSetCpuAffinity(server.bio_cpulist);
makeThreadKillable();
pthread_mutex_lock(&bio_mutex[type]);
/* Block SIGALRM so we are sure that only the main thread will
* receive the watchdog signal. */
sigemptyset(&sigset);
sigaddset(&sigset, SIGALRM);
if (pthread_sigmask(SIG_BLOCK, &sigset, NULL))
2015-07-27 03:41:48 -04:00
serverLog(LL_WARNING,
"Warning: can't mask SIGALRM in bio.c thread: %s", strerror(errno));
2011-09-13 09:59:48 -04:00
while(1) {
listNode *ln;
/* The loop always starts with the lock hold. */
if (listLength(bio_jobs[type]) == 0) {
2015-09-23 10:09:16 -04:00
pthread_cond_wait(&bio_newjob_cond[type],&bio_mutex[type]);
2011-09-13 09:59:48 -04:00
continue;
}
/* Pop the job from the queue. */
ln = listFirst(bio_jobs[type]);
2011-09-13 09:59:48 -04:00
job = ln->value;
/* It is now possible to unlock the background system as we know have
* a stand alone job structure to process.*/
pthread_mutex_unlock(&bio_mutex[type]);
2011-09-13 09:59:48 -04:00
/* Process the job accordingly to its type. */
2015-07-27 08:55:45 -04:00
if (type == BIO_CLOSE_FILE) {
close((long)job->arg1);
2015-07-27 08:55:45 -04:00
} else if (type == BIO_AOF_FSYNC) {
redis_fsync((long)job->arg1);
2015-09-23 10:46:36 -04:00
} else if (type == BIO_LAZY_FREE) {
/* What we free changes depending on what arguments are set:
* arg1 -> free the object at pointer.
* arg2 & arg3 -> free two dictionaries (a Redis DB).
* only arg3 -> free the skiplist. */
if (job->arg1)
lazyfreeFreeObjectFromBioThread(job->arg1);
else if (job->arg2 && job->arg3)
lazyfreeFreeDatabaseFromBioThread(job->arg2,job->arg3);
else if (job->arg3)
lazyfreeFreeSlotsMapFromBioThread(job->arg3);
2011-09-13 09:59:48 -04:00
} else {
2015-07-27 03:41:48 -04:00
serverPanic("Wrong job type in bioProcessBackgroundJobs().");
2011-09-13 09:59:48 -04:00
}
zfree(job);
/* Lock again before reiterating the loop, if there are no longer
* jobs to process we'll block again in pthread_cond_wait(). */
pthread_mutex_lock(&bio_mutex[type]);
listDelNode(bio_jobs[type],ln);
bio_pending[type]--;
2018-09-05 04:51:13 -04:00
/* Unblock threads blocked on bioWaitStepOfType() if any. */
pthread_cond_broadcast(&bio_step_cond[type]);
}
}
/* Return the number of pending jobs of the specified type. */
unsigned long long bioPendingJobsOfType(int type) {
unsigned long long val;
pthread_mutex_lock(&bio_mutex[type]);
val = bio_pending[type];
pthread_mutex_unlock(&bio_mutex[type]);
return val;
}
2015-09-23 10:09:16 -04:00
/* If there are pending jobs for the specified type, the function blocks
* and waits that the next job was processed. Otherwise the function
* does not block and returns ASAP.
*
* The function returns the number of jobs still to process of the
* requested type.
*
* This function is useful when from another thread, we want to wait
* a bio.c thread to do more work in a blocking way.
*/
unsigned long long bioWaitStepOfType(int type) {
unsigned long long val;
pthread_mutex_lock(&bio_mutex[type]);
val = bio_pending[type];
if (val != 0) {
pthread_cond_wait(&bio_step_cond[type],&bio_mutex[type]);
val = bio_pending[type];
}
pthread_mutex_unlock(&bio_mutex[type]);
return val;
}
/* Kill the running bio threads in an unclean way. This function should be
* used only when it's critical to stop the threads for some reason.
* Currently Redis does this only on crash (for instance on SIGSEGV) in order
* to perform a fast memory check without other threads messing with memory. */
void bioKillThreads(void) {
int err, j;
2015-07-27 08:55:45 -04:00
for (j = 0; j < BIO_NUM_OPS; j++) {
if (bio_threads[j] == pthread_self()) continue;
if (bio_threads[j] && pthread_cancel(bio_threads[j]) == 0) {
if ((err = pthread_join(bio_threads[j],NULL)) != 0) {
2015-07-27 03:41:48 -04:00
serverLog(LL_WARNING,
"Bio thread for job type #%d can not be joined: %s",
j, strerror(err));
} else {
2015-07-27 03:41:48 -04:00
serverLog(LL_WARNING,
"Bio thread for job type #%d terminated",j);
}
}
}
}