2012-11-08 12:25:23 -05:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
|
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
|
|
|
* * Redistributions of source code must retain the above copyright notice,
|
|
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
* * Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* * Neither the name of Redis nor the names of its contributors may be used
|
|
|
|
* to endorse or promote products derived from this software without
|
|
|
|
* specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
#ifndef __REDIS_H
|
|
|
|
#define __REDIS_H
|
|
|
|
|
|
|
|
#include "fmacros.h"
|
|
|
|
#include "config.h"
|
|
|
|
#include "solarisfixes.h"
|
2016-05-18 05:45:40 -04:00
|
|
|
#include "rio.h"
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <time.h>
|
|
|
|
#include <limits.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <errno.h>
|
2010-07-01 15:13:38 -04:00
|
|
|
#include <inttypes.h>
|
2010-07-05 14:14:48 -04:00
|
|
|
#include <pthread.h>
|
2010-12-09 11:10:21 -05:00
|
|
|
#include <syslog.h>
|
2011-03-29 11:51:15 -04:00
|
|
|
#include <netinet/in.h>
|
2011-04-30 11:46:52 -04:00
|
|
|
#include <lua.h>
|
2012-01-20 06:54:15 -05:00
|
|
|
#include <signal.h>
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2014-07-01 05:30:15 -04:00
|
|
|
typedef long long mstime_t; /* millisecond time type. */
|
|
|
|
|
2011-06-30 07:27:32 -04:00
|
|
|
#include "ae.h" /* Event driven programming library */
|
|
|
|
#include "sds.h" /* Dynamic safe strings */
|
|
|
|
#include "dict.h" /* Hash tables */
|
|
|
|
#include "adlist.h" /* Linked lists */
|
2010-06-21 18:07:48 -04:00
|
|
|
#include "zmalloc.h" /* total memory usage aware version of malloc/free */
|
2011-06-30 07:27:32 -04:00
|
|
|
#include "anet.h" /* Networking the easy way */
|
2010-06-21 18:07:48 -04:00
|
|
|
#include "ziplist.h" /* Compact list data structure */
|
2011-06-30 07:27:32 -04:00
|
|
|
#include "intset.h" /* Compact integer set structure */
|
|
|
|
#include "version.h" /* Version macro */
|
|
|
|
#include "util.h" /* Misc functions useful in many places */
|
2014-07-01 05:30:15 -04:00
|
|
|
#include "latency.h" /* Latency monitor API */
|
2016-01-29 06:08:10 -05:00
|
|
|
#include "sparkline.h" /* ASCII graphs API */
|
2017-03-27 09:26:56 -04:00
|
|
|
#include "quicklist.h" /* Lists are encoded as linked lists of
|
|
|
|
N-elements flat arrays */
|
|
|
|
#include "rax.h" /* Radix tree */
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2014-11-12 21:58:57 -05:00
|
|
|
/* Following includes allow test functions to be called from Redis main() */
|
|
|
|
#include "zipmap.h"
|
|
|
|
#include "sha1.h"
|
|
|
|
#include "endianconv.h"
|
|
|
|
#include "crc64.h"
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Error codes */
|
2015-07-26 17:17:55 -04:00
|
|
|
#define C_OK 0
|
|
|
|
#define C_ERR -1
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Static server configuration */
|
2018-07-30 07:37:30 -04:00
|
|
|
#define CONFIG_DEFAULT_DYNAMIC_HZ 1 /* Adapt hz to # of clients.*/
|
2018-07-23 08:21:04 -04:00
|
|
|
#define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CONFIG_MIN_HZ 1
|
|
|
|
#define CONFIG_MAX_HZ 500
|
2018-07-23 08:21:04 -04:00
|
|
|
#define MAX_CLIENTS_PER_CLOCK_TICK 200 /* HZ is adapted based on that. */
|
|
|
|
#define CONFIG_DEFAULT_SERVER_PORT 6379 /* TCP port. */
|
|
|
|
#define CONFIG_DEFAULT_TCP_BACKLOG 511 /* TCP listen backlog. */
|
|
|
|
#define CONFIG_DEFAULT_CLIENT_TIMEOUT 0 /* Default client timeout: infinite */
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_DBNUM 16
|
2019-03-27 13:58:45 -04:00
|
|
|
#define CONFIG_DEFAULT_IO_THREADS_NUM 1 /* Single threaded by default */
|
2019-04-30 09:39:27 -04:00
|
|
|
#define CONFIG_DEFAULT_IO_THREADS_DO_READS 0 /* Read + parse from threads? */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CONFIG_MAX_LINE 1024
|
|
|
|
#define CRON_DBS_PER_CALL 16
|
|
|
|
#define NET_MAX_WRITES_PER_EVENT (1024*64)
|
|
|
|
#define PROTO_SHARED_SELECT_CMDS 10
|
|
|
|
#define OBJ_SHARED_INTEGERS 10000
|
|
|
|
#define OBJ_SHARED_BULKHDR_LEN 32
|
2018-07-23 08:21:04 -04:00
|
|
|
#define LOG_MAX_LEN 1024 /* Default maximum length of syslog messages.*/
|
2015-07-27 03:41:48 -04:00
|
|
|
#define AOF_REWRITE_PERC 100
|
|
|
|
#define AOF_REWRITE_MIN_SIZE (64*1024*1024)
|
|
|
|
#define AOF_REWRITE_ITEMS_PER_CMD 64
|
2016-08-09 05:07:32 -04:00
|
|
|
#define AOF_READ_DIFF_INTERVAL_BYTES (1024*10)
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CONFIG_DEFAULT_SLOWLOG_LOG_SLOWER_THAN 10000
|
|
|
|
#define CONFIG_DEFAULT_SLOWLOG_MAX_LEN 128
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_MAX_CLIENTS 10000
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CONFIG_AUTHPASS_MAX_LEN 512
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_SLAVE_PRIORITY 100
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CONFIG_DEFAULT_REPL_TIMEOUT 60
|
|
|
|
#define CONFIG_DEFAULT_REPL_PING_SLAVE_PERIOD 10
|
|
|
|
#define CONFIG_RUN_ID_SIZE 40
|
|
|
|
#define RDB_EOF_MARK_SIZE 40
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_REPL_BACKLOG_SIZE (1024*1024) /* 1mb */
|
|
|
|
#define CONFIG_DEFAULT_REPL_BACKLOG_TIME_LIMIT (60*60) /* 1 hour */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CONFIG_REPL_BACKLOG_MIN_SIZE (1024*16) /* 16k */
|
|
|
|
#define CONFIG_BGSAVE_RETRY_DELAY 5 /* Wait a few secs before trying again. */
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_PID_FILE "/var/run/redis.pid"
|
|
|
|
#define CONFIG_DEFAULT_SYSLOG_IDENT "redis"
|
|
|
|
#define CONFIG_DEFAULT_CLUSTER_CONFIG_FILE "nodes.conf"
|
2016-01-21 10:57:35 -05:00
|
|
|
#define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_IP NULL /* Auto detect. */
|
|
|
|
#define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_PORT 0 /* Use server.port */
|
|
|
|
#define CONFIG_DEFAULT_CLUSTER_ANNOUNCE_BUS_PORT 0 /* Use +10000 offset. */
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_DAEMONIZE 0
|
|
|
|
#define CONFIG_DEFAULT_UNIX_SOCKET_PERM 0
|
2016-07-04 06:08:37 -04:00
|
|
|
#define CONFIG_DEFAULT_TCP_KEEPALIVE 300
|
New security feature: Redis protected mode.
An exposed Redis instance on the internet can be the cause of serious
issues. Since Redis, by default, binds to all the interfaces, it is easy
to leave an instance without any protection layer by mistake.
Protected mode tries to address this issue in a soft way, providing a
layer of protection, but giving clues to Redis users about why the
server is not accepting connections.
When protected mode is enabled (the default), and if there are no
minimum hints about the fact the server is properly configured (no
"bind" directive is used in order to restrict the server to certain
interfaces, nor a password is set), clients connecting from external
interfaces are refused with an error explaining what to do in order to
fix the issue.
Clients connecting from the IPv4 and IPv6 loopback interfaces are still
accepted normally; similarly, Unix domain socket connections are not
restricted in any way.
2016-01-07 07:00:08 -05:00
|
|
|
#define CONFIG_DEFAULT_PROTECTED_MODE 1
|
2019-02-21 11:23:17 -05:00
|
|
|
#define CONFIG_DEFAULT_GOPHER_ENABLED 0
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_LOGFILE ""
|
|
|
|
#define CONFIG_DEFAULT_SYSLOG_ENABLED 0
|
|
|
|
#define CONFIG_DEFAULT_STOP_WRITES_ON_BGSAVE_ERROR 1
|
|
|
|
#define CONFIG_DEFAULT_RDB_COMPRESSION 1
|
|
|
|
#define CONFIG_DEFAULT_RDB_CHECKSUM 1
|
|
|
|
#define CONFIG_DEFAULT_RDB_FILENAME "dump.rdb"
|
|
|
|
#define CONFIG_DEFAULT_REPL_DISKLESS_SYNC 0
|
|
|
|
#define CONFIG_DEFAULT_REPL_DISKLESS_SYNC_DELAY 5
|
2019-07-01 08:22:29 -04:00
|
|
|
#define CONFIG_DEFAULT_RDB_KEY_SAVE_DELAY 0
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_SLAVE_SERVE_STALE_DATA 1
|
|
|
|
#define CONFIG_DEFAULT_SLAVE_READ_ONLY 1
|
2018-08-27 06:09:08 -04:00
|
|
|
#define CONFIG_DEFAULT_SLAVE_IGNORE_MAXMEMORY 1
|
2016-07-27 10:41:20 -04:00
|
|
|
#define CONFIG_DEFAULT_SLAVE_ANNOUNCE_IP NULL
|
|
|
|
#define CONFIG_DEFAULT_SLAVE_ANNOUNCE_PORT 0
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_REPL_DISABLE_TCP_NODELAY 0
|
|
|
|
#define CONFIG_DEFAULT_MAXMEMORY 0
|
|
|
|
#define CONFIG_DEFAULT_MAXMEMORY_SAMPLES 5
|
2016-07-20 09:00:35 -04:00
|
|
|
#define CONFIG_DEFAULT_LFU_LOG_FACTOR 10
|
|
|
|
#define CONFIG_DEFAULT_LFU_DECAY_TIME 1
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_AOF_FILENAME "appendonly.aof"
|
|
|
|
#define CONFIG_DEFAULT_AOF_NO_FSYNC_ON_REWRITE 0
|
|
|
|
#define CONFIG_DEFAULT_AOF_LOAD_TRUNCATED 1
|
2018-03-25 05:43:24 -04:00
|
|
|
#define CONFIG_DEFAULT_AOF_USE_RDB_PREAMBLE 1
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_ACTIVE_REHASHING 1
|
|
|
|
#define CONFIG_DEFAULT_AOF_REWRITE_INCREMENTAL_FSYNC 1
|
2018-03-15 12:44:50 -04:00
|
|
|
#define CONFIG_DEFAULT_RDB_SAVE_INCREMENTAL_FSYNC 1
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_MIN_SLAVES_TO_WRITE 0
|
|
|
|
#define CONFIG_DEFAULT_MIN_SLAVES_MAX_LAG 10
|
2019-02-05 04:48:17 -05:00
|
|
|
#define CONFIG_DEFAULT_ACL_FILENAME ""
|
2015-07-27 03:41:48 -04:00
|
|
|
#define NET_IP_STR_LEN 46 /* INET6_ADDRSTRLEN is 46, but we need to be sure */
|
|
|
|
#define NET_PEER_ID_LEN (NET_IP_STR_LEN+32) /* Must be enough for ip:port */
|
|
|
|
#define CONFIG_BINDADDR_MAX 16
|
|
|
|
#define CONFIG_MIN_RESERVED_FDS 32
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_LATENCY_MONITOR_THRESHOLD 0
|
2015-10-02 09:27:57 -04:00
|
|
|
#define CONFIG_DEFAULT_SLAVE_LAZY_FLUSH 0
|
|
|
|
#define CONFIG_DEFAULT_LAZYFREE_LAZY_EVICTION 0
|
|
|
|
#define CONFIG_DEFAULT_LAZYFREE_LAZY_EXPIRE 0
|
|
|
|
#define CONFIG_DEFAULT_LAZYFREE_LAZY_SERVER_DEL 0
|
2016-12-19 10:41:47 -05:00
|
|
|
#define CONFIG_DEFAULT_ALWAYS_SHOW_LOGO 0
|
2017-01-11 09:43:08 -05:00
|
|
|
#define CONFIG_DEFAULT_ACTIVE_DEFRAG 0
|
2016-12-29 20:37:52 -05:00
|
|
|
#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_LOWER 10 /* don't defrag when fragmentation is below 10% */
|
|
|
|
#define CONFIG_DEFAULT_DEFRAG_THRESHOLD_UPPER 100 /* maximum defrag force at 100% fragmentation */
|
|
|
|
#define CONFIG_DEFAULT_DEFRAG_IGNORE_BYTES (100<<20) /* don't defrag if frag overhead is below 100mb */
|
2018-02-18 10:15:22 -05:00
|
|
|
#define CONFIG_DEFAULT_DEFRAG_CYCLE_MIN 5 /* 5% CPU min (at lower threshold) */
|
2016-12-29 20:37:52 -05:00
|
|
|
#define CONFIG_DEFAULT_DEFRAG_CYCLE_MAX 75 /* 75% CPU max (at upper threshold) */
|
2018-02-18 10:15:22 -05:00
|
|
|
#define CONFIG_DEFAULT_DEFRAG_MAX_SCAN_FIELDS 1000 /* keys with more than 1000 fields will be processed separately */
|
2018-01-11 05:27:03 -05:00
|
|
|
#define CONFIG_DEFAULT_PROTO_MAX_BULK_LEN (512ll*1024*1024) /* Bulk request max size */
|
2019-07-24 05:35:01 -04:00
|
|
|
#define CONFIG_DEFAULT_TRACKING_TABLE_MAX_FILL 10 /* 10% tracking table max fill. */
|
2012-03-08 04:08:44 -05:00
|
|
|
|
2013-08-06 06:55:49 -04:00
|
|
|
#define ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP 20 /* Lookups per loop. */
|
|
|
|
#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds */
|
|
|
|
#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* CPU max % for keys collection */
|
|
|
|
#define ACTIVE_EXPIRE_CYCLE_SLOW 0
|
|
|
|
#define ACTIVE_EXPIRE_CYCLE_FAST 1
|
|
|
|
|
2014-12-03 06:06:54 -05:00
|
|
|
/* Instantaneous metrics tracking. */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define STATS_METRIC_SAMPLES 16 /* Number of samples per metric. */
|
|
|
|
#define STATS_METRIC_COMMAND 0 /* Number of commands executed. */
|
|
|
|
#define STATS_METRIC_NET_INPUT 1 /* Bytes read from network. */
|
|
|
|
#define STATS_METRIC_NET_OUTPUT 2 /* Bytes written to network. */
|
|
|
|
#define STATS_METRIC_COUNT 3
|
2014-12-03 06:06:54 -05:00
|
|
|
|
2011-12-31 09:37:33 -05:00
|
|
|
/* Protocol and I/O related defines */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define PROTO_MAX_QUERYBUF_LEN (1024*1024*1024) /* 1GB max query buffer. */
|
|
|
|
#define PROTO_IOBUF_LEN (1024*16) /* Generic I/O buffer size */
|
|
|
|
#define PROTO_REPLY_CHUNK_BYTES (16*1024) /* 16k output buffer */
|
|
|
|
#define PROTO_INLINE_MAX_SIZE (1024*64) /* Max size of inline reads */
|
|
|
|
#define PROTO_MBULK_BIG_ARG (1024*32)
|
2016-05-18 09:23:18 -04:00
|
|
|
#define LONG_STR_SIZE 21 /* Bytes needed for long -> str + '\0' */
|
2018-03-15 12:44:50 -04:00
|
|
|
#define REDIS_AUTOSYNC_BYTES (1024*1024*32) /* fdatasync every 32MB */
|
2015-07-27 03:41:48 -04:00
|
|
|
|
2018-07-01 02:43:53 -04:00
|
|
|
#define LIMIT_PENDING_QUERYBUF (4*1024*1024) /* 4mb */
|
2015-07-27 03:41:48 -04:00
|
|
|
|
|
|
|
/* When configuring the server eventloop, we setup it so that the total number
|
|
|
|
* of file descriptors we can handle are server.maxclients + RESERVED_FDS +
|
|
|
|
* a few more to stay safe. Since RESERVED_FDS defaults to 32, we add 96
|
|
|
|
* in order to make sure of not over provisioning more than 128 fds. */
|
|
|
|
#define CONFIG_FDSET_INCR (CONFIG_MIN_RESERVED_FDS+96)
|
2011-10-31 06:13:28 -04:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Hash table parameters */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define HASHTABLE_MIN_FILL 10 /* Minimal hash table fill 10% */
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2011-09-26 09:40:39 -04:00
|
|
|
/* Command flags. Please check the command table defined in the redis.c file
|
|
|
|
* for more information about the meaning of every flag. */
|
2019-01-23 10:47:29 -05:00
|
|
|
#define CMD_WRITE (1ULL<<0) /* "write" flag */
|
|
|
|
#define CMD_READONLY (1ULL<<1) /* "read-only" flag */
|
|
|
|
#define CMD_DENYOOM (1ULL<<2) /* "use-memory" flag */
|
|
|
|
#define CMD_MODULE (1ULL<<3) /* Command exported by module. */
|
|
|
|
#define CMD_ADMIN (1ULL<<4) /* "admin" flag */
|
|
|
|
#define CMD_PUBSUB (1ULL<<5) /* "pub-sub" flag */
|
|
|
|
#define CMD_NOSCRIPT (1ULL<<6) /* "no-script" flag */
|
|
|
|
#define CMD_RANDOM (1ULL<<7) /* "random" flag */
|
|
|
|
#define CMD_SORT_FOR_SCRIPT (1ULL<<8) /* "to-sort" flag */
|
|
|
|
#define CMD_LOADING (1ULL<<9) /* "ok-loading" flag */
|
|
|
|
#define CMD_STALE (1ULL<<10) /* "ok-stale" flag */
|
|
|
|
#define CMD_SKIP_MONITOR (1ULL<<11) /* "no-monitor" flag */
|
2019-07-19 13:22:40 -04:00
|
|
|
#define CMD_SKIP_SLOWLOG (1ULL<<12) /* "no-slowlog" flag */
|
|
|
|
#define CMD_ASKING (1ULL<<13) /* "cluster-asking" flag */
|
|
|
|
#define CMD_FAST (1ULL<<14) /* "fast" flag */
|
2019-01-22 13:02:50 -05:00
|
|
|
|
|
|
|
/* Command flags used by the module system. */
|
2019-07-19 13:22:40 -04:00
|
|
|
#define CMD_MODULE_GETKEYS (1ULL<<15) /* Use the modules getkeys interface. */
|
|
|
|
#define CMD_MODULE_NO_CLUSTER (1ULL<<16) /* Deny on Redis Cluster. */
|
2019-01-23 10:47:29 -05:00
|
|
|
|
|
|
|
/* Command flags that describe ACLs categories. */
|
2019-07-19 13:22:40 -04:00
|
|
|
#define CMD_CATEGORY_KEYSPACE (1ULL<<17)
|
|
|
|
#define CMD_CATEGORY_READ (1ULL<<18)
|
|
|
|
#define CMD_CATEGORY_WRITE (1ULL<<19)
|
|
|
|
#define CMD_CATEGORY_SET (1ULL<<20)
|
|
|
|
#define CMD_CATEGORY_SORTEDSET (1ULL<<21)
|
|
|
|
#define CMD_CATEGORY_LIST (1ULL<<22)
|
|
|
|
#define CMD_CATEGORY_HASH (1ULL<<23)
|
|
|
|
#define CMD_CATEGORY_STRING (1ULL<<24)
|
|
|
|
#define CMD_CATEGORY_BITMAP (1ULL<<25)
|
|
|
|
#define CMD_CATEGORY_HYPERLOGLOG (1ULL<<26)
|
|
|
|
#define CMD_CATEGORY_GEO (1ULL<<27)
|
|
|
|
#define CMD_CATEGORY_STREAM (1ULL<<28)
|
|
|
|
#define CMD_CATEGORY_PUBSUB (1ULL<<29)
|
|
|
|
#define CMD_CATEGORY_ADMIN (1ULL<<30)
|
|
|
|
#define CMD_CATEGORY_FAST (1ULL<<31)
|
|
|
|
#define CMD_CATEGORY_SLOW (1ULL<<32)
|
|
|
|
#define CMD_CATEGORY_BLOCKING (1ULL<<33)
|
|
|
|
#define CMD_CATEGORY_DANGEROUS (1ULL<<34)
|
|
|
|
#define CMD_CATEGORY_CONNECTION (1ULL<<35)
|
|
|
|
#define CMD_CATEGORY_TRANSACTION (1ULL<<36)
|
|
|
|
#define CMD_CATEGORY_SCRIPTING (1ULL<<37)
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2011-12-21 04:05:32 -05:00
|
|
|
/* AOF states */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define AOF_OFF 0 /* AOF is off */
|
|
|
|
#define AOF_ON 1 /* AOF is on */
|
|
|
|
#define AOF_WAIT_REWRITE 2 /* AOF waits rewrite to start appending */
|
2011-12-21 04:05:32 -05:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Client flags */
|
2019-06-29 20:08:41 -04:00
|
|
|
#define CLIENT_SLAVE (1<<0) /* This client is a replica */
|
|
|
|
#define CLIENT_MASTER (1<<1) /* This client is a master */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CLIENT_MONITOR (1<<2) /* This client is a slave monitor, see MONITOR */
|
|
|
|
#define CLIENT_MULTI (1<<3) /* This client is in a MULTI context */
|
|
|
|
#define CLIENT_BLOCKED (1<<4) /* The client is waiting in a blocking operation */
|
|
|
|
#define CLIENT_DIRTY_CAS (1<<5) /* Watched keys modified. EXEC will fail. */
|
|
|
|
#define CLIENT_CLOSE_AFTER_REPLY (1<<6) /* Close after writing entire reply. */
|
|
|
|
#define CLIENT_UNBLOCKED (1<<7) /* This client was unblocked and is stored in
|
2012-11-15 14:11:05 -05:00
|
|
|
server.unblocked_clients */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CLIENT_LUA (1<<8) /* This is a non connected client used by Lua */
|
|
|
|
#define CLIENT_ASKING (1<<9) /* Client issued the ASKING command */
|
|
|
|
#define CLIENT_CLOSE_ASAP (1<<10)/* Close this client ASAP */
|
|
|
|
#define CLIENT_UNIX_SOCKET (1<<11) /* Client connected via Unix domain socket */
|
|
|
|
#define CLIENT_DIRTY_EXEC (1<<12) /* EXEC will fail for errors while queueing */
|
|
|
|
#define CLIENT_MASTER_FORCE_REPLY (1<<13) /* Queue replies even if is master */
|
|
|
|
#define CLIENT_FORCE_AOF (1<<14) /* Force AOF propagation of current cmd. */
|
|
|
|
#define CLIENT_FORCE_REPL (1<<15) /* Force replication of current cmd. */
|
|
|
|
#define CLIENT_PRE_PSYNC (1<<16) /* Instance doesn't understand PSYNC. */
|
|
|
|
#define CLIENT_READONLY (1<<17) /* Cluster client is in read-only state. */
|
|
|
|
#define CLIENT_PUBSUB (1<<18) /* Client is in Pub/Sub mode. */
|
2015-10-29 06:05:27 -04:00
|
|
|
#define CLIENT_PREVENT_AOF_PROP (1<<19) /* Don't propagate to AOF. */
|
|
|
|
#define CLIENT_PREVENT_REPL_PROP (1<<20) /* Don't propagate to slaves. */
|
|
|
|
#define CLIENT_PREVENT_PROP (CLIENT_PREVENT_AOF_PROP|CLIENT_PREVENT_REPL_PROP)
|
|
|
|
#define CLIENT_PENDING_WRITE (1<<21) /* Client has output to send but a write
|
2015-09-28 12:25:57 -04:00
|
|
|
handler is yet not installed. */
|
2015-10-29 06:05:27 -04:00
|
|
|
#define CLIENT_REPLY_OFF (1<<22) /* Don't send replies to client. */
|
|
|
|
#define CLIENT_REPLY_SKIP_NEXT (1<<23) /* Set CLIENT_REPLY_SKIP for next cmd */
|
|
|
|
#define CLIENT_REPLY_SKIP (1<<24) /* Don't send just this reply. */
|
2015-11-05 04:36:52 -05:00
|
|
|
#define CLIENT_LUA_DEBUG (1<<25) /* Run EVAL in debug mode. */
|
2015-11-06 10:19:59 -05:00
|
|
|
#define CLIENT_LUA_DEBUG_SYNC (1<<26) /* EVAL debugging without fork() */
|
2016-03-06 07:44:24 -05:00
|
|
|
#define CLIENT_MODULE (1<<27) /* Non connected client used by some module. */
|
2018-10-09 07:15:41 -04:00
|
|
|
#define CLIENT_PROTECTED (1<<28) /* Client should not be freed for now. */
|
2019-03-30 06:26:58 -04:00
|
|
|
#define CLIENT_PENDING_READ (1<<29) /* The client has pending reads and was put
|
|
|
|
in the list of clients we can read
|
|
|
|
from. */
|
2019-06-29 20:08:41 -04:00
|
|
|
#define CLIENT_PENDING_COMMAND (1<<30) /* Used in threaded I/O to signal after
|
|
|
|
we return single threaded that the
|
|
|
|
client has already pending commands
|
|
|
|
to be executed. */
|
|
|
|
/* Client enabled keys tracking in order to perform client side caching.
 *
 * The constant is built from an unsigned 64-bit one: with a 32-bit int,
 * (1<<31) shifts into the sign bit, and the resulting negative value sets
 * bit 32 and every higher bit once it is converted to a 64-bit type, so it
 * would overlap the flags defined above bit 31. */
#define CLIENT_TRACKING (1ULL<<31)
|
2019-07-03 13:16:20 -04:00
|
|
|
#define CLIENT_TRACKING_BROKEN_REDIR (1ULL<<32) /* Target client is invalid. */
|
2010-10-15 09:40:25 -04:00
|
|
|
|
2013-12-03 11:43:53 -05:00
|
|
|
/* Client block type (btype field in client structure)
|
2015-07-27 03:41:48 -04:00
|
|
|
* if CLIENT_BLOCKED flag is set. */
|
|
|
|
#define BLOCKED_NONE 0 /* Not blocked, no CLIENT_BLOCKED flag set. */
|
|
|
|
#define BLOCKED_LIST 1 /* BLPOP & co. */
|
|
|
|
#define BLOCKED_WAIT 2 /* WAIT for synchronous replication. */
|
2016-10-07 05:55:35 -04:00
|
|
|
#define BLOCKED_MODULE 3 /* Blocked by a loadable module. */
|
2017-09-06 09:43:28 -04:00
|
|
|
#define BLOCKED_STREAM 4 /* XREAD. */
|
2018-04-29 19:10:42 -04:00
|
|
|
#define BLOCKED_ZSET 5 /* BZPOP et al. */
|
|
|
|
#define BLOCKED_NUM 6 /* Number of blocked states. */
|
2013-12-03 11:43:53 -05:00
|
|
|
|
2010-10-15 09:40:25 -04:00
|
|
|
/* Client request types */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define PROTO_REQ_INLINE 1
|
|
|
|
#define PROTO_REQ_MULTIBULK 2
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2012-01-17 06:43:01 -05:00
|
|
|
/* Client classes for client limits, currently used only for
|
|
|
|
* the max-client-output-buffer limit implementation. */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CLIENT_TYPE_NORMAL 0 /* Normal req-reply clients + MONITORs */
|
|
|
|
#define CLIENT_TYPE_SLAVE 1 /* Slaves. */
|
|
|
|
#define CLIENT_TYPE_PUBSUB 2 /* Clients subscribed to PubSub channels. */
|
2015-07-28 10:58:04 -04:00
|
|
|
#define CLIENT_TYPE_MASTER 3 /* Master. */
|
|
|
|
#define CLIENT_TYPE_OBUF_COUNT 3 /* Number of clients to expose to output
|
|
|
|
buffer configuration. Just the first
|
|
|
|
three: normal, slave, pubsub. */
|
2015-07-27 03:41:48 -04:00
|
|
|
|
|
|
|
/* Slave replication state. Used in server.repl_state for slaves to remember
|
|
|
|
* what to do next. */
|
|
|
|
#define REPL_STATE_NONE 0 /* No active replication */
|
|
|
|
#define REPL_STATE_CONNECT 1 /* Must connect to master */
|
|
|
|
#define REPL_STATE_CONNECTING 2 /* Connecting to master */
|
2015-10-15 04:20:09 -04:00
|
|
|
/* --- Handshake states, must be ordered --- */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define REPL_STATE_RECEIVE_PONG 3 /* Wait for PING reply */
|
2015-08-06 10:49:30 -04:00
|
|
|
#define REPL_STATE_SEND_AUTH 4 /* Send AUTH to master */
|
|
|
|
#define REPL_STATE_RECEIVE_AUTH 5 /* Wait for AUTH reply */
|
|
|
|
#define REPL_STATE_SEND_PORT 6 /* Send REPLCONF listening-port */
|
|
|
|
#define REPL_STATE_RECEIVE_PORT 7 /* Wait for REPLCONF reply */
|
2016-07-27 10:41:20 -04:00
|
|
|
#define REPL_STATE_SEND_IP 8 /* Send REPLCONF ip-address */
|
|
|
|
#define REPL_STATE_RECEIVE_IP 9 /* Wait for REPLCONF reply */
|
|
|
|
#define REPL_STATE_SEND_CAPA 10 /* Send REPLCONF capa */
|
|
|
|
#define REPL_STATE_RECEIVE_CAPA 11 /* Wait for REPLCONF reply */
|
|
|
|
#define REPL_STATE_SEND_PSYNC 12 /* Send PSYNC */
|
|
|
|
#define REPL_STATE_RECEIVE_PSYNC 13 /* Wait for PSYNC reply */
|
2015-10-15 04:20:09 -04:00
|
|
|
/* --- End of handshake states --- */
|
2016-07-27 10:41:20 -04:00
|
|
|
#define REPL_STATE_TRANSFER 14 /* Receiving .rdb from master */
|
|
|
|
#define REPL_STATE_CONNECTED 15 /* Connected to master */
|
2015-07-27 03:41:48 -04:00
|
|
|
|
|
|
|
/* State of slaves from the POV of the master. Used in client->replstate.
|
2012-11-03 06:56:28 -04:00
|
|
|
* In SEND_BULK and ONLINE state the slave receives new updates
|
2015-07-27 03:41:48 -04:00
|
|
|
* in its output queue. In the WAIT_BGSAVE states instead the server is waiting
|
2010-06-21 18:07:48 -04:00
|
|
|
* to start the next background saving in order to send updates to it. */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define SLAVE_STATE_WAIT_BGSAVE_START 6 /* We need to produce a new RDB file. */
|
|
|
|
#define SLAVE_STATE_WAIT_BGSAVE_END 7 /* Waiting RDB file creation to finish. */
|
|
|
|
#define SLAVE_STATE_SEND_BULK 8 /* Sending RDB file to slave. */
|
|
|
|
#define SLAVE_STATE_ONLINE 9 /* RDB file transmitted, sending just updates. */
|
2012-11-03 06:56:28 -04:00
|
|
|
|
2015-08-06 03:23:23 -04:00
|
|
|
/* Slave capabilities. */
|
|
|
|
#define SLAVE_CAPA_NONE 0
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out of band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because
during the work to implement it, different changes were needed in order
to make things working well.
2016-11-09 05:31:06 -05:00
|
|
|
#define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */
|
|
|
|
#define SLAVE_CAPA_PSYNC2 (1<<1) /* Supports PSYNC2 protocol. */
|
2015-08-06 03:23:23 -04:00
|
|
|
|
2012-11-03 06:56:28 -04:00
|
|
|
/* Synchronous read timeout - slave side */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CONFIG_REPL_SYNCIO_TIMEOUT 5
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* List related stuff */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define LIST_HEAD 0
|
|
|
|
#define LIST_TAIL 1
|
2018-05-11 11:31:46 -04:00
|
|
|
#define ZSET_MIN 0
|
|
|
|
#define ZSET_MAX 1
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Sort operations */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define SORT_OP_GET 0
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Log levels */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define LL_DEBUG 0
|
|
|
|
#define LL_VERBOSE 1
|
|
|
|
#define LL_NOTICE 2
|
|
|
|
#define LL_WARNING 3
|
|
|
|
#define LL_RAW (1<<10) /* Modifier to log without timestamp */
|
|
|
|
#define CONFIG_DEFAULT_VERBOSITY LL_NOTICE
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2015-01-08 15:22:33 -05:00
|
|
|
/* Supervision options */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define SUPERVISED_NONE 0
|
|
|
|
#define SUPERVISED_AUTODETECT 1
|
|
|
|
#define SUPERVISED_SYSTEMD 2
|
|
|
|
#define SUPERVISED_UPSTART 3
|
2015-01-08 15:22:33 -05:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Anti-warning macro... */
|
2015-07-27 03:41:48 -04:00
|
|
|
/* Evaluate V and discard the result, silencing unused-variable/parameter
 * warnings. The argument is parenthesized so the void cast applies to the
 * whole expression rather than only to its first operand. */
#define UNUSED(V) ((void)(V))
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2017-12-08 03:09:27 -05:00
|
|
|
#define ZSKIPLIST_MAXLEVEL 64 /* Should be enough for 2^64 elements */
|
2010-06-21 18:07:48 -04:00
|
|
|
#define ZSKIPLIST_P 0.25 /* Skiplist P = 1/4 */
|
|
|
|
|
|
|
|
/* Append only defines */
|
2011-12-21 05:58:42 -05:00
|
|
|
#define AOF_FSYNC_NO 0
|
|
|
|
#define AOF_FSYNC_ALWAYS 1
|
|
|
|
#define AOF_FSYNC_EVERYSEC 2
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_AOF_FSYNC AOF_FSYNC_EVERYSEC
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2019-07-01 08:22:29 -04:00
|
|
|
/* Replication diskless load defines */
|
|
|
|
#define REPL_DISKLESS_LOAD_DISABLED 0
|
|
|
|
#define REPL_DISKLESS_LOAD_WHEN_DB_EMPTY 1
|
|
|
|
#define REPL_DISKLESS_LOAD_SWAPDB 2
|
|
|
|
#define CONFIG_DEFAULT_REPL_DISKLESS_LOAD REPL_DISKLESS_LOAD_DISABLED
|
|
|
|
|
2018-06-07 08:24:45 -04:00
|
|
|
/* Zipped structures related defaults */
|
2015-07-26 09:28:00 -04:00
|
|
|
#define OBJ_HASH_MAX_ZIPLIST_ENTRIES 512
|
|
|
|
#define OBJ_HASH_MAX_ZIPLIST_VALUE 64
|
|
|
|
#define OBJ_SET_MAX_INTSET_ENTRIES 512
|
|
|
|
#define OBJ_ZSET_MAX_ZIPLIST_ENTRIES 128
|
|
|
|
#define OBJ_ZSET_MAX_ZIPLIST_VALUE 64
|
2018-06-07 08:24:45 -04:00
|
|
|
#define OBJ_STREAM_NODE_MAX_BYTES 4096
|
|
|
|
#define OBJ_STREAM_NODE_MAX_ENTRIES 100
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2014-12-16 00:49:14 -05:00
|
|
|
/* List defaults */
|
2015-07-26 09:28:00 -04:00
|
|
|
#define OBJ_LIST_MAX_ZIPLIST_SIZE -2
|
|
|
|
#define OBJ_LIST_COMPRESS_DEPTH 0
|
2014-12-16 00:49:14 -05:00
|
|
|
|
2014-04-15 11:46:51 -04:00
|
|
|
/* HyperLogLog defines */
|
2015-07-26 09:14:57 -04:00
|
|
|
#define CONFIG_DEFAULT_HLL_SPARSE_MAX_BYTES 3000
|
2014-04-15 11:46:51 -04:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Sets operations codes */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define SET_OP_UNION 0
|
|
|
|
#define SET_OP_DIFF 1
|
|
|
|
#define SET_OP_INTER 2
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2016-07-15 06:12:52 -04:00
|
|
|
/* Redis maxmemory strategies. Instead of using just incremental number
|
|
|
|
* for this defines, we use a set of flags so that testing for certain
|
|
|
|
* properties common to multiple policies is faster. */
|
|
|
|
#define MAXMEMORY_FLAG_LRU (1<<0)
|
|
|
|
#define MAXMEMORY_FLAG_LFU (1<<1)
|
2016-07-20 13:53:27 -04:00
|
|
|
#define MAXMEMORY_FLAG_ALLKEYS (1<<2)
|
2016-07-15 06:12:52 -04:00
|
|
|
#define MAXMEMORY_FLAG_NO_SHARED_INTEGERS \
|
|
|
|
(MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_LFU)
|
2016-07-20 13:53:27 -04:00
|
|
|
|
2016-07-15 06:12:52 -04:00
|
|
|
#define MAXMEMORY_VOLATILE_LRU ((0<<8)|MAXMEMORY_FLAG_LRU)
|
|
|
|
#define MAXMEMORY_VOLATILE_LFU ((1<<8)|MAXMEMORY_FLAG_LFU)
|
|
|
|
#define MAXMEMORY_VOLATILE_TTL (2<<8)
|
|
|
|
#define MAXMEMORY_VOLATILE_RANDOM (3<<8)
|
2016-07-20 13:53:27 -04:00
|
|
|
#define MAXMEMORY_ALLKEYS_LRU ((4<<8)|MAXMEMORY_FLAG_LRU|MAXMEMORY_FLAG_ALLKEYS)
|
|
|
|
#define MAXMEMORY_ALLKEYS_LFU ((5<<8)|MAXMEMORY_FLAG_LFU|MAXMEMORY_FLAG_ALLKEYS)
|
|
|
|
#define MAXMEMORY_ALLKEYS_RANDOM ((6<<8)|MAXMEMORY_FLAG_ALLKEYS)
|
2016-07-15 06:12:52 -04:00
|
|
|
#define MAXMEMORY_NO_EVICTION (7<<8)
|
|
|
|
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CONFIG_DEFAULT_MAXMEMORY_POLICY MAXMEMORY_NO_EVICTION
|
2010-10-14 15:22:21 -04:00
|
|
|
|
2011-05-06 11:21:27 -04:00
|
|
|
/* Scripting */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define LUA_SCRIPT_TIME_LIMIT 5000 /* milliseconds */
|
2011-05-06 11:21:27 -04:00
|
|
|
|
2011-11-10 11:52:02 -05:00
|
|
|
/* Units */
|
|
|
|
#define UNIT_SECONDS 0
|
|
|
|
#define UNIT_MILLISECONDS 1
|
|
|
|
|
2011-11-18 08:10:48 -05:00
|
|
|
/* SHUTDOWN flags */
|
2015-07-28 05:10:42 -04:00
|
|
|
#define SHUTDOWN_NOFLAGS 0 /* No flags. */
|
|
|
|
#define SHUTDOWN_SAVE 1 /* Force SAVE on SHUTDOWN even if no save
|
|
|
|
points are configured. */
|
|
|
|
#define SHUTDOWN_NOSAVE 2 /* Don't SAVE on SHUTDOWN. */
|
2011-11-18 08:10:48 -05:00
|
|
|
|
2012-02-02 10:30:52 -05:00
|
|
|
/* Command call flags, see call() function */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CMD_CALL_NONE 0
|
2015-10-29 10:57:41 -04:00
|
|
|
#define CMD_CALL_SLOWLOG (1<<0)
|
|
|
|
#define CMD_CALL_STATS (1<<1)
|
|
|
|
#define CMD_CALL_PROPAGATE_AOF (1<<2)
|
|
|
|
#define CMD_CALL_PROPAGATE_REPL (1<<3)
|
|
|
|
#define CMD_CALL_PROPAGATE (CMD_CALL_PROPAGATE_AOF|CMD_CALL_PROPAGATE_REPL)
|
2015-07-27 03:41:48 -04:00
|
|
|
#define CMD_CALL_FULL (CMD_CALL_SLOWLOG | CMD_CALL_STATS | CMD_CALL_PROPAGATE)
|
2012-02-02 10:30:52 -05:00
|
|
|
|
2012-02-28 10:17:00 -05:00
|
|
|
/* Command propagation flags, see propagate() function */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define PROPAGATE_NONE 0
|
|
|
|
#define PROPAGATE_AOF 1
|
|
|
|
#define PROPAGATE_REPL 2
|
2012-02-28 10:17:00 -05:00
|
|
|
|
2014-10-08 03:09:01 -04:00
|
|
|
/* RDB active child save type. */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define RDB_CHILD_TYPE_NONE 0
|
|
|
|
#define RDB_CHILD_TYPE_DISK 1 /* RDB is written to disk. */
|
|
|
|
#define RDB_CHILD_TYPE_SOCKET 2 /* RDB is written to slave socket. */
|
2014-10-08 03:09:01 -04:00
|
|
|
|
2013-01-25 07:19:08 -05:00
|
|
|
/* Keyspace changes notification classes. Every class is associated with a
|
|
|
|
* character for configuration purposes. */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define NOTIFY_KEYSPACE (1<<0) /* K */
|
|
|
|
#define NOTIFY_KEYEVENT (1<<1) /* E */
|
|
|
|
#define NOTIFY_GENERIC (1<<2) /* g */
|
|
|
|
#define NOTIFY_STRING (1<<3) /* $ */
|
|
|
|
#define NOTIFY_LIST (1<<4) /* l */
|
|
|
|
#define NOTIFY_SET (1<<5) /* s */
|
|
|
|
#define NOTIFY_HASH (1<<6) /* h */
|
|
|
|
#define NOTIFY_ZSET (1<<7) /* z */
|
|
|
|
#define NOTIFY_EXPIRED (1<<8) /* x */
|
|
|
|
#define NOTIFY_EVICTED (1<<9) /* e */
|
2017-09-11 12:02:57 -04:00
|
|
|
#define NOTIFY_STREAM (1<<10) /* t */
|
2019-03-21 05:47:14 -04:00
|
|
|
#define NOTIFY_KEY_MISS (1<<11) /* m */
|
|
|
|
#define NOTIFY_ALL (NOTIFY_GENERIC | NOTIFY_STRING | NOTIFY_LIST | NOTIFY_SET | NOTIFY_HASH | NOTIFY_ZSET | NOTIFY_EXPIRED | NOTIFY_EVICTED | NOTIFY_STREAM | NOTIFY_KEY_MISS) /* A flag */
|
2013-01-25 07:19:08 -05:00
|
|
|
|
2014-04-24 15:19:06 -04:00
|
|
|
/* Get the first bind addr or NULL */
|
2015-07-27 03:41:48 -04:00
|
|
|
#define NET_FIRST_BIND_ADDR (server.bindaddr_count ? server.bindaddr[0] : NULL)
|
2014-04-24 15:19:06 -04:00
|
|
|
|
2012-07-23 06:54:52 -04:00
|
|
|
/* Using the following macro you can run code inside serverCron() with the
|
|
|
|
* specified period, specified in milliseconds.
|
2012-12-14 11:10:40 -05:00
|
|
|
* The actual resolution depends on server.hz. */
|
|
|
|
/* Expands to an 'if' header: the statement that follows is executed when
 * '_ms_' is not greater than one tick (1000/server.hz milliseconds), or when
 * server.cronloops is a multiple of (_ms_)/(1000/server.hz).
 * '_ms_' is expanded twice, so it must be free of side effects; every use is
 * parenthesized so that compound expressions keep their meaning. */
#define run_with_period(_ms_) if (((_ms_) <= 1000/server.hz) || !(server.cronloops%((_ms_)/(1000/server.hz))))
|
2012-07-23 06:54:52 -04:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* We can print the stacktrace, so our assert is defined this way: */
|
2015-07-26 09:29:53 -04:00
|
|
|
#define serverAssertWithInfo(_c,_o,_e) ((_e)?(void)0 : (_serverAssertWithInfo(_c,_o,#_e,__FILE__,__LINE__),_exit(1)))
|
|
|
|
#define serverAssert(_e) ((_e)?(void)0 : (_serverAssert(#_e,__FILE__,__LINE__),_exit(1)))
|
2017-01-18 11:05:10 -05:00
|
|
|
/* Report a panic through _serverPanic() with the current file and line, then
 * call _exit(1). The whole expansion is wrapped in parentheses so the two
 * calls stay together as one expression wherever the macro is used. */
#define serverPanic(...) (_serverPanic(__FILE__,__LINE__,__VA_ARGS__),_exit(1))
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/*-----------------------------------------------------------------------------
|
|
|
|
* Data types
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
/* A redis object, that is a type able to hold a string / list / set */
|
|
|
|
|
|
|
|
/* The actual Redis Object */
|
2017-09-05 06:13:16 -04:00
|
|
|
#define OBJ_STRING 0 /* String object. */
|
|
|
|
#define OBJ_LIST 1 /* List object. */
|
|
|
|
#define OBJ_SET 2 /* Set object. */
|
|
|
|
#define OBJ_ZSET 3 /* Sorted set object. */
|
|
|
|
#define OBJ_HASH 4 /* Hash object. */
|
2015-07-30 05:46:31 -04:00
|
|
|
|
2016-05-18 05:45:40 -04:00
|
|
|
/* The "module" object type is a special one that signals that the object
|
|
|
|
* is one directly managed by a Redis module. In this case the value points
|
|
|
|
* to a moduleValue struct, which contains the object value (which is only
|
|
|
|
* handled by the module itself) and the RedisModuleType struct which lists
|
|
|
|
* function pointers in order to serialize, deserialize, AOF-rewrite and
|
|
|
|
* free the object.
|
|
|
|
*
|
|
|
|
* Inside the RDB file, module types are encoded as OBJ_MODULE followed
|
|
|
|
* by a 64 bit module type ID, which has a 54 bits module-specific signature
|
|
|
|
* in order to dispatch the loading to the right module, plus a 10 bits
|
|
|
|
* encoding version. */
|
2017-09-05 06:13:16 -04:00
|
|
|
#define OBJ_MODULE 5 /* Module object. */
|
|
|
|
#define OBJ_STREAM 6 /* Stream object. */
|
2016-05-18 05:45:40 -04:00
|
|
|
|
|
|
|
/* Extract encver / signature from a module type ID. */
|
|
|
|
/* A module type ID keeps the encoding version in its lowest
 * REDISMODULE_TYPE_ENCVER_BITS bits and the signature in the bits above them.
 * REDISMODULE_TYPE_ENCVER(id) masks out the version, REDISMODULE_TYPE_SIGN(id)
 * clears the version bits and shifts the signature down. 'id' is parenthesized
 * in both so that any expression can be passed as the argument. */
#define REDISMODULE_TYPE_ENCVER_BITS 10
#define REDISMODULE_TYPE_ENCVER_MASK ((1<<REDISMODULE_TYPE_ENCVER_BITS)-1)
#define REDISMODULE_TYPE_ENCVER(id) ((id) & REDISMODULE_TYPE_ENCVER_MASK)
#define REDISMODULE_TYPE_SIGN(id) (((id) & ~((uint64_t)REDISMODULE_TYPE_ENCVER_MASK)) >>REDISMODULE_TYPE_ENCVER_BITS)
|
|
|
|
|
2019-07-21 10:41:03 -04:00
|
|
|
/* Bit flags for moduleTypeAuxSaveFunc */
|
|
|
|
#define REDISMODULE_AUX_BEFORE_RDB (1<<0)
|
|
|
|
#define REDISMODULE_AUX_AFTER_RDB (1<<1)
|
|
|
|
|
2016-05-18 05:45:40 -04:00
|
|
|
struct RedisModule;
|
|
|
|
struct RedisModuleIO;
|
|
|
|
struct RedisModuleDigest;
|
2016-10-06 11:05:38 -04:00
|
|
|
struct RedisModuleCtx;
|
2016-05-18 05:45:40 -04:00
|
|
|
struct redisObject;
|
|
|
|
|
|
|
|
/* Each module type implementation should export a set of methods in order
|
|
|
|
* to serialize and deserialize the value in the RDB file, rewrite the AOF
|
|
|
|
* log, create the digest for "DEBUG DIGEST", and free the value when a key
|
|
|
|
* is deleted. */
|
|
|
|
typedef void *(*moduleTypeLoadFunc)(struct RedisModuleIO *io, int encver);
|
|
|
|
typedef void (*moduleTypeSaveFunc)(struct RedisModuleIO *io, void *value);
|
2019-07-21 10:41:03 -04:00
|
|
|
typedef int (*moduleTypeAuxLoadFunc)(struct RedisModuleIO *rdb, int encver, int when);
|
|
|
|
typedef void (*moduleTypeAuxSaveFunc)(struct RedisModuleIO *rdb, int when);
|
2016-05-18 05:45:40 -04:00
|
|
|
typedef void (*moduleTypeRewriteFunc)(struct RedisModuleIO *io, struct redisObject *key, void *value);
|
|
|
|
typedef void (*moduleTypeDigestFunc)(struct RedisModuleDigest *digest, void *value);
|
2017-01-12 06:47:46 -05:00
|
|
|
typedef size_t (*moduleTypeMemUsageFunc)(const void *value);
|
2016-05-18 05:45:40 -04:00
|
|
|
typedef void (*moduleTypeFreeFunc)(void *value);
|
|
|
|
|
|
|
|
/* The module type, which is referenced in each value of a given type, defines
|
|
|
|
* the methods and links to the module exporting the type. */
|
|
|
|
typedef struct RedisModuleType {
|
|
|
|
uint64_t id; /* Higher 54 bits of type ID + 10 lower bits of encoding ver. */
|
|
|
|
struct RedisModule *module;
|
|
|
|
moduleTypeLoadFunc rdb_load;
|
|
|
|
moduleTypeSaveFunc rdb_save;
|
|
|
|
moduleTypeRewriteFunc aof_rewrite;
|
2016-11-30 05:13:55 -05:00
|
|
|
moduleTypeMemUsageFunc mem_usage;
|
2016-05-18 05:45:40 -04:00
|
|
|
moduleTypeDigestFunc digest;
|
|
|
|
moduleTypeFreeFunc free;
|
2019-07-21 10:41:03 -04:00
|
|
|
moduleTypeAuxLoadFunc aux_load;
|
|
|
|
moduleTypeAuxSaveFunc aux_save;
|
|
|
|
int aux_save_triggers;
|
2016-05-18 05:45:40 -04:00
|
|
|
char name[10]; /* 9 bytes name + null term. Charset: A-Z a-z 0-9 _- */
|
|
|
|
} moduleType;
|
|
|
|
|
|
|
|
/* In Redis objects 'robj' structures of type OBJ_MODULE, the value pointer
|
|
|
|
* is set to the following structure, referencing the moduleType structure
|
|
|
|
* in order to work with the value, and at the same time providing a raw
|
|
|
|
* pointer to the value, as created by the module commands operating with
|
|
|
|
* the module type.
|
|
|
|
*
|
|
|
|
* So for example in order to free such a value, it is possible to use
|
|
|
|
* the following code:
|
|
|
|
*
|
|
|
|
* if (robj->type == OBJ_MODULE) {
|
|
|
|
* moduleValue *mt = robj->ptr;
|
|
|
|
* mt->type->free(mt->value);
|
|
|
|
* zfree(mt); // We need to release this in-the-middle struct as well.
|
|
|
|
* }
|
|
|
|
*/
|
|
|
|
typedef struct moduleValue {
|
|
|
|
moduleType *type;
|
|
|
|
void *value;
|
|
|
|
} moduleValue;
|
|
|
|
|
|
|
|
/* This is a wrapper for the 'rio' streams used inside rdb.c in Redis, so that
|
|
|
|
* the user does not have to take the total count of the written bytes nor
|
|
|
|
* to care about error conditions. */
|
|
|
|
typedef struct RedisModuleIO {
|
|
|
|
size_t bytes; /* Bytes read / written so far. */
|
|
|
|
rio *rio; /* Rio stream. */
|
|
|
|
moduleType *type; /* Module type doing the operation. */
|
|
|
|
int error; /* True if error condition happened. */
|
RDB modules values serialization format version 2.
The original RDB serialization format was not parsable without the
module loaded, because the structure was managed only by the module
itself. Moreover RDB is a streaming protocol in the sense that it is
both produced in an append-only fashion, and is also sometimes directly
sent to the socket (in the case of diskless replication).
The fact that modules values cannot be parsed without the relevant
module loaded is a problem in many ways: RDB checking tools must have
loaded modules even for doing things not involving the value at all,
like splitting an RDB into N RDBs by key or alike, or just checking the
RDB for sanity.
In theory module values could be just a blob of data with a prefixed
length in order for us to be able to skip it. However prefixing the values
with a length would mean one of the following:
1. To be able to write some data at a previous offset. This breaks
streaming.
2. To bufferize values before outputting them. This breaks performances.
3. To have some chunked RDB output format. This breaks simplicity.
Moreover, the above solution, still makes module values a totally opaque
matter, with the following problems:
1. The RDB check tool can just skip the value without being able to at
least check the general structure. For datasets composed mostly of
modules values this means to just check the outer level of the RDB not
actually doing any check on most of the data itself.
2. It is not possible to do any recovering or processing of data for which a
module no longer exists in the future, or is unknown.
So this commit implements a different solution. The modules RDB
serialization API is composed of well defined calls to store integers,
floats, doubles or strings. After this commit, the parts generated by
the module API have a one-byte prefix for each of the above emitted
parts, and there is a final EOF byte as well. So even if we don't know
exactly how to interpret a module value, we can always parse it at an
high level, check the overall structure, understand the types used to
store the information, and easily skip the whole value.
The change is backward compatible: older RDB files can be still loaded
since the new encoding has a new RDB type: MODULE_2 (of value 7).
The commit also implements the ability to check RDB files for sanity
taking advantage of the new feature.
2017-06-27 07:09:33 -04:00
|
|
|
int ver; /* Module serialization version: 1 (old),
|
|
|
|
* 2 (current version with opcodes annotation). */
|
2016-10-06 11:10:47 -04:00
|
|
|
struct RedisModuleCtx *ctx; /* Optional context, see RM_GetContextFromIO()*/
|
2016-11-30 14:47:02 -05:00
|
|
|
struct redisObject *key; /* Optional name of key processed */
|
2016-05-18 05:45:40 -04:00
|
|
|
} RedisModuleIO;
|
|
|
|
|
RDB modules values serialization format version 2.
The original RDB serialization format was not parsable without the
module loaded, because the structure was managed only by the module
itself. Moreover RDB is a streaming protocol in the sense that it is
both produced in an append-only fashion, and is also sometimes directly
sent to the socket (in the case of diskless replication).
The fact that modules values cannot be parsed without the relevant
module loaded is a problem in many ways: RDB checking tools must have
loaded modules even for doing things not involving the value at all,
like splitting an RDB into N RDBs by key or alike, or just checking the
RDB for sanity.
In theory module values could be just a blob of data with a prefixed
length in order for us to be able to skip it. However prefixing the values
with a length would mean one of the following:
1. To be able to write some data at a previous offset. This breaks
streaming.
2. To bufferize values before outputting them. This breaks performances.
3. To have some chunked RDB output format. This breaks simplicity.
Moreover, the above solution, still makes module values a totally opaque
matter, with the following problems:
1. The RDB check tool can just skip the value without being able to at
least check the general structure. For datasets composed mostly of
modules values this means to just check the outer level of the RDB not
actually doing any check on most of the data itself.
2. It is not possible to do any recovering or processing of data for which a
module no longer exists in the future, or is unknown.
So this commit implements a different solution. The modules RDB
serialization API is composed of well defined calls to store integers,
floats, doubles or strings. After this commit, the parts generated by
the module API have a one-byte prefix for each of the above emitted
parts, and there is a final EOF byte as well. So even if we don't know
exactly how to interpret a module value, we can always parse it at an
high level, check the overall structure, understand the types used to
store the information, and easily skip the whole value.
The change is backward compatible: older RDB files can be still loaded
since the new encoding has a new RDB type: MODULE_2 (of value 7).
The commit also implements the ability to check RDB files for sanity
taking advantage of the new feature.
2017-06-27 07:09:33 -04:00
|
|
|
/* Macro to initialize an IO context. Note that the 'ver' field is populated
|
|
|
|
* inside rdb.c according to the version of the value to load. */
|
2016-11-30 14:47:02 -05:00
|
|
|
/* Initialize the IO context 'iovar': store the rio pointer, the module type
 * and the key pointer, and reset bytes, error and ver to 0 and ctx to NULL.
 * The macro does not end with a semicolon: the caller supplies it, which keeps
 * the expansion a single statement that is safe inside if/else. 'iovar' is
 * expanded several times and must be a side-effect free lvalue. */
#define moduleInitIOContext(iovar,mtype,rioptr,keyptr) do { \
    (iovar).rio = (rioptr); \
    (iovar).type = (mtype); \
    (iovar).bytes = 0; \
    (iovar).error = 0; \
    (iovar).ver = 0; \
    (iovar).key = (keyptr); \
    (iovar).ctx = NULL; \
} while(0)
|
|
|
|
|
2017-07-06 04:29:19 -04:00
|
|
|
/* This is a structure used to export DEBUG DIGEST capabilities to Redis
|
|
|
|
* modules. We want to capture both the ordered and unordered elements of
|
|
|
|
* a data structure, so that a digest can be created in a way that correctly
|
|
|
|
* reflects the values. See the DEBUG DIGEST command implementation for more
|
|
|
|
* background. */
|
|
|
|
typedef struct RedisModuleDigest {
|
|
|
|
unsigned char o[20]; /* Ordered elements. */
|
|
|
|
unsigned char x[20]; /* Xored elements. */
|
|
|
|
} RedisModuleDigest;
|
|
|
|
|
|
|
|
/* Just start with a digest composed of all zero bytes. */
|
|
|
|
/* Zero both the 'o' and the 'x' byte arrays of the digest 'mdvar'.
 * The macro does not end with a semicolon: the caller supplies it, which keeps
 * the expansion a single statement that is safe inside if/else. */
#define moduleInitDigestContext(mdvar) do { \
    memset((mdvar).o,0,sizeof((mdvar).o)); \
    memset((mdvar).x,0,sizeof((mdvar).x)); \
} while(0)
|
|
|
|
|
2015-07-30 05:46:31 -04:00
|
|
|
/* Objects encoding. Some kind of objects like Strings and Hashes can be
|
|
|
|
* internally represented in multiple ways. The 'encoding' field of the object
|
|
|
|
 * is set to one of these values for this object. */
|
|
|
|
#define OBJ_ENCODING_RAW 0 /* Raw representation */
|
|
|
|
#define OBJ_ENCODING_INT 1 /* Encoded as integer */
|
|
|
|
#define OBJ_ENCODING_HT 2 /* Encoded as hash table */
|
|
|
|
#define OBJ_ENCODING_ZIPMAP 3 /* Encoded as zipmap */
|
2016-09-13 04:26:36 -04:00
|
|
|
#define OBJ_ENCODING_LINKEDLIST 4 /* No longer used: old list encoding. */
|
2015-07-30 05:46:31 -04:00
|
|
|
#define OBJ_ENCODING_ZIPLIST 5 /* Encoded as ziplist */
|
|
|
|
#define OBJ_ENCODING_INTSET 6 /* Encoded as intset */
|
|
|
|
#define OBJ_ENCODING_SKIPLIST 7 /* Encoded as skiplist */
|
|
|
|
#define OBJ_ENCODING_EMBSTR 8 /* Embedded sds string encoding */
|
|
|
|
#define OBJ_ENCODING_QUICKLIST 9 /* Encoded as linked list of ziplists */
|
2017-08-30 06:40:27 -04:00
|
|
|
#define OBJ_ENCODING_STREAM 10 /* Encoded as a radix tree of listpacks */
|
2015-07-30 05:46:31 -04:00
|
|
|
|
2015-07-27 03:41:48 -04:00
|
|
|
#define LRU_BITS 24
|
|
|
|
#define LRU_CLOCK_MAX ((1<<LRU_BITS)-1) /* Max value of obj->lru */
|
|
|
|
#define LRU_CLOCK_RESOLUTION 1000 /* LRU clock resolution in ms */
|
2015-07-30 05:46:31 -04:00
|
|
|
|
|
|
|
#define OBJ_SHARED_REFCOUNT INT_MAX
|
2010-06-21 18:07:48 -04:00
|
|
|
typedef struct redisObject {
|
|
|
|
unsigned type:4;
|
|
|
|
unsigned encoding:4;
|
2017-05-09 05:57:09 -04:00
|
|
|
unsigned lru:LRU_BITS; /* LRU time (relative to global lru_clock) or
|
2016-07-15 06:12:52 -04:00
|
|
|
* LFU data (least significant 8 bits frequency
|
2017-10-15 08:17:55 -04:00
|
|
|
* and most significant 16 bits access time). */
|
2010-06-21 18:07:48 -04:00
|
|
|
int refcount;
|
|
|
|
void *ptr;
|
|
|
|
} robj;
|
|
|
|
|
2019-07-08 06:04:37 -04:00
|
|
|
/* The string name for an object's type, as listed above.
|
|
|
|
* Native types are checked against the OBJ_STRING, OBJ_LIST, OBJ_* defines,
|
|
|
|
* and Module types have their registered name returned. */
|
|
|
|
char *getObjectTypeName(robj*);
|
2019-06-10 12:41:44 -04:00
|
|
|
|
2013-01-16 12:00:20 -05:00
|
|
|
/* Macro used to initialize a Redis object allocated on the stack.
|
2010-06-21 18:07:48 -04:00
|
|
|
* Note that this macro is taken near the structure definition to make sure
|
|
|
|
* we'll update it when the structure is changed, to avoid bugs like
|
|
|
|
* bug #85 introduced exactly in this way. */
|
|
|
|
/* Set up '_var' as a string object with raw encoding that points to '_ptr',
 * with a reference count of 1. Only refcount, type, encoding and ptr are
 * assigned; any other field of the object is left as it was.
 * Both arguments are parenthesized so that expressions such as '*objptr' can
 * be passed; '_var' is expanded several times and must be side-effect free. */
#define initStaticStringObject(_var,_ptr) do { \
    (_var).refcount = 1; \
    (_var).type = OBJ_STRING; \
    (_var).encoding = OBJ_ENCODING_RAW; \
    (_var).ptr = (_ptr); \
} while(0)
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2016-07-07 09:04:25 -04:00
|
|
|
struct evictionPoolEntry; /* Defined in evict.c */
|
2014-03-20 06:57:29 -04:00
|
|
|
|
2018-07-16 11:56:54 -04:00
|
|
|
/* This structure is used in order to represent the output buffer of a client,
|
|
|
|
* which is actually a linked list of blocks like that, that is: client->reply. */
|
2018-02-21 13:18:34 -05:00
|
|
|
typedef struct clientReplyBlock {
|
|
|
|
size_t size, used;
|
|
|
|
char buf[];
|
|
|
|
} clientReplyBlock;
|
|
|
|
|
2014-03-20 06:57:29 -04:00
|
|
|
/* Redis database representation. There are multiple databases identified
|
|
|
|
* by integers from 0 (the default database) up to the max configured
|
|
|
|
* database. The database number is the 'id' field in the structure. */
|
2010-06-21 18:07:48 -04:00
|
|
|
typedef struct redisDb {
|
|
|
|
dict *dict; /* The keyspace for this DB */
|
|
|
|
dict *expires; /* Timeout of keys with a timeout set */
|
2016-07-13 04:45:37 -04:00
|
|
|
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/
|
A reimplementation of blocking operation internals.
Redis provides support for blocking operations such as BLPOP or BRPOP.
This operations are identical to normal LPOP and RPOP operations as long
as there are elements in the target list, but if the list is empty they
block waiting for new data to arrive to the list.
All the clients blocked waiting for the same list are served in a FIFO
way, so the first that blocked is the first to be served when there is
more data pushed by another client into the list.
The previous implementation of blocking operations was conceived to
serve clients in the context of push operations. For instance:
1) There is a client "A" blocked on list "foo".
2) The client "B" performs `LPUSH foo somevalue`.
3) The client "A" is served in the context of the "B" LPUSH,
synchronously.
Processing things in a synchronous way was useful as if "A" pushes a
value that is served by "B", from the point of view of the database is a
NOP (no operation) thing, that is, nothing is replicated, nothing is
written in the AOF file, and so forth.
However later we implemented two things:
1) Variadic LPUSH that could add multiple values to a list in the
context of a single call.
2) BRPOPLPUSH that was a version of BRPOP that also provided a "PUSH"
side effect when receiving data.
This forced us to make the synchronous implementation more complex. If
client "B" is waiting for data, and "A" pushes three elements in a
single call, we needed to propagate an LPUSH with a missing argument
in the AOF and replication link. We also needed to make sure to
replicate the LPUSH side of BRPOPLPUSH, but only if in turn did not
happened to serve another blocking client into another list ;)
This were complex but with a few of mutually recursive functions
everything worked as expected... until one day we introduced scripting
in Redis.
Scripting + synchronous blocking operations = Issue #614.
Basically you can't "rewrite" a script to have just a partial effect on
the replicas and AOF file if the script happened to serve a few blocked
clients.
The solution to all this problems, implemented by this commit, is to
change the way we serve blocked clients. Instead of serving the blocked
clients synchronously, in the context of the command performing the PUSH
operation, it is now an asynchronous and iterative process:
1) If a key that has clients blocked waiting for data is the subject of
a list push operation, We simply mark keys as "ready" and put it into a
queue.
2) Every command pushing stuff on lists, as a variadic LPUSH, a script,
or whatever it is, is replicated verbatim without any rewriting.
3) Every time a Redis command, a MULTI/EXEC block, or a script,
completed its execution, we run the list of keys ready to serve blocked
clients (as more data arrived), and process this list serving the
blocked clients.
4) As a result of "3" maybe more keys are ready again for other clients
(as a result of BRPOPLPUSH we may have push operations), so we iterate
back to step "3" if it's needed.
The new code has a much simpler semantics, and a simpler to understand
implementation, with the disadvantage of not being able to "optimize out"
a PUSH+BPOP as a No OP.
This commit will be tested with care before the final merge, more tests
will be added likely.
2012-09-04 04:37:49 -04:00
|
|
|
dict *ready_keys; /* Blocked keys that received a PUSH */
|
2010-06-21 18:07:48 -04:00
|
|
|
dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
|
2014-03-20 06:57:29 -04:00
|
|
|
int id; /* Database ID */
|
2013-08-06 09:00:43 -04:00
|
|
|
long long avg_ttl; /* Average TTL, just for stats */
|
2018-02-18 10:15:22 -05:00
|
|
|
list *defrag_later; /* List of key names to attempt to defrag one by one, gradually. */
|
2010-06-21 18:07:48 -04:00
|
|
|
} redisDb;
|
|
|
|
|
|
|
|
/* Client MULTI/EXEC state */
|
|
|
|
typedef struct multiCmd {
|
|
|
|
robj **argv;
|
|
|
|
int argc;
|
|
|
|
struct redisCommand *cmd;
|
|
|
|
} multiCmd;
|
|
|
|
|
|
|
|
typedef struct multiState {
|
|
|
|
multiCmd *commands; /* Array of MULTI commands */
|
|
|
|
int count; /* Total number of MULTI commands */
|
2018-12-11 05:39:18 -05:00
|
|
|
int cmd_flags; /* The accumulated command flags OR-ed together.
|
|
|
|
So if at least a command has a given flag, it
|
|
|
|
will be set in this field. */
|
2013-05-27 13:33:03 -04:00
|
|
|
int minreplicas; /* MINREPLICAS for synchronous replication */
|
|
|
|
time_t minreplicas_timeout; /* MINREPLICAS timeout as unixtime. */
|
2010-06-21 18:07:48 -04:00
|
|
|
} multiState;
|
|
|
|
|
2013-12-03 11:43:53 -05:00
|
|
|
/* This structure holds the blocking operation state for a client.
|
|
|
|
* The fields used depend on client->btype. */
|
2010-11-08 17:38:01 -05:00
|
|
|
typedef struct blockingState {
|
2013-12-03 11:43:53 -05:00
|
|
|
/* Generic fields. */
|
|
|
|
mstime_t timeout; /* Blocking operation timeout. If UNIX current time
|
|
|
|
* is > timeout then the operation timed out. */
|
|
|
|
|
2018-04-29 19:10:42 -04:00
|
|
|
/* BLOCKED_LIST, BLOCKED_ZSET and BLOCKED_STREAM */
|
2012-12-02 14:36:18 -05:00
|
|
|
dict *keys; /* The keys we are waiting to terminate a blocking
|
2017-09-06 09:43:28 -04:00
|
|
|
* operation such as BLPOP or XREAD. Or NULL. */
|
2010-11-08 17:38:01 -05:00
|
|
|
robj *target; /* The key that should receive the element,
|
|
|
|
* for BRPOPLPUSH. */
|
2013-12-03 11:43:53 -05:00
|
|
|
|
2017-09-07 03:30:50 -04:00
|
|
|
/* BLOCKED_STREAM */
|
|
|
|
size_t xread_count; /* XREAD COUNT option. */
|
2018-01-19 10:39:09 -05:00
|
|
|
robj *xread_group; /* XREADGROUP group name. */
|
|
|
|
robj *xread_consumer; /* XREADGROUP consumer name. */
|
2017-09-07 03:30:50 -04:00
|
|
|
mstime_t xread_retry_time, xread_retry_ttl;
|
2018-07-09 07:26:40 -04:00
|
|
|
int xread_group_noack;
|
2017-09-07 03:30:50 -04:00
|
|
|
|
2015-07-27 03:41:48 -04:00
|
|
|
/* BLOCKED_WAIT */
|
2013-12-03 11:43:53 -05:00
|
|
|
int numreplicas; /* Number of replicas we are waiting for ACK. */
|
|
|
|
long long reploffset; /* Replication offset to reach. */
|
2016-10-07 05:55:35 -04:00
|
|
|
|
|
|
|
/* BLOCKED_MODULE */
|
|
|
|
void *module_blocked_handle; /* RedisModuleBlockedClient structure.
|
|
|
|
which is opaque for the Redis core, only
|
|
|
|
handled in module.c. */
|
2010-11-08 17:38:01 -05:00
|
|
|
} blockingState;
|
|
|
|
|
A reimplementation of blocking operation internals.
Redis provides support for blocking operations such as BLPOP or BRPOP.
These operations are identical to normal LPOP and RPOP operations as long
as there are elements in the target list, but if the list is empty they
block waiting for new data to arrive to the list.
All the clients blocked waiting for the same list are served in a FIFO
way, so the first that blocked is the first to be served when there is
more data pushed by another client into the list.
The previous implementation of blocking operations was conceived to
serve clients in the context of push operations. For instance:
1) There is a client "A" blocked on list "foo".
2) The client "B" performs `LPUSH foo somevalue`.
3) The client "A" is served in the context of the "B" LPUSH,
synchronously.
Processing things in a synchronous way was useful as if "A" pushes a
value that is served by "B", from the point of view of the database is a
NOP (no operation) thing, that is, nothing is replicated, nothing is
written in the AOF file, and so forth.
However later we implemented two things:
1) Variadic LPUSH that could add multiple values to a list in the
context of a single call.
2) BRPOPLPUSH that was a version of BRPOP that also provided a "PUSH"
side effect when receiving data.
This forced us to make the synchronous implementation more complex. If
client "B" is waiting for data, and "A" pushes three elements in a
single call, we needed to propagate an LPUSH with a missing argument
in the AOF and replication link. We also needed to make sure to
replicate the LPUSH side of BRPOPLPUSH, but only if it in turn did not
happen to serve another blocking client into another list ;)
This was complex, but with a few mutually recursive functions
everything worked as expected... until one day we introduced scripting
in Redis.
Scripting + synchronous blocking operations = Issue #614.
Basically you can't "rewrite" a script to have just a partial effect on
the replicas and AOF file if the script happened to serve a few blocked
clients.
The solution to all these problems, implemented by this commit, is to
change the way we serve blocked clients. Instead of serving the blocked
clients synchronously, in the context of the command performing the PUSH
operation, it is now an asynchronous and iterative process:
1) If a key that has clients blocked waiting for data is the subject of
a list push operation, we simply mark the key as "ready" and put it into a
queue.
2) Every command pushing stuff on lists, as a variadic LPUSH, a script,
or whatever it is, is replicated verbatim without any rewriting.
3) Every time a Redis command, a MULTI/EXEC block, or a script,
completed its execution, we run the list of keys ready to serve blocked
clients (as more data arrived), and process this list serving the
blocked clients.
4) As a result of "3" maybe more keys are ready again for other clients
(as a result of BRPOPLPUSH we may have push operations), so we iterate
back to step "3" if it's needed.
The new code has a much simpler semantics, and a simpler to understand
implementation, with the disadvantage of not being able to "optimize out"
a PUSH+BPOP as a No OP.
This commit will be tested with care before the final merge, more tests
will be added likely.
2012-09-04 04:37:49 -04:00
|
|
|
/* The following structure represents a node in the server.ready_keys list,
|
|
|
|
* where we accumulate all the keys that had clients blocked with a blocking
|
|
|
|
* operation such as B[LR]POP, but received new data in the context of the
|
|
|
|
* last executed command.
|
|
|
|
*
|
|
|
|
* After the execution of every command or script, we run this list to check
|
|
|
|
* if as a result we should serve data to clients blocked, unblocking them.
|
|
|
|
* Note that server.ready_keys will not have duplicates as there is a dictionary
|
|
|
|
* also called ready_keys in every structure representing a Redis database,
|
|
|
|
* where we make sure to remember if a given key was already added in the
|
|
|
|
* server.ready_keys list. */
|
|
|
|
/* A (db, key) pair: one node of the server.ready_keys list. */
typedef struct readyList {
|
|
|
|
redisDb *db; /* NOTE(review): presumably the database the ready key lives in — confirm where nodes are created. */
|
|
|
|
robj *key; /* The key that had blocked clients and received new data. */
|
|
|
|
} readyList;
|
|
|
|
|
2019-01-10 10:33:48 -05:00
|
|
|
/* This structure represents a Redis user. This is useful for ACLs, the
|
|
|
|
* user is associated to the connection after the connection is authenticated.
|
|
|
|
* If there is no associated user, the connection uses the default user. */
|
2019-01-23 02:10:57 -05:00
|
|
|
#define USER_COMMAND_BITS_COUNT 1024 /* The total number of command bits
|
|
|
|
in the user structure. The last valid
|
|
|
|
command ID we can set in the user
|
|
|
|
is USER_COMMAND_BITS_COUNT-1. */
|
2019-01-10 10:33:48 -05:00
|
|
|
#define USER_FLAG_ENABLED (1<<0) /* The user is active. */
|
2019-01-31 10:49:22 -05:00
|
|
|
#define USER_FLAG_DISABLED (1<<1) /* The user is disabled. */
|
|
|
|
#define USER_FLAG_ALLKEYS (1<<2) /* The user can mention any key. */
|
|
|
|
#define USER_FLAG_ALLCOMMANDS (1<<3) /* The user can run all commands. */
|
|
|
|
#define USER_FLAG_NOPASS (1<<4) /* The user requires no password, any
|
2019-01-15 07:16:31 -05:00
|
|
|
provided password will work. For the
|
|
|
|
default user, this also means that
|
|
|
|
no AUTH is needed, and every
|
|
|
|
connection is immediately
|
|
|
|
authenticated. */
|
2019-01-10 10:33:48 -05:00
|
|
|
typedef struct user {
|
2019-01-15 12:16:20 -05:00
|
|
|
sds name; /* The username as an SDS string. */
|
2019-01-10 10:33:48 -05:00
|
|
|
uint64_t flags; /* See USER_FLAG_* */
|
|
|
|
|
|
|
|
/* The bit in allowed_commands is set if this user has the right to
|
|
|
|
* execute this command. In commands having subcommands, if this bit is
|
|
|
|
* set, then all the subcommands are also available.
|
|
|
|
*
|
|
|
|
* If the bit for a given command is NOT set and the command has
|
|
|
|
* subcommands, Redis will also check allowed_subcommands in order to
|
|
|
|
* understand if the command can be executed. */
|
2019-01-23 02:10:57 -05:00
|
|
|
uint64_t allowed_commands[USER_COMMAND_BITS_COUNT/64];
|
2019-01-10 10:33:48 -05:00
|
|
|
|
|
|
|
/* This array points, for each command ID (corresponding to the command
|
|
|
|
* bit set in allowed_commands), to an array of SDS strings, terminated by
|
|
|
|
* a NULL pointer, with all the sub commands that can be executed for
|
|
|
|
* this command. When no subcommand matching is used, the field is just
|
2019-01-23 02:10:57 -05:00
|
|
|
* set to NULL to avoid allocating USER_COMMAND_BITS_COUNT pointers. */
|
2019-01-10 10:33:48 -05:00
|
|
|
sds **allowed_subcommands;
|
|
|
|
list *passwords; /* A list of SDS valid passwords for this user. */
|
2019-01-10 11:01:12 -05:00
|
|
|
list *patterns; /* A list of allowed key patterns. If this field is NULL
|
|
|
|
the user cannot mention any key in a command, unless
|
|
|
|
the flag ALLKEYS is set in the user. */
|
2019-01-10 10:33:48 -05:00
|
|
|
} user;
|
|
|
|
|
2013-01-16 12:00:20 -05:00
|
|
|
/* With multiplexing we need to take per-client state.
|
2013-11-20 01:14:27 -05:00
|
|
|
* Clients are taken in a linked list. */
|
2015-07-26 09:20:46 -04:00
|
|
|
typedef struct client {
|
2014-06-16 08:22:55 -04:00
|
|
|
uint64_t id; /* Client incremental unique ID. */
|
2015-08-06 03:41:11 -04:00
|
|
|
int fd; /* Client socket. */
|
2018-11-21 05:53:18 -05:00
|
|
|
int resp; /* RESP protocol version. Can be 2 or 3. */
|
2015-08-06 03:41:11 -04:00
|
|
|
redisDb *db; /* Pointer to currently SELECTed DB. */
|
|
|
|
robj *name; /* As set by CLIENT SETNAME. */
|
|
|
|
sds querybuf; /* Buffer we use to accumulate client queries. */
|
2018-08-13 12:43:36 -04:00
|
|
|
size_t qb_pos; /* The position we have read in querybuf. */
|
2018-07-03 07:25:41 -04:00
|
|
|
sds pending_querybuf; /* If this client is flagged as master, this buffer
|
|
|
|
represents the yet not applied portion of the
|
|
|
|
replication stream that we are receiving from
|
|
|
|
the master. */
|
2015-08-06 03:41:11 -04:00
|
|
|
size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */
|
|
|
|
int argc; /* Num of arguments of current command. */
|
|
|
|
robj **argv; /* Arguments of current command. */
|
|
|
|
struct redisCommand *cmd, *lastcmd; /* Last command executed. */
|
2019-01-14 07:21:21 -05:00
|
|
|
user *user; /* User associated with this connection. If the
|
|
|
|
user is set to NULL the connection can do
|
|
|
|
anything (admin). */
|
2015-08-06 03:41:11 -04:00
|
|
|
int reqtype; /* Request protocol type: PROTO_REQ_* */
|
|
|
|
int multibulklen; /* Number of multi bulk arguments left to read. */
|
|
|
|
long bulklen; /* Length of bulk argument in multi bulk request. */
|
|
|
|
list *reply; /* List of reply objects to send to the client. */
|
|
|
|
unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */
|
2015-01-18 16:46:25 -05:00
|
|
|
size_t sentlen; /* Amount of bytes already sent in the current
|
2013-01-30 12:33:16 -05:00
|
|
|
buffer or object being sent. */
|
2015-08-06 03:41:11 -04:00
|
|
|
time_t ctime; /* Client creation time. */
|
|
|
|
time_t lastinteraction; /* Time of the last interaction, used for timeout */
|
2012-01-23 10:12:37 -05:00
|
|
|
time_t obuf_soft_limit_reached_time;
|
2019-07-03 13:16:20 -04:00
|
|
|
uint64_t flags; /* Client flags: CLIENT_* macros. */
|
2019-01-18 05:49:30 -05:00
|
|
|
int authenticated; /* Needed when the default user requires auth. */
|
2015-08-06 03:41:11 -04:00
|
|
|
int replstate; /* Replication state if this is a slave. */
|
2019-08-05 11:38:15 -04:00
|
|
|
int repl_put_online_on_ack; /* Install slave write handler on first ACK. */
|
2015-08-06 03:41:11 -04:00
|
|
|
int repldbfd; /* Replication DB file descriptor. */
|
|
|
|
off_t repldboff; /* Replication DB file offset. */
|
|
|
|
off_t repldbsize; /* Replication DB file size. */
|
|
|
|
sds replpreamble; /* Replication DB preamble. */
|
Fix PSYNC2 incomplete command bug as described in #3899.
This bug was discovered by @kevinmcgehee and constituted a major hidden
bug in the PSYNC2 implementation, caused by the propagation from the
master of incomplete commands to slaves.
The bug had several results:
1. Borrowing from Kevin text in the issue: "Given that slaves blindly
copy over their master's input into their own replication backlog over
successive read syscalls, it's possible that with large commands or
small TCP buffers, partial commands are present in this buffer. If the
master were to fail before successfully propagating the entire command
to a slave, the slaves will never execute the partial command (since the
client is invalidated) but will copy it to replication backlog which may
relay those invalid bytes to its slaves on PSYNC2, corrupting the
backlog and possibly other valid commands that follow the failover.
Simple command boundaries aren't sufficient to capture this, either,
because in the case of a MULTI/EXEC block, if the master successfully
propagates a subset of the commands but not the EXEC, then the
transaction in the backlog becomes corrupt and could corrupt other
slaves that consume this data."
2. As identified by @yangsiran later, there is another effect of the
bug. For the same mechanism of the first problem, a slave having another
slave, could receive a full resynchronization request with an already
half-applied command in the backlog. Once the RDB is ready, it will be
sent to the slave, and the replication will continue sending to the
sub-slave the other half of the command, which is not valid.
The fix, designed by @yangsiran and @antirez, and implemented by
@antirez, uses a secondary buffer in order to feed the sub-masters and
update the replication backlog and offsets, only when a given part of
the query buffer is actually *applied* to the state of the instance,
that is, when the command gets processed and the command is not pending
in the Redis transaction buffer because of CLIENT_MULTI state.
Given that now the backlog and offsets representation are in agreement
with the actual processed commands, both issue 1 and 2 should no longer
be possible.
Thanks to @kevinmcgehee, @yangsiran and @oranagra for their work in
identifying and designing a fix for this problem.
2017-04-19 04:25:45 -04:00
|
|
|
long long read_reploff; /* Read replication offset if this is a master. */
|
|
|
|
long long reploff; /* Applied replication offset if this is a master. */
|
2015-08-06 03:41:11 -04:00
|
|
|
long long repl_ack_off; /* Replication ack offset, if this is a slave. */
|
|
|
|
long long repl_ack_time;/* Replication ack time, if this is a slave. */
|
2015-08-04 10:56:00 -04:00
|
|
|
long long psync_initial_offset; /* FULLRESYNC reply offset other slaves
|
|
|
|
copying this slave output buffer
|
|
|
|
should use. */
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out of band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because
during the work to implement it, different changes were needed in order
to make things working well.
2016-11-09 05:31:06 -05:00
|
|
|
char replid[CONFIG_RUN_ID_SIZE+1]; /* Master replication ID (if master). */
|
|
|
|
int slave_listening_port; /* As configured with: REPLCONF listening-port */
|
2016-07-27 10:41:20 -04:00
|
|
|
char slave_ip[NET_IP_STR_LEN]; /* Optionally given by REPLCONF ip-address */
|
2015-08-06 03:23:23 -04:00
|
|
|
int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */
|
2010-06-21 18:07:48 -04:00
|
|
|
multiState mstate; /* MULTI/EXEC state */
|
2015-07-27 03:41:48 -04:00
|
|
|
int btype; /* Type of blocking op if CLIENT_BLOCKED. */
|
2013-12-04 09:52:20 -05:00
|
|
|
blockingState bpop; /* blocking state */
|
|
|
|
long long woff; /* Last write global replication offset. */
|
2010-06-21 18:07:48 -04:00
|
|
|
list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
|
|
|
|
dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
|
|
|
|
list *pubsub_patterns; /* patterns a client is interested in (PSUBSCRIBE) */
|
2014-04-28 11:36:57 -04:00
|
|
|
sds peerid; /* Cached peer ID. */
|
2017-11-30 05:11:05 -05:00
|
|
|
listNode *client_list_node; /* list node in client list */
|
2010-08-30 08:44:34 -04:00
|
|
|
|
2019-06-29 20:08:41 -04:00
|
|
|
/* If this client is in tracking mode and this field is non zero,
|
|
|
|
* invalidation messages for keys fetched by this client will be sent to
|
|
|
|
* the specified client ID. */
|
|
|
|
uint64_t client_tracking_redirection;
|
|
|
|
|
2010-08-30 08:44:34 -04:00
|
|
|
/* Response buffer */
|
2015-02-24 11:31:04 -05:00
|
|
|
int bufpos;
|
2015-07-27 03:41:48 -04:00
|
|
|
char buf[PROTO_REPLY_CHUNK_BYTES];
|
2015-07-26 09:20:46 -04:00
|
|
|
} client;
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* A (seconds, changes) pair.
 * NOTE(review): presumably one snapshot rule of the form "save <seconds>
 * <changes>" (save after that many changes within that many seconds) —
 * confirm against the configuration and cron code that reads it. */
struct saveparam {
|
|
|
|
time_t seconds; /* Time window, in seconds (assumed — TODO confirm). */
|
|
|
|
int changes; /* Number of changes (assumed threshold — TODO confirm). */
|
|
|
|
};
|
|
|
|
|
2016-06-13 03:39:44 -04:00
|
|
|
/* One entry describing a module to load: a path plus an argument vector.
 * NOTE(review): presumably queued while parsing the configuration and
 * consumed once the server is ready to load modules — confirm in the
 * module loading code. */
struct moduleLoadQueueEntry {
|
2016-06-05 03:03:34 -04:00
|
|
|
sds path; /* Path of the module, as an sds string (presumably a file system path — confirm). */
|
|
|
|
int argc; /* Number of entries in argv (assumed — TODO confirm). */
|
2016-06-13 03:51:06 -04:00
|
|
|
robj **argv; /* Arguments as an array of robj pointers (presumably passed to the module on load — confirm). */
|
2016-06-05 03:03:34 -04:00
|
|
|
};
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
struct sharedObjectsStruct {
|
2018-11-26 12:57:37 -05:00
|
|
|
robj *crlf, *ok, *err, *emptybulk, *czero, *cone, *pong, *space,
|
2019-09-02 06:50:47 -04:00
|
|
|
*colon, *queued, *null[4], *nullarray[4], *emptymap[4], *emptyset[4],
|
2018-11-26 12:55:05 -05:00
|
|
|
*emptyarray, *wrongtypeerr, *nokeyerr, *syntaxerr, *sameobjecterr,
|
2012-03-07 07:05:46 -05:00
|
|
|
*outofrangeerr, *noscripterr, *loadingerr, *slowscripterr, *bgsaveerr,
|
2013-05-29 05:36:44 -04:00
|
|
|
*masterdownerr, *roslaveerr, *execaborterr, *noautherr, *noreplicaserr,
|
2014-05-12 04:01:59 -04:00
|
|
|
*busykeyerr, *oomerr, *plus, *messagebulk, *pmessagebulk, *subscribebulk,
|
2015-10-02 09:27:57 -04:00
|
|
|
*unsubscribebulk, *psubscribebulk, *punsubscribebulk, *del, *unlink,
|
2018-08-14 08:58:58 -04:00
|
|
|
*rpop, *lpop, *lpush, *rpoplpush, *zpopmin, *zpopmax, *emptyscan,
|
2015-07-27 03:41:48 -04:00
|
|
|
*select[PROTO_SHARED_SELECT_CMDS],
|
|
|
|
*integers[OBJ_SHARED_INTEGERS],
|
|
|
|
*mbulkhdr[OBJ_SHARED_BULKHDR_LEN], /* "*<value>\r\n" */
|
|
|
|
*bulkhdr[OBJ_SHARED_BULKHDR_LEN]; /* "$<value>\r\n" */
|
2015-08-04 03:20:55 -04:00
|
|
|
sds minstring, maxstring;
|
2010-06-21 18:07:48 -04:00
|
|
|
};
|
|
|
|
|
2011-04-28 13:00:33 -04:00
|
|
|
/* ZSETs use a specialized version of Skiplists */
|
|
|
|
/* A single node of the skiplist used by ZSETs. The node ends with a
 * flexible array member (level[]), so its size depends on how many
 * levels are allocated together with it. */
typedef struct zskiplistNode {
|
2015-08-04 03:20:55 -04:00
|
|
|
sds ele; /* The element, stored as an sds string. */
|
2011-04-28 13:00:33 -04:00
|
|
|
double score; /* Score associated with the element. */
|
|
|
|
struct zskiplistNode *backward; /* NOTE(review): presumably the previous node at the lowest level — confirm. */
|
|
|
|
struct zskiplistLevel {
|
|
|
|
struct zskiplistNode *forward; /* Next node reachable at this level. */
|
2017-12-08 03:09:27 -05:00
|
|
|
unsigned long span; /* NOTE(review): presumably the number of nodes the forward pointer skips — confirm. */
|
2011-04-28 13:00:33 -04:00
|
|
|
} level[]; /* Flexible array member: one entry per level of this node. */
|
|
|
|
} zskiplistNode;
|
|
|
|
|
|
|
|
/* The skiplist container: pointers to two nodes plus two counters. */
typedef struct zskiplist {
|
|
|
|
struct zskiplistNode *header, *tail; /* NOTE(review): presumably a sentinel header node and the last node — confirm. */
|
|
|
|
unsigned long length; /* NOTE(review): presumably the number of elements in the list — confirm. */
|
|
|
|
int level; /* NOTE(review): presumably the highest level currently in use by any node — confirm. */
|
|
|
|
} zskiplist;
|
|
|
|
|
|
|
|
/* A sorted set, represented by a dict together with a skiplist.
 * NOTE(review): presumably the dict maps element -> score for direct
 * lookups while the skiplist keeps the elements ordered by score —
 * confirm in the sorted set implementation. */
typedef struct zset {
|
|
|
|
dict *dict; /* Hash table side of the sorted set. */
|
|
|
|
zskiplist *zsl; /* Skiplist side of the sorted set. */
|
|
|
|
} zset;
|
|
|
|
|
2012-01-23 10:12:37 -05:00
|
|
|
/* A set of buffer limits for one class of clients: a hard limit in bytes,
 * and a soft limit in bytes paired with a number of seconds.
 * NOTE(review): presumably these are the client output buffer limits, where
 * the soft limit only triggers after being exceeded continuously for
 * soft_limit_seconds — confirm against the code enforcing the limits. */
typedef struct clientBufferLimitsConfig {
|
2012-01-24 04:43:30 -05:00
|
|
|
unsigned long long hard_limit_bytes; /* Hard limit, in bytes. */
|
|
|
|
unsigned long long soft_limit_bytes; /* Soft limit, in bytes. */
|
2012-01-23 10:12:37 -05:00
|
|
|
time_t soft_limit_seconds; /* Seconds associated with the soft limit. */
|
|
|
|
} clientBufferLimitsConfig;
|
|
|
|
|
2015-07-28 10:58:04 -04:00
|
|
|
extern clientBufferLimitsConfig clientBufferLimitsDefaults[CLIENT_TYPE_OBUF_COUNT];
|
2013-05-13 12:34:18 -04:00
|
|
|
|
2012-02-28 18:46:50 -05:00
|
|
|
/* The redisOp structure defines a Redis Operation, that is an instance of
|
|
|
|
* a command with an argument vector, database ID, propagation target
|
2015-07-27 03:41:48 -04:00
|
|
|
* (PROPAGATE_*), and command pointer.
|
2012-02-28 18:46:50 -05:00
|
|
|
*
|
|
|
|
* Currently only used to additionally propagate more commands to AOF/Replication
|
|
|
|
* after the propagation of the executed command. */
|
|
|
|
/* One Redis operation: a command instance together with its arguments,
 * the database it refers to and where it has to be propagated. */
typedef struct redisOp {
|
2012-02-28 12:03:08 -05:00
|
|
|
robj **argv; /* Argument vector of the command. */
|
|
|
|
int argc, dbid, target; /* Argument count, database ID, propagation target (PROPAGATE_*). */
|
|
|
|
struct redisCommand *cmd; /* Command pointer. */
|
2012-02-28 18:46:50 -05:00
|
|
|
} redisOp;
|
|
|
|
|
|
|
|
/* Defines an array of Redis operations. There is an API to add to this
|
|
|
|
* structure in an easy way.
|
|
|
|
*
|
|
|
|
* redisOpArrayInit();
|
|
|
|
* redisOpArrayAppend();
|
|
|
|
* redisOpArrayFree();
|
|
|
|
*/
|
|
|
|
/* An array of redisOp entries together with its element count. */
typedef struct redisOpArray {
|
|
|
|
redisOp *ops; /* The operations. NOTE(review): presumably allocated/grown by redisOpArrayAppend() — confirm. */
|
|
|
|
int numops; /* NOTE(review): presumably the number of valid entries in ops — confirm. */
|
|
|
|
} redisOpArray;
|
2012-02-28 12:03:08 -05:00
|
|
|
|
2016-09-15 03:42:51 -04:00
|
|
|
/* This structure is returned by the getMemoryOverheadData() function in
|
|
|
|
* order to return memory overhead information. */
|
|
|
|
struct redisMemOverhead {
|
2016-09-16 04:43:19 -04:00
|
|
|
size_t peak_allocated;
|
2016-09-15 03:42:51 -04:00
|
|
|
size_t total_allocated;
|
|
|
|
size_t startup_allocated;
|
|
|
|
size_t repl_backlog;
|
|
|
|
size_t clients_slaves;
|
|
|
|
size_t clients_normal;
|
|
|
|
size_t aof_buffer;
|
2018-07-22 14:16:00 -04:00
|
|
|
size_t lua_caches;
|
2016-09-15 03:42:51 -04:00
|
|
|
size_t overhead_total;
|
|
|
|
size_t dataset;
|
2016-09-16 10:36:53 -04:00
|
|
|
size_t total_keys;
|
|
|
|
size_t bytes_per_key;
|
2016-09-15 11:33:11 -04:00
|
|
|
float dataset_perc;
|
2016-09-16 04:43:19 -04:00
|
|
|
float peak_perc;
|
2018-02-18 10:36:21 -05:00
|
|
|
float total_frag;
|
2018-12-02 08:29:20 -05:00
|
|
|
ssize_t total_frag_bytes;
|
2018-02-18 10:36:21 -05:00
|
|
|
float allocator_frag;
|
2018-12-02 08:29:20 -05:00
|
|
|
ssize_t allocator_frag_bytes;
|
2018-02-18 10:36:21 -05:00
|
|
|
float allocator_rss;
|
2018-12-02 08:29:20 -05:00
|
|
|
ssize_t allocator_rss_bytes;
|
2018-02-18 10:36:21 -05:00
|
|
|
float rss_extra;
|
|
|
|
size_t rss_extra_bytes;
|
2016-09-15 03:42:51 -04:00
|
|
|
size_t num_dbs;
|
|
|
|
struct {
|
|
|
|
size_t dbid;
|
|
|
|
size_t overhead_ht_main;
|
|
|
|
size_t overhead_ht_expires;
|
|
|
|
} *db;
|
|
|
|
};
|
|
|
|
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out of band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because
during the work to implement it, different changes were needed in order
to make things working well.
2016-11-09 05:31:06 -05:00
|
|
|
/* This structure can be optionally passed to RDB save/load functions in
|
|
|
|
* order to implement additional functionalities, by storing and loading
|
|
|
|
* metadata to the RDB file.
|
|
|
|
*
|
|
|
|
* Currently the only use is to select a DB at load time, useful in
|
|
|
|
* replication in order to make sure that chained slaves (slaves of slaves)
|
|
|
|
* select the correct DB and are able to accept the stream coming from the
|
|
|
|
* top-level master. */
|
|
|
|
typedef struct rdbSaveInfo {
|
2016-11-10 06:35:29 -05:00
|
|
|
/* Used both when saving and when loading. */
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out of band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because
during the work to implement it, different changes were needed in order
to make things working well.
2016-11-09 05:31:06 -05:00
|
|
|
int repl_stream_db; /* DB to select in server.master client. */
|
2016-11-10 06:35:29 -05:00
|
|
|
|
|
|
|
/* Used only when loading. */
|
|
|
|
int repl_id_is_set; /* True if repl_id field is set. */
|
|
|
|
char repl_id[CONFIG_RUN_ID_SIZE+1]; /* Replication ID. */
|
|
|
|
long long repl_offset; /* Replication offset. */
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out of band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because
during the work to implement it, different changes were needed in order
to make things working well.
2016-11-09 05:31:06 -05:00
|
|
|
} rdbSaveInfo;
|
|
|
|
|
2016-11-10 06:35:29 -05:00
|
|
|
/* Brace initializer with four values, in order: -1, 0, a string of '0'
 * characters, -1. NOTE(review): presumably the default initializer for
 * rdbSaveInfo (whose definition closes just above); the field each value
 * maps to is not visible here -- confirm against the struct definition. */
#define RDB_SAVE_INFO_INIT {-1,0,"000000000000000000000000000000",-1}
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out-of-band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because,
during the work to implement it, several changes were needed in order
to make things work well.
2016-11-09 05:31:06 -05:00
|
|
|
|
2018-06-19 09:59:45 -04:00
|
|
|
/* Group of memory-usage counters. struct redisServer embeds one instance as
 * 'cron_malloc_stats', whose own comment says it is sampled in serverCron().
 * NOTE(review): the per-field notes below are inferred from the field names
 * only -- confirm against the code that fills this struct in. */
struct malloc_stats {
|
2018-02-18 10:36:21 -05:00
|
|
|
size_t zmalloc_used; /* presumably memory in use as counted by zmalloc -- confirm */
|
|
|
|
size_t process_rss; /* presumably the process resident set size -- confirm */
|
|
|
|
size_t allocator_allocated; /* presumably allocator-reported allocated amount -- confirm */
|
|
|
|
size_t allocator_active; /* presumably allocator-reported active amount -- confirm */
|
|
|
|
size_t allocator_resident; /* presumably allocator-reported resident amount -- confirm */
|
2018-06-19 09:59:45 -04:00
|
|
|
};
|
2018-02-18 10:36:21 -05:00
|
|
|
|
2011-03-29 11:51:15 -04:00
|
|
|
/*-----------------------------------------------------------------------------
|
|
|
|
* Global server state
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
2013-10-09 09:37:20 -04:00
|
|
|
/* Forward declaration only: the struct's members are not defined here.
 * NOTE(review): presumably this lets the declarations that follow refer to
 * struct clusterState through pointers -- confirm where it is used. */
struct clusterState;
|
|
|
|
|
2014-08-07 06:14:21 -04:00
|
|
|
/* AIX defines 'hz' as a macro expanding to '__hz'. Redis does not use that
|
|
|
|
* macro, so it is undefined here to allow Redis to build on AIX. Note that
* struct redisServer below declares a field named 'hz', which the macro
* would otherwise rewrite. */
|
2014-07-29 17:39:37 -04:00
|
|
|
#ifdef _AIX
|
|
|
|
#undef hz
|
|
|
|
#endif
|
|
|
|
|
2016-09-19 07:45:20 -04:00
|
|
|
/* Constants for the child -> parent info sharing done through
 * 'child_info_data' in struct redisServer (see its 'magic' and 'process_type'
 * fields). NOTE(review): the mapping of these macros onto those fields is
 * inferred from names and the field comments -- confirm at the call sites. */
#define CHILD_INFO_MAGIC 0xC17DDA7A12345678LL
|
|
|
|
#define CHILD_INFO_TYPE_RDB 0
|
|
|
|
#define CHILD_INFO_TYPE_AOF 1
|
2019-07-17 01:51:02 -04:00
|
|
|
/* Note: the value 2 is not assigned to any type in this list. */
#define CHILD_INFO_TYPE_MODULE 3
|
2016-09-19 07:45:20 -04:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
struct redisServer {
|
2011-01-13 18:19:19 -05:00
|
|
|
/* General */
|
2014-05-22 12:48:37 -04:00
|
|
|
pid_t pid; /* Main process pid. */
|
2013-05-09 10:57:59 -04:00
|
|
|
char *configfile; /* Absolute config file path, or NULL */
|
2015-10-13 04:58:08 -04:00
|
|
|
char *executable; /* Absolute executable file path. */
|
|
|
|
char **exec_argv; /* Executable argv vector (copy). */
|
2018-07-30 07:37:30 -04:00
|
|
|
int dynamic_hz; /* Change hz value depending on # of clients. */
|
2018-07-23 08:13:58 -04:00
|
|
|
int config_hz; /* Configured HZ value. May be different than
|
|
|
|
the actual 'hz' field value if dynamic-hz
|
|
|
|
is enabled. */
|
2012-12-14 11:10:40 -05:00
|
|
|
int hz; /* serverCron() calls frequency in hertz */
|
2011-01-13 18:19:19 -05:00
|
|
|
redisDb *db;
|
2013-03-06 10:28:26 -05:00
|
|
|
dict *commands; /* Command table */
|
|
|
|
dict *orig_commands; /* Command table before command renaming. */
|
2011-01-13 18:19:19 -05:00
|
|
|
aeEventLoop *el;
|
2019-03-30 06:26:58 -04:00
|
|
|
_Atomic unsigned int lruclock; /* Clock for LRU eviction */
|
2011-12-21 04:05:32 -05:00
|
|
|
int shutdown_asap; /* SHUTDOWN needed ASAP */
|
|
|
|
int activerehashing; /* Incremental rehash in serverCron() */
|
2016-12-29 20:37:52 -05:00
|
|
|
int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */
|
2011-12-21 04:05:32 -05:00
|
|
|
char *pidfile; /* PID file path */
|
2012-02-02 04:02:40 -05:00
|
|
|
int arch_bits; /* 32 or 64 depending on sizeof(long) */
|
2012-03-07 07:05:46 -05:00
|
|
|
int cronloops; /* Number of times the cron function run */
|
2015-07-27 03:41:48 -04:00
|
|
|
char runid[CONFIG_RUN_ID_SIZE+1]; /* ID always different at every exec. */
|
2012-07-23 06:54:52 -04:00
|
|
|
int sentinel_mode; /* True if this instance is a Sentinel. */
|
2016-09-13 11:39:22 -04:00
|
|
|
size_t initial_memory_usage; /* Bytes used after initialization. */
|
2016-12-19 10:41:47 -05:00
|
|
|
int always_show_logo; /* Show logo even for non-stdout logging. */
|
2016-03-06 07:44:24 -05:00
|
|
|
/* Modules */
|
2018-12-20 06:06:24 -05:00
|
|
|
dict *moduleapi; /* Exported core APIs dictionary for modules. */
|
|
|
|
dict *sharedapi; /* Like moduleapi but containing the APIs that
|
|
|
|
modules share with each other. */
|
2016-03-06 07:44:24 -05:00
|
|
|
list *loadmodule_queue; /* List of modules to load at startup. */
|
2017-04-10 03:33:21 -04:00
|
|
|
int module_blocked_pipe[2]; /* Pipe used to awake the event loop if a
|
|
|
|
client blocked on a module command needs
|
|
|
|
to be processed. */
|
2019-07-17 01:51:02 -04:00
|
|
|
pid_t module_child_pid; /* PID of module child */
|
2011-01-13 18:19:19 -05:00
|
|
|
/* Networking */
|
2011-12-21 04:05:32 -05:00
|
|
|
int port; /* TCP listening port */
|
2014-01-31 08:55:43 -05:00
|
|
|
int tcp_backlog; /* TCP listen() backlog */
|
2015-07-27 03:41:48 -04:00
|
|
|
char *bindaddr[CONFIG_BINDADDR_MAX]; /* Addresses we should bind to */
|
2013-07-04 12:50:15 -04:00
|
|
|
int bindaddr_count; /* Number of addresses in server.bindaddr[] */
|
2011-12-21 04:05:32 -05:00
|
|
|
char *unixsocket; /* UNIX socket path */
|
|
|
|
mode_t unixsocketperm; /* UNIX socket permission */
|
2015-07-27 03:41:48 -04:00
|
|
|
int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */
|
2013-07-05 05:47:20 -04:00
|
|
|
int ipfd_count; /* Used slots in ipfd[] */
|
2011-12-21 04:05:32 -05:00
|
|
|
int sofd; /* Unix socket file descriptor */
|
2015-07-27 03:41:48 -04:00
|
|
|
int cfd[CONFIG_BINDADDR_MAX];/* Cluster bus listening socket */
|
2013-07-05 05:47:20 -04:00
|
|
|
int cfd_count; /* Used slots in cfd[] */
|
2011-12-21 04:05:32 -05:00
|
|
|
list *clients; /* List of active clients */
|
2012-01-23 10:12:37 -05:00
|
|
|
list *clients_to_close; /* Clients to close asynchronously */
|
2015-09-28 12:25:57 -04:00
|
|
|
list *clients_pending_write; /* There is to write or install handler. */
|
2019-03-30 06:26:58 -04:00
|
|
|
list *clients_pending_read; /* Client has pending read socket buffers. */
|
2011-12-21 04:05:32 -05:00
|
|
|
list *slaves, *monitors; /* List of slaves and MONITORs */
|
2015-07-26 09:20:46 -04:00
|
|
|
client *current_client; /* Current client, only used on crash report */
|
2018-06-27 07:26:01 -04:00
|
|
|
rax *clients_index; /* Active clients dictionary by client ID. */
|
2014-02-04 09:52:09 -05:00
|
|
|
int clients_paused; /* True if clients are currently paused */
|
|
|
|
mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
|
2012-11-11 18:45:10 -05:00
|
|
|
char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
|
|
|
|
dict *migrate_cached_sockets;/* MIGRATE cached sockets */
|
2019-03-30 06:26:58 -04:00
|
|
|
_Atomic uint64_t next_client_id; /* Next client unique ID. Incremental. */
|
New security feature: Redis protected mode.
An exposed Redis instance on the internet can be the cause of serious
issues. Since Redis, by default, binds to all the interfaces, it is easy
to leave an instance without any protection layer by mistake.
Protected mode tries to address this problem in a soft way, providing a
layer of protection, but giving clues to Redis users about why the
server is not accepting connections.
When protected mode is enabled (the default), and if there are not even
minimal hints that the server is properly configured (no
"bind" directive is used in order to restrict the server to certain
interfaces, nor a password is set), clients connecting from external
interfaces are refused with an error explaining what to do in order to
fix the issue.
Clients connecting from the IPv4 and IPv6 loopback interfaces are still
accepted normally; similarly, Unix domain socket connections are not
restricted in any way.
2016-01-07 07:00:08 -05:00
|
|
|
int protected_mode; /* Don't accept external connections. */
|
2019-02-21 11:23:17 -05:00
|
|
|
int gopher_enabled; /* If true the server will reply to gopher
|
|
|
|
queries. Will still serve RESP2 queries. */
|
2017-10-24 02:35:05 -04:00
|
|
|
int io_threads_num; /* Number of IO threads to use. */
|
2019-04-30 09:39:27 -04:00
|
|
|
int io_threads_do_reads; /* Read and parse from IO threads? */
|
2017-10-24 02:35:05 -04:00
|
|
|
|
2010-11-08 05:52:03 -05:00
|
|
|
/* RDB / AOF loading information */
|
2011-12-21 04:05:32 -05:00
|
|
|
int loading; /* We are loading data from disk if true */
|
2010-11-08 05:52:03 -05:00
|
|
|
off_t loading_total_bytes;
|
|
|
|
off_t loading_loaded_bytes;
|
|
|
|
time_t loading_start_time;
|
2012-12-12 08:59:22 -05:00
|
|
|
off_t loading_process_events_interval_bytes;
|
2010-11-04 14:59:21 -04:00
|
|
|
/* Fast pointers to often looked up command */
|
2018-05-11 11:31:46 -04:00
|
|
|
struct redisCommand *delCommand, *multiCommand, *lpushCommand,
|
|
|
|
*lpopCommand, *rpopCommand, *zpopminCommand,
|
|
|
|
*zpopmaxCommand, *sremCommand, *execCommand,
|
2018-10-10 06:51:02 -04:00
|
|
|
*expireCommand, *pexpireCommand, *xclaimCommand,
|
|
|
|
*xgroupCommand;
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Fields used only for stats */
|
2011-12-21 04:05:32 -05:00
|
|
|
time_t stat_starttime; /* Server start time */
|
|
|
|
long long stat_numcommands; /* Number of processed commands */
|
|
|
|
long long stat_numconnections; /* Number of connections received */
|
|
|
|
long long stat_expiredkeys; /* Number of expired keys */
|
Track number of logically expired keys still in memory.
This commit adds two new fields in the INFO output, stats section:
expired_stale_perc:0.34
expired_time_cap_reached_count:58
The first field is an estimate of the number of keys that are yet in
memory but are already logically expired. The reason why those keys are
not yet reclaimed is that the active expire cycle can't spend more
time on the process of reclaiming the keys, and at the same time nobody
is accessing such keys. However as the active expire cycle runs, while
it will eventually have to return to the caller, because of time limit
or because there are less than 25% of keys logically expired in each
given database, it collects the stats in order to populate this INFO
field.
Note that expired_stale_perc is a running average, where the current
sample accounts for 5% and the history for 95%, so you'll see it
changing smoothly over time.
The other field, expired_time_cap_reached_count, counts the number
of times the expire cycle had to stop, even if still it was finding a
sizeable number of keys yet to expire, because of the time limit.
This allows people handling operations to understand if the Redis
server, during mass-expiration events, is able to collect keys fast
enough usually. It is normal for this field to increment during mass
expires, but normally it should very rarely increment. When instead it
constantly increments, it means that the current workload is using
a very significant percentage of CPU time to expire keys.
This feature was created thanks to the hints of Rashmi Ramesh and
Bart Robinson from Twitter. In private email exchanges, they noted how
it was important to improve the observability of this parameter in the
Redis server. Actually in big deployments, the amount of keys that are
yet to expire in each server, even if they are logically expired, may
account for a very big amount of wasted memory.
2018-02-19 05:12:49 -05:00
|
|
|
double stat_expired_stale_perc; /* Percentage of keys probably expired */
|
|
|
|
long long stat_expired_time_cap_reached_count; /* Early expire cylce stops.*/
|
2011-12-21 04:05:32 -05:00
|
|
|
long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
|
|
|
|
long long stat_keyspace_hits; /* Number of successful lookups of keys */
|
|
|
|
long long stat_keyspace_misses; /* Number of failed lookups of keys */
|
2016-12-29 20:37:52 -05:00
|
|
|
long long stat_active_defrag_hits; /* number of allocations moved */
|
|
|
|
long long stat_active_defrag_misses; /* number of allocations scanned but not moved */
|
|
|
|
long long stat_active_defrag_key_hits; /* number of keys with moved allocations */
|
|
|
|
long long stat_active_defrag_key_misses;/* number of keys scanned and not moved */
|
2018-02-18 10:15:22 -05:00
|
|
|
long long stat_active_defrag_scanned; /* number of dictEntries scanned */
|
2011-12-21 04:05:32 -05:00
|
|
|
size_t stat_peak_memory; /* Max used memory record */
|
2013-01-16 12:00:20 -05:00
|
|
|
long long stat_fork_time; /* Time needed to perform latest fork() */
|
2014-07-08 11:05:56 -04:00
|
|
|
double stat_fork_rate; /* Fork rate in GB/sec. */
|
2011-12-21 04:05:32 -05:00
|
|
|
long long stat_rejected_conn; /* Clients rejected because of maxclients */
|
2013-02-12 09:24:25 -05:00
|
|
|
long long stat_sync_full; /* Number of full resyncs with slaves. */
|
|
|
|
long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
|
|
|
|
long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */
|
2011-12-21 04:05:32 -05:00
|
|
|
list *slowlog; /* SLOWLOG list of commands */
|
|
|
|
long long slowlog_entry_id; /* SLOWLOG current entry ID */
|
|
|
|
long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
|
|
|
|
unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
|
2018-06-19 09:59:45 -04:00
|
|
|
struct malloc_stats cron_malloc_stats; /* sampled in serverCron(). */
|
2019-03-30 06:26:58 -04:00
|
|
|
_Atomic long long stat_net_input_bytes; /* Bytes read from network. */
|
|
|
|
_Atomic long long stat_net_output_bytes; /* Bytes written to network. */
|
2016-09-19 07:45:20 -04:00
|
|
|
size_t stat_rdb_cow_bytes; /* Copy on write bytes during RDB saving. */
|
|
|
|
size_t stat_aof_cow_bytes; /* Copy on write bytes during AOF rewrite. */
|
2019-07-17 01:51:02 -04:00
|
|
|
size_t stat_module_cow_bytes; /* Copy on write bytes during module fork. */
|
2014-12-03 06:06:54 -05:00
|
|
|
/* The following two are used to track instantaneous metrics, like
|
|
|
|
* number of operations per second, network traffic. */
|
|
|
|
struct {
|
|
|
|
long long last_sample_time; /* Timestamp of last sample in ms */
|
|
|
|
long long last_sample_count;/* Count in last sample */
|
2015-07-27 03:41:48 -04:00
|
|
|
long long samples[STATS_METRIC_SAMPLES];
|
2014-12-03 06:06:54 -05:00
|
|
|
int idx;
|
2015-07-27 03:41:48 -04:00
|
|
|
} inst_metric[STATS_METRIC_COUNT];
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Configuration */
|
2011-12-21 04:05:32 -05:00
|
|
|
int verbosity; /* Loglevel in redis.conf */
|
|
|
|
int maxidletime; /* Client timeout in seconds */
|
2013-02-08 10:40:59 -05:00
|
|
|
int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */
|
2013-03-27 12:55:02 -04:00
|
|
|
int active_expire_enabled; /* Can be disabled for testing purposes. */
|
2016-12-29 20:37:52 -05:00
|
|
|
int active_defrag_enabled;
|
|
|
|
size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */
|
|
|
|
int active_defrag_threshold_lower; /* minimum percentage of fragmentation to start active defrag */
|
|
|
|
int active_defrag_threshold_upper; /* maximum percentage of fragmentation at which we use maximum effort */
|
|
|
|
int active_defrag_cycle_min; /* minimal effort for defrag in CPU percentage */
|
|
|
|
int active_defrag_cycle_max; /* maximal effort for defrag in CPU percentage */
|
2018-02-18 10:15:22 -05:00
|
|
|
unsigned long active_defrag_max_scan_fields; /* maximum number of fields of set/hash/zset/list to process from within the main dict scan */
|
2019-03-30 06:26:58 -04:00
|
|
|
_Atomic size_t client_max_querybuf_len; /* Limit for client query buffer length */
|
2011-12-21 04:05:32 -05:00
|
|
|
int dbnum; /* Total number of configured DBs */
|
2015-01-08 15:22:33 -05:00
|
|
|
int supervised; /* 1 if supervised, 0 otherwise. */
|
2015-07-27 03:41:48 -04:00
|
|
|
int supervised_mode; /* See SUPERVISED_* */
|
2011-12-21 04:05:32 -05:00
|
|
|
int daemonize; /* True if running as a daemon */
|
2015-07-28 10:58:04 -04:00
|
|
|
clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
|
2011-12-21 04:05:32 -05:00
|
|
|
/* AOF persistence */
|
2019-07-01 08:22:29 -04:00
|
|
|
int aof_enabled; /* AOF configuration */
|
2015-07-27 03:41:48 -04:00
|
|
|
int aof_state; /* AOF_(ON|OFF|WAIT_REWRITE) */
|
2011-12-21 05:58:42 -05:00
|
|
|
int aof_fsync; /* Kind of fsync() policy */
|
|
|
|
char *aof_filename; /* Name of the AOF file */
|
|
|
|
int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */
|
|
|
|
int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */
|
|
|
|
off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */
|
|
|
|
off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
|
|
|
|
off_t aof_current_size; /* AOF current size. */
|
2019-04-29 02:38:28 -04:00
|
|
|
off_t aof_fsync_offset; /* AOF offset which is already synced to disk. */
|
2011-12-21 05:58:42 -05:00
|
|
|
int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
|
2011-12-21 06:17:02 -05:00
|
|
|
pid_t aof_child_pid; /* PID if rewriting process */
|
Allow an AOF rewrite buffer > 2GB (Fix for issue #504).
During the AOF rewrite process, the parent process needs to accumulate
the new writes in an in-memory buffer: when the child terminates the
AOF rewriting process this buffer (that is, the difference between the
dataset when the rewrite was started, and the current dataset) is
flushed to the new AOF file.
We used to implement this buffer using an sds.c string, but sds.c has a
2GB limit. Sometimes the dataset can be big enough, the amount of writes
so high, and the rewrite process slow enough that we overflow the 2GB
limit, causing a crash, documented on github by issue #504.
In order to prevent this from happening, this commit introduces a new
system to accumulate writes, implemented by a linked list of blocks of
10 MB each, so that we also avoid paying the reallocation cost.
Note that theoretically modern operating systems may implement realloc()
simply as a remapping of the old pages, thus with very good performance,
see for instance the mremap() syscall on Linux. However this is not
always true, and jemalloc by default avoids doing this because there are
issues with the current implementation of mremap().
For this reason we are using a linked list of blocks instead of a single
block that gets reallocated again and again.
The changes in this commit lack testing, which will be performed before
merging into the unstable branch. This fix will not enter 2.4 because it
is too invasive. However 2.4 will log a warning when the AOF rewrite
buffer is near the 2GB limit.
2012-05-22 07:03:41 -04:00
|
|
|
list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */
|
2011-12-21 06:17:02 -05:00
|
|
|
sds aof_buf; /* AOF buffer, written before entering the event loop */
|
|
|
|
int aof_fd; /* File descriptor of currently selected AOF file */
|
|
|
|
int aof_selected_db; /* Currently selected DB in AOF */
|
2011-12-21 04:05:32 -05:00
|
|
|
time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
|
2011-12-21 06:17:02 -05:00
|
|
|
time_t aof_last_fsync; /* UNIX time of last fsync() */
|
2012-05-25 06:11:30 -04:00
|
|
|
time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
|
|
|
|
time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
|
2015-07-26 17:17:55 -04:00
|
|
|
int aof_lastbgrewrite_status; /* C_OK or C_ERR */
|
2012-03-25 05:27:35 -04:00
|
|
|
unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
|
2018-03-15 12:44:50 -04:00
|
|
|
int aof_rewrite_incremental_fsync;/* fsync incrementally while aof rewriting? */
|
|
|
|
int rdb_save_incremental_fsync; /* fsync incrementally while rdb saving? */
|
2015-07-26 17:17:55 -04:00
|
|
|
int aof_last_write_status; /* C_OK or C_ERR */
|
2014-02-12 06:47:10 -05:00
|
|
|
int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */
|
2014-09-05 05:48:35 -04:00
|
|
|
int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */
|
2016-08-09 10:41:40 -04:00
|
|
|
int aof_use_rdb_preamble; /* Use RDB preamble on AOF rewrites. */
|
2014-07-04 09:22:49 -04:00
|
|
|
/* AOF pipes used to communicate between parent and child during rewrite. */
|
|
|
|
int aof_pipe_write_data_to_child;
|
|
|
|
int aof_pipe_read_data_from_parent;
|
|
|
|
int aof_pipe_write_ack_to_parent;
|
|
|
|
int aof_pipe_read_ack_from_child;
|
|
|
|
int aof_pipe_write_ack_to_child;
|
|
|
|
int aof_pipe_read_ack_from_parent;
|
|
|
|
int aof_stop_sending_diff; /* If true stop sending accumulated diffs
|
|
|
|
to child process. */
|
|
|
|
sds aof_child_diff; /* AOF diff accumulator child side. */
|
2011-12-21 04:05:32 -05:00
|
|
|
/* RDB persistence */
|
|
|
|
long long dirty; /* Changes to DB from the last save */
|
|
|
|
long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
|
2011-12-21 06:22:13 -05:00
|
|
|
pid_t rdb_child_pid; /* PID of RDB saving child */
|
2011-12-21 04:05:32 -05:00
|
|
|
struct saveparam *saveparams; /* Save points array for RDB */
|
|
|
|
int saveparamslen; /* Number of saving points */
|
2011-12-21 06:22:13 -05:00
|
|
|
char *rdb_filename; /* Name of RDB file */
|
|
|
|
int rdb_compression; /* Use compression in RDB? */
|
2012-04-10 09:47:10 -04:00
|
|
|
int rdb_checksum; /* Use RDB checksum? */
|
2013-01-16 12:00:20 -05:00
|
|
|
time_t lastsave; /* Unix time of last successful save */
|
2013-04-02 08:05:50 -04:00
|
|
|
time_t lastbgsave_try; /* Unix time of last attempted bgsave */
|
2012-05-25 06:11:30 -04:00
|
|
|
time_t rdb_save_time_last; /* Time used by last RDB save run. */
|
|
|
|
time_t rdb_save_time_start; /* Current RDB save start time. */
|
2016-07-21 12:34:53 -04:00
|
|
|
int rdb_bgsave_scheduled; /* BGSAVE when possible if true. */
|
2014-10-08 03:09:01 -04:00
|
|
|
int rdb_child_type; /* Type of save by active child. */
|
2015-07-26 17:17:55 -04:00
|
|
|
int lastbgsave_status; /* C_OK or C_ERR */
|
2012-03-07 12:02:26 -05:00
|
|
|
int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
|
2014-10-14 09:29:07 -04:00
|
|
|
int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */
|
|
|
|
int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */
|
2019-07-01 08:22:29 -04:00
|
|
|
int rdb_key_save_delay; /* Delay in microseconds between keys while
|
|
|
|
* writing the RDB. (for testings) */
|
2016-09-19 07:45:20 -04:00
|
|
|
/* Pipe and data structures for child -> parent info sharing. */
|
|
|
|
int child_info_pipe[2]; /* Pipe used to write the child_info_data. */
|
|
|
|
struct {
|
|
|
|
int process_type; /* AOF or RDB child? */
|
|
|
|
size_t cow_size; /* Copy on write size. */
|
|
|
|
unsigned long long magic; /* Magic value to make sure data is valid. */
|
|
|
|
} child_info_data;
|
2012-02-28 12:03:08 -05:00
|
|
|
/* Propagation of commands in AOF / replication */
|
2012-02-28 18:46:50 -05:00
|
|
|
redisOpArray also_propagate; /* Additional command to propagate. */
|
2011-01-07 12:15:14 -05:00
|
|
|
/* Logging */
|
2011-12-21 04:05:32 -05:00
|
|
|
char *logfile; /* Path of log file */
|
|
|
|
int syslog_enabled; /* Is syslog enabled? */
|
|
|
|
char *syslog_ident; /* Syslog ident */
|
|
|
|
int syslog_facility; /* Syslog facility */
|
2013-01-30 12:33:16 -05:00
|
|
|
/* Replication (master) */
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out-of-band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because,
during the work to implement it, several changes were needed in order
to make things work well.
2016-11-09 05:31:06 -05:00
|
|
|
char replid[CONFIG_RUN_ID_SIZE+1]; /* My current replication ID. */
|
|
|
|
char replid2[CONFIG_RUN_ID_SIZE+1]; /* replid inherited from master*/
|
|
|
|
long long master_repl_offset; /* My current replication offset */
|
|
|
|
long long second_replid_offset; /* Accept offsets up to this for replid2. */
|
2013-01-30 12:33:16 -05:00
|
|
|
int slaveseldb; /* Last SELECTed DB in replication output */
|
|
|
|
int repl_ping_slave_period; /* Master pings the slave every N seconds */
|
|
|
|
char *repl_backlog; /* Replication backlog for partial syncs */
|
|
|
|
long long repl_backlog_size; /* Backlog circular buffer size */
|
|
|
|
long long repl_backlog_histlen; /* Backlog actual data length */
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out-of-band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because,
during the work to implement it, several changes were needed in order
to make things work well.
2016-11-09 05:31:06 -05:00
|
|
|
long long repl_backlog_idx; /* Backlog circular buffer current offset,
|
|
|
|
that is the next byte we'll write to.*/
|
|
|
|
long long repl_backlog_off; /* Replication "master offset" of first
|
|
|
|
byte in the replication backlog buffer.*/
|
2013-01-30 12:33:16 -05:00
|
|
|
time_t repl_backlog_time_limit; /* Time without slaves after the backlog
|
|
|
|
gets released. */
|
|
|
|
time_t repl_no_slaves_since; /* We have no slaves since that time.
|
|
|
|
Only valid if server.slaves len is 0. */
|
2013-05-29 05:36:44 -04:00
|
|
|
int repl_min_slaves_to_write; /* Min number of slaves to write. */
|
|
|
|
int repl_min_slaves_max_lag; /* Max lag of <count> slaves to write. */
|
|
|
|
int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */
|
2019-07-01 08:22:29 -04:00
|
|
|
int repl_diskless_sync; /* Master send RDB to slaves sockets directly. */
|
|
|
|
int repl_diskless_load; /* Slave parse RDB directly from the socket.
|
|
|
|
* see REPL_DISKLESS_LOAD_* enum */
|
2014-10-27 05:36:30 -04:00
|
|
|
int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
|
2013-01-30 12:33:16 -05:00
|
|
|
/* Replication (slave) */
|
2019-02-12 03:44:50 -05:00
|
|
|
char *masteruser; /* AUTH with this user and masterauth with master */
|
2011-12-21 04:05:32 -05:00
|
|
|
char *masterauth; /* AUTH with this password with master */
|
|
|
|
char *masterhost; /* Hostname of master */
|
|
|
|
int masterport; /* Port of master */
|
|
|
|
int repl_timeout; /* Timeout after N seconds of master idle */
|
2015-07-26 09:20:46 -04:00
|
|
|
client *master; /* Client that is master for this slave */
|
|
|
|
client *cached_master; /* Cached master to be reused for PSYNC. */
|
2011-12-21 04:05:32 -05:00
|
|
|
int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
|
2011-12-21 06:23:18 -05:00
|
|
|
int repl_state; /* Replication status if the instance is a slave */
|
2012-08-24 13:28:44 -04:00
|
|
|
off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
|
|
|
|
off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
|
|
|
|
off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
|
2011-12-21 04:05:32 -05:00
|
|
|
int repl_transfer_s; /* Slave -> Master SYNC socket */
|
|
|
|
int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
|
|
|
|
char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
|
|
|
|
time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
|
2010-11-04 14:59:21 -04:00
|
|
|
int repl_serve_stale_data; /* Serve stale data when link is down? */
|
2012-03-20 12:32:48 -04:00
|
|
|
int repl_slave_ro; /* Slave is read only? */
|
2018-08-27 06:09:08 -04:00
|
|
|
int repl_slave_ignore_maxmemory; /* If true slaves do not evict. */
|
2011-12-21 04:05:32 -05:00
|
|
|
time_t repl_down_since; /* Unix time at which link with master went down */
|
2013-01-31 05:14:15 -05:00
|
|
|
int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
|
2012-08-28 11:20:26 -04:00
|
|
|
int slave_priority; /* Reported in INFO and used by Sentinel. */
|
2016-07-27 10:41:20 -04:00
|
|
|
int slave_announce_port; /* Give the master this listening port. */
|
|
|
|
char *slave_announce_ip; /* Give the master this ip address. */
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out of band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because
during the work to implement it, different changes were needed in order
to make things work well.
2016-11-09 05:31:06 -05:00
|
|
|
/* The following two fields are where we store master PSYNC replid/offset
|
|
|
|
* while the PSYNC is in progress. At the end we'll copy the fields into
|
|
|
|
* the server->master client structure. */
|
|
|
|
char master_replid[CONFIG_RUN_ID_SIZE+1]; /* Master PSYNC runid. */
|
|
|
|
long long master_initial_offset; /* Master PSYNC offset. */
|
2015-10-02 09:27:57 -04:00
|
|
|
int repl_slave_lazy_flush; /* Lazy FLUSHALL before loading DB? */
|
2013-06-24 04:26:04 -04:00
|
|
|
/* Replication script cache. */
|
|
|
|
dict *repl_scriptcache_dict; /* SHA1 all slaves are aware of. */
|
|
|
|
list *repl_scriptcache_fifo; /* First in, first out LRU eviction. */
|
2014-08-13 05:44:38 -04:00
|
|
|
unsigned int repl_scriptcache_size; /* Max number of elements. */
|
2013-12-04 09:52:20 -05:00
|
|
|
/* Synchronous replication. */
|
|
|
|
list *clients_waiting_acks; /* Clients waiting in WAIT command. */
|
|
|
|
int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */
|
2010-11-04 12:29:53 -04:00
|
|
|
/* Limits */
|
2014-08-13 05:44:38 -04:00
|
|
|
unsigned int maxclients; /* Max number of simultaneous clients */
|
2011-12-21 04:05:32 -05:00
|
|
|
unsigned long long maxmemory; /* Max number of memory bytes to use */
|
2013-01-16 12:00:20 -05:00
|
|
|
int maxmemory_policy; /* Policy for key eviction */
|
2011-12-21 04:05:32 -05:00
|
|
|
int maxmemory_samples; /* Precision of random sampling */
|
2017-10-12 23:09:48 -04:00
|
|
|
int lfu_log_factor; /* LFU logarithmic counter factor. */
|
|
|
|
int lfu_decay_time; /* LFU counter decay factor. */
|
2018-01-11 05:27:03 -05:00
|
|
|
long long proto_max_bulk_len; /* Protocol bulk length maximum size. */
|
2010-11-04 12:29:53 -04:00
|
|
|
/* Blocked clients */
|
2017-09-09 05:10:59 -04:00
|
|
|
unsigned int blocked_clients; /* # of clients executing a blocking cmd.*/
|
|
|
|
unsigned int blocked_clients_by_type[BLOCKED_NUM];
|
2010-12-29 13:39:42 -05:00
|
|
|
list *unblocked_clients; /* list of clients to unblock before next loop */
|
A reimplementation of blocking operation internals.
Redis provides support for blocking operations such as BLPOP or BRPOP.
These operations are identical to normal LPOP and RPOP operations as long
as there are elements in the target list, but if the list is empty they
block waiting for new data to arrive to the list.
All the clients blocked waiting for the same list are served in a FIFO
way, so the first that blocked is the first to be served when there is
more data pushed by another client into the list.
The previous implementation of blocking operations was conceived to
serve clients in the context of push operations. For instance:
1) There is a client "A" blocked on list "foo".
2) The client "B" performs `LPUSH foo somevalue`.
3) The client "A" is served in the context of the "B" LPUSH,
synchronously.
Processing things in a synchronous way was useful as if "A" pushes a
value that is served by "B", from the point of view of the database is a
NOP (no operation) thing, that is, nothing is replicated, nothing is
written in the AOF file, and so forth.
However later we implemented two things:
1) Variadic LPUSH that could add multiple values to a list in the
context of a single call.
2) BRPOPLPUSH that was a version of BRPOP that also provided a "PUSH"
side effect when receiving data.
This forced us to make the synchronous implementation more complex. If
client "B" is waiting for data, and "A" pushes three elements in a
single call, we needed to propagate an LPUSH with a missing argument
in the AOF and replication link. We also needed to make sure to
replicate the LPUSH side of BRPOPLPUSH, but only if it in turn did not
happen to serve another blocking client into another list ;)
This was complex, but with a few mutually recursive functions
everything worked as expected... until one day we introduced scripting
in Redis.
Scripting + synchronous blocking operations = Issue #614.
Basically you can't "rewrite" a script to have just a partial effect on
the replicas and AOF file if the script happened to serve a few blocked
clients.
The solution to all these problems, implemented by this commit, is to
change the way we serve blocked clients. Instead of serving the blocked
clients synchronously, in the context of the command performing the PUSH
operation, it is now an asynchronous and iterative process:
1) If a key that has clients blocked waiting for data is the subject of
a list push operation, We simply mark keys as "ready" and put it into a
queue.
2) Every command pushing stuff on lists, as a variadic LPUSH, a script,
or whatever it is, is replicated verbatim without any rewriting.
3) Every time a Redis command, a MULTI/EXEC block, or a script,
completed its execution, we run the list of keys ready to serve blocked
clients (as more data arrived), and process this list serving the
blocked clients.
4) As a result of "3" maybe more keys are ready again for other clients
(as a result of BRPOPLPUSH we may have push operations), so we iterate
back to step "3" if it's needed.
The new code has a much simpler semantics, and a simpler to understand
implementation, with the disadvantage of not being able to "optimize out"
a PUSH+BPOP as a No OP.
This commit will be tested with care before the final merge, more tests
will be added likely.
2012-09-04 04:37:49 -04:00
|
|
|
list *ready_keys; /* List of readyList structures for BLPOP & co */
|
2019-06-29 20:08:41 -04:00
|
|
|
/* Client side caching. */
|
|
|
|
unsigned int tracking_clients; /* # of clients with tracking enabled.*/
|
2019-07-23 04:57:22 -04:00
|
|
|
int tracking_table_max_fill; /* Max fill percentage. */
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Sort parameters - qsort_r() is only available under BSD so we
|
|
|
|
* have to take this state global, in order to pass it to sortCompare() */
|
|
|
|
int sort_desc;
|
|
|
|
int sort_alpha;
|
|
|
|
int sort_bypattern;
|
2013-07-12 06:02:36 -04:00
|
|
|
int sort_store;
|
2011-12-21 04:05:32 -05:00
|
|
|
/* Zip structure config, see redis.conf for more information */
|
2012-01-03 01:14:10 -05:00
|
|
|
size_t hash_max_ziplist_entries;
|
|
|
|
size_t hash_max_ziplist_value;
|
2010-07-02 13:57:12 -04:00
|
|
|
size_t set_max_intset_entries;
|
2011-03-09 08:01:57 -05:00
|
|
|
size_t zset_max_ziplist_entries;
|
|
|
|
size_t zset_max_ziplist_value;
|
2014-04-15 11:46:51 -04:00
|
|
|
size_t hll_sparse_max_bytes;
|
2018-06-07 08:24:45 -04:00
|
|
|
size_t stream_node_max_bytes;
|
|
|
|
int64_t stream_node_max_entries;
|
2014-12-16 00:49:14 -05:00
|
|
|
/* List parameters */
|
|
|
|
int list_max_ziplist_size;
|
|
|
|
int list_compress_depth;
|
|
|
|
/* time cache */
|
2019-03-30 06:26:58 -04:00
|
|
|
_Atomic time_t unixtime; /* Unix time sampled every cron cycle. */
|
|
|
|
time_t timezone; /* Cached timezone. As set by tzset(). */
|
|
|
|
int daylight_active; /* Currently in daylight saving time. */
|
|
|
|
long long mstime; /* 'unixtime' with milliseconds resolution. */
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Pubsub */
|
2011-12-21 04:05:32 -05:00
|
|
|
dict *pubsub_channels; /* Map channels to list of subscribed clients */
|
|
|
|
list *pubsub_patterns; /* A list of pubsub_patterns */
|
2013-01-25 07:19:08 -05:00
|
|
|
int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is an
|
2015-07-27 03:41:48 -04:00
|
|
|
xor of NOTIFY_... flags. */
|
2011-04-28 13:00:33 -04:00
|
|
|
/* Cluster */
|
2013-02-14 07:20:56 -05:00
|
|
|
int cluster_enabled; /* Is cluster enabled? */
|
2013-10-09 10:18:33 -04:00
|
|
|
mstime_t cluster_node_timeout; /* Cluster node timeout. */
|
2013-02-14 07:20:56 -05:00
|
|
|
char *cluster_configfile; /* Cluster auto-generated config file name. */
|
2013-10-09 09:37:20 -04:00
|
|
|
struct clusterState *cluster; /* State of the cluster */
|
2014-01-31 05:12:34 -05:00
|
|
|
int cluster_migration_barrier; /* Cluster replicas migration barrier. */
|
2014-05-22 10:57:47 -04:00
|
|
|
int cluster_slave_validity_factor; /* Slave max data age for failover. */
|
2014-09-17 05:10:09 -04:00
|
|
|
int cluster_require_full_coverage; /* If true, put the cluster down if
|
2015-10-29 08:50:04 -04:00
|
|
|
there is at least an uncovered slot.*/
|
2018-03-14 08:46:36 -04:00
|
|
|
int cluster_slave_no_failover; /* Prevent slave from starting a failover
|
|
|
|
if the master is in failure state. */
|
2016-01-21 10:57:35 -05:00
|
|
|
char *cluster_announce_ip; /* IP address to announce on cluster bus. */
|
|
|
|
int cluster_announce_port; /* base port to announce on cluster bus. */
|
|
|
|
int cluster_announce_bus_port; /* bus port to announce on cluster bus. */
|
2018-09-19 05:20:52 -04:00
|
|
|
int cluster_module_flags; /* Set of flags that Redis modules are able
|
|
|
|
to set in order to suppress certain
|
|
|
|
native Redis Cluster features. Check the
|
|
|
|
REDISMODULE_CLUSTER_FLAG_*. */
|
2011-04-30 11:46:52 -04:00
|
|
|
/* Scripting */
|
2011-07-13 09:38:03 -04:00
|
|
|
lua_State *lua; /* The Lua interpreter. We use just one for all clients */
|
2015-07-26 09:20:46 -04:00
|
|
|
client *lua_client; /* The "fake client" to query Redis from Lua */
|
|
|
|
client *lua_caller; /* The client running EVAL right now, or NULL */
|
2011-12-21 04:05:32 -05:00
|
|
|
dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */
|
2018-04-30 12:33:01 -04:00
|
|
|
unsigned long long lua_scripts_mem; /* Cached scripts' memory + oh */
|
2014-02-03 09:45:40 -05:00
|
|
|
mstime_t lua_time_limit; /* Script timeout in milliseconds */
|
|
|
|
mstime_t lua_time_start; /* Start time of script, milliseconds time */
|
2011-11-18 08:10:48 -05:00
|
|
|
int lua_write_dirty; /* True if a write command was called during the
|
|
|
|
execution of the current script. */
|
2011-09-27 09:30:31 -04:00
|
|
|
int lua_random_dirty; /* True if a random command was called during the
|
2011-11-18 08:10:48 -05:00
|
|
|
execution of the current script. */
|
2015-10-29 07:39:07 -04:00
|
|
|
int lua_replicate_commands; /* True if we are doing single commands repl. */
|
|
|
|
int lua_multi_emitted;/* True if we already propagated MULTI. */
|
2015-10-29 08:50:04 -04:00
|
|
|
int lua_repl; /* Script replication flags for redis.set_repl(). */
|
2011-10-27 08:49:10 -04:00
|
|
|
int lua_timedout; /* True if we reached the time limit for script
|
|
|
|
execution. */
|
2011-11-18 08:10:48 -05:00
|
|
|
int lua_kill; /* Kill the script if true. */
|
2015-10-30 05:13:04 -04:00
|
|
|
int lua_always_replicate_commands; /* Default replication type. */
|
2015-10-02 09:27:57 -04:00
|
|
|
/* Lazy free */
|
|
|
|
int lazyfree_lazy_eviction;
|
|
|
|
int lazyfree_lazy_expire;
|
|
|
|
int lazyfree_lazy_server_del;
|
2014-07-01 05:30:15 -04:00
|
|
|
/* Latency monitor */
|
|
|
|
long long latency_monitor_threshold;
|
|
|
|
dict *latency_events;
|
2019-02-05 04:48:17 -05:00
|
|
|
/* ACLs */
|
|
|
|
char *acl_filename; /* ACL Users file. NULL if not configured. */
|
2013-01-16 12:00:20 -05:00
|
|
|
/* Assert & bug reporting */
|
2016-06-20 16:08:06 -04:00
|
|
|
const char *assert_failed;
|
|
|
|
const char *assert_file;
|
2011-11-24 09:47:26 -05:00
|
|
|
int assert_line;
|
2011-12-21 04:05:32 -05:00
|
|
|
int bug_report_start; /* True if bug report header was already logged. */
|
2012-03-27 05:47:51 -04:00
|
|
|
int watchdog_period; /* Software watchdog period in ms. 0 = off */
|
2014-06-17 23:05:30 -04:00
|
|
|
/* System hardware info */
|
|
|
|
size_t system_memory_size; /* Total memory in system as reported by OS */
|
2010-06-21 18:07:48 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Pairs one client with one pattern object.
 * NOTE(review): presumably one element of the server's 'pubsub_patterns'
 * list, i.e. a single pattern subscription -- confirm against pubsub.c. */
typedef struct pubsubPattern {
|
2015-07-26 09:20:46 -04:00
|
|
|
client *client; /* Client this entry refers to. */
|
2010-06-21 18:07:48 -04:00
|
|
|
robj *pattern; /* Pattern object this entry refers to. */
|
|
|
|
} pubsubPattern;
|
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
/* Function type stored in the 'proc' field of struct redisCommand: it
 * receives the client and returns nothing. */
typedef void redisCommandProc(client *c);
|
2014-03-10 08:18:41 -04:00
|
|
|
/* Function type stored in the 'getkeys_proc' field of struct redisCommand.
 * It receives the command, the argument vector and its length, and returns
 * an int pointer.
 * NOTE(review): presumably the returned array holds the argv positions of
 * the key arguments and '*numkeys' receives their count; ownership of the
 * returned array is not visible here -- confirm against the implementation. */
typedef int *redisGetKeysProc(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Descriptor of a single command: its name, the function implementing it,
 * its arity and flags, how to locate key arguments, a pair of counters, and
 * the numeric ID used for ACL checks. */
struct redisCommand {
|
|
|
|
char *name; /* Command name. */
|
|
|
|
redisCommandProc *proc; /* Function implementing the command. */
|
|
|
|
int arity; /* Argument count. NOTE(review): sign convention is not
              visible in this header -- confirm. */
|
2019-01-23 10:47:29 -05:00
|
|
|
char *sflags; /* Flags as string representation, one char per flag. */
|
|
|
|
uint64_t flags; /* The actual flags, obtained from the 'sflags' field. */
|
2011-03-23 13:09:17 -04:00
|
|
|
/* Use a function to determine keys arguments in a command line.
|
2011-06-25 06:22:03 -04:00
|
|
|
* Used for Redis Cluster redirect. */
|
2011-03-23 13:09:17 -04:00
|
|
|
redisGetKeysProc *getkeys_proc;
|
2010-06-21 18:07:48 -04:00
|
|
|
/* What keys should be loaded in background when calling this command? */
|
2011-03-23 13:09:17 -04:00
|
|
|
int firstkey; /* The first argument that's a key (0 = no keys) */
|
2013-01-16 12:00:20 -05:00
|
|
|
int lastkey; /* The last argument that's a key */
|
2011-03-23 13:09:17 -04:00
|
|
|
int keystep; /* The step between first and last key */
|
2011-01-23 16:14:15 -05:00
|
|
|
/* NOTE(review): presumably cumulative execution time and number of
 * invocations of this command -- confirm where they are updated. */
long long microseconds, calls;
|
2019-01-09 11:20:47 -05:00
|
|
|
int id; /* Command ID. This is a progressive ID starting from 0 that
|
|
|
|
is assigned at runtime, and is used in order to check
|
|
|
|
ACLs. A connection is able to execute a given command if
|
|
|
|
the user associated to the connection has this command
|
|
|
|
bit set in the bitmap of allowed commands. */
|
2010-06-21 18:07:48 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Associates a name string with an unsigned long value.
 * NOTE(review): presumably a function-name / address pair used for symbol
 * lookup -- no user of this struct is visible here, confirm. */
struct redisFunctionSym {
|
|
|
|
char *name; /* Symbol name. */
|
|
|
|
unsigned long pointer; /* Value associated with 'name' (stored as an integer). */
|
|
|
|
};
|
|
|
|
|
|
|
|
/* An object pointer together with a union holding either a numeric score or
 * another object pointer.
 * NOTE(review): presumably one element being ordered by SORT, with 'u'
 * carrying the value it is compared by; which union member is valid is
 * decided by the code using it -- confirm in sort.c. */
typedef struct _redisSortObject {
|
|
|
|
robj *obj; /* The object itself. */
|
|
|
|
union {
|
|
|
|
double score; /* Numeric value. */
|
|
|
|
robj *cmpobj; /* Object value. */
|
|
|
|
} u;
|
|
|
|
} redisSortObject;
|
|
|
|
|
|
|
|
/* An integer operation type together with a pattern object.
 * NOTE(review): presumably one operation (e.g. a GET pattern) requested in a
 * SORT call; the valid values of 'type' are not visible here -- confirm in
 * sort.c. */
typedef struct _redisSortOperation {
|
|
|
|
int type; /* Operation type. */
|
|
|
|
robj *pattern; /* Pattern object the operation works with. */
|
|
|
|
} redisSortOperation;
|
|
|
|
|
|
|
|
/* Structure to hold list iteration abstraction. */
|
|
|
|
typedef struct {
|
|
|
|
robj *subject; /* Object being iterated (see listTypeInitIterator()). */
|
|
|
|
unsigned char encoding; /* NOTE(review): presumably the encoding of 'subject'
                           captured when the iterator is created -- confirm. */
|
|
|
|
unsigned char direction; /* Iteration direction */
|
2014-11-13 14:11:47 -05:00
|
|
|
quicklistIter *iter; /* Underlying quicklist iterator. */
|
2010-06-21 18:07:48 -04:00
|
|
|
} listTypeIterator;
|
|
|
|
|
|
|
|
/* Structure for an entry while iterating over a list. */
|
|
|
|
typedef struct {
|
|
|
|
listTypeIterator *li; /* List iterator this entry is associated with. */
|
2014-11-13 14:11:47 -05:00
|
|
|
quicklistEntry entry; /* Entry in quicklist */
|
2010-06-21 18:07:48 -04:00
|
|
|
} listTypeEntry;
|
|
|
|
|
2010-07-02 13:57:12 -04:00
|
|
|
/* Structure to hold set iteration abstraction. */
|
|
|
|
typedef struct {
|
|
|
|
robj *subject; /* Object being iterated. */
|
|
|
|
int encoding; /* NOTE(review): presumably the encoding of 'subject', selecting
                 whether 'ii' or 'di' is in use -- confirm in t_set.c. */
|
|
|
|
int ii; /* intset iterator */
|
|
|
|
dictIterator *di; /* dict iterator */
|
2010-08-21 05:25:13 -04:00
|
|
|
} setTypeIterator;
|
2010-07-02 13:57:12 -04:00
|
|
|
|
2013-01-16 12:00:20 -05:00
|
|
|
/* Structure to hold hash iteration abstraction. Note that iteration over
|
2010-06-21 18:07:48 -04:00
|
|
|
* hashes involves both fields and values. Because it is possible that
|
|
|
|
* not both are required, store pointers in the iterator to avoid
|
|
|
|
* unnecessary memory allocation for fields/values. */
|
|
|
|
typedef struct {
|
2012-01-03 01:14:10 -05:00
|
|
|
robj *subject; /* Object being iterated. */
|
2010-06-21 18:07:48 -04:00
|
|
|
int encoding; /* NOTE(review): presumably the encoding of 'subject', selecting
                 which of the pointer groups below is in use -- confirm. */
|
2012-01-03 01:14:10 -05:00
|
|
|
|
|
|
|
|
|
|
/* NOTE(review): presumably pointers to the current field and value when the
 * hash is stored in a compact byte-array encoding -- confirm in t_hash.c. */
unsigned char *fptr, *vptr;
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
|
|
|
dictIterator *di; /* dict iterator */
|
|
|
|
dictEntry *de; /* dict entry; NOTE(review): presumably the current one. */
|
|
|
|
} hashTypeIterator;
|
|
|
|
|
2018-03-19 09:16:13 -04:00
|
|
|
#include "stream.h" /* Stream data type header file. */
|
|
|
|
|
2015-07-26 09:28:00 -04:00
|
|
|
#define OBJ_HASH_KEY 1
|
|
|
|
#define OBJ_HASH_VALUE 2
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/*-----------------------------------------------------------------------------
|
|
|
|
* Extern declarations
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
|
|
|
extern struct redisServer server;
|
|
|
|
extern struct sharedObjectsStruct shared;
|
2015-07-31 12:01:23 -04:00
|
|
|
extern dictType objectKeyPointerValueDictType;
|
2017-09-06 09:43:28 -04:00
|
|
|
extern dictType objectKeyHeapPointerValueDictType;
|
2010-06-21 18:07:48 -04:00
|
|
|
extern dictType setDictType;
|
|
|
|
extern dictType zsetDictType;
|
2011-03-29 11:51:15 -04:00
|
|
|
extern dictType clusterNodesDictType;
|
2013-11-29 11:37:06 -05:00
|
|
|
extern dictType clusterNodesBlackListDictType;
|
2011-07-13 09:38:03 -04:00
|
|
|
extern dictType dbDictType;
|
2012-11-22 09:50:00 -05:00
|
|
|
extern dictType shaScriptObjectDictType;
|
2010-06-21 18:07:48 -04:00
|
|
|
extern double R_Zero, R_PosInf, R_NegInf, R_Nan;
|
2012-03-27 12:18:57 -04:00
|
|
|
extern dictType hashDictType;
|
2013-06-24 04:26:04 -04:00
|
|
|
extern dictType replScriptCacheDictType;
|
2015-09-28 04:47:45 -04:00
|
|
|
extern dictType keyptrDictType;
|
2016-03-06 07:44:24 -05:00
|
|
|
extern dictType modulesDictType;
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/*-----------------------------------------------------------------------------
|
|
|
|
* Functions prototypes
|
|
|
|
*----------------------------------------------------------------------------*/
|
|
|
|
|
2016-03-06 07:44:24 -05:00
|
|
|
/* Modules */
|
|
|
|
void moduleInitModulesSystem(void);
|
2016-06-05 03:03:34 -04:00
|
|
|
int moduleLoad(const char *path, void **argv, int argc);
|
2016-03-06 07:44:24 -05:00
|
|
|
void moduleLoadFromQueue(void);
|
2016-04-27 12:09:31 -04:00
|
|
|
int *moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
2016-05-18 05:45:40 -04:00
|
|
|
moduleType *moduleTypeLookupModuleByID(uint64_t id);
|
|
|
|
void moduleTypeNameByID(char *name, uint64_t moduleid);
|
2016-10-06 11:05:38 -04:00
|
|
|
void moduleFreeContext(struct RedisModuleCtx *ctx);
|
2016-10-07 05:55:35 -04:00
|
|
|
void unblockClientFromModule(client *c);
|
|
|
|
void moduleHandleBlockedClients(void);
|
|
|
|
void moduleBlockedClientTimedOut(client *c);
|
2017-04-10 03:33:21 -04:00
|
|
|
void moduleBlockedClientPipeReadable(aeEventLoop *el, int fd, void *privdata, int mask);
|
2017-05-03 05:26:21 -04:00
|
|
|
size_t moduleCount(void);
|
|
|
|
void moduleAcquireGIL(void);
|
|
|
|
void moduleReleaseGIL(void);
|
2017-11-27 09:29:55 -05:00
|
|
|
void moduleNotifyKeyspaceEvent(int type, const char *event, robj *key, int dbid);
|
2018-02-23 09:19:37 -05:00
|
|
|
void moduleCallCommandFilters(client *c);
|
2019-07-17 01:51:02 -04:00
|
|
|
void ModuleForkDoneHandler(int exitcode, int bysignal);
|
2019-08-25 03:11:48 -04:00
|
|
|
void TerminateModuleForkChild(int wait);
|
2019-07-21 10:41:03 -04:00
|
|
|
ssize_t rdbSaveModulesAux(rio *rdb, int when);
|
2019-07-30 08:11:57 -04:00
|
|
|
int moduleAllDatatypesHandleErrors();
|
2016-03-06 07:44:24 -05:00
|
|
|
|
2011-02-25 13:11:25 -05:00
|
|
|
/* Utils */
|
|
|
|
long long ustime(void);
|
2011-11-08 18:03:03 -05:00
|
|
|
long long mstime(void);
|
2018-04-05 07:24:22 -04:00
|
|
|
void getRandomHexChars(char *p, size_t len);
|
|
|
|
void getRandomBytes(unsigned char *p, size_t len);
|
2012-04-09 06:20:47 -04:00
|
|
|
uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l);
|
2012-04-07 06:11:23 -04:00
|
|
|
void exitFromChild(int retcode);
|
2013-06-26 09:19:06 -04:00
|
|
|
size_t redisPopcount(void *s, long count);
|
2013-02-26 05:52:12 -05:00
|
|
|
void redisSetProcTitle(char *title);
|
2011-02-25 13:11:25 -05:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* networking.c -- Networking and Client related operations */
|
2015-07-26 09:20:46 -04:00
|
|
|
client *createClient(int fd);
|
2010-06-21 18:07:48 -04:00
|
|
|
void closeTimedoutClients(void);
|
2015-07-26 09:20:46 -04:00
|
|
|
void freeClient(client *c);
|
|
|
|
void freeClientAsync(client *c);
|
|
|
|
void resetClient(client *c);
|
2010-06-21 18:07:48 -04:00
|
|
|
void sendReplyToClient(aeEventLoop *el, int fd, void *privdata, int mask);
|
2018-11-08 06:28:56 -05:00
|
|
|
void *addReplyDeferredLen(client *c);
|
|
|
|
void setDeferredArrayLen(client *c, void *node, long length);
|
|
|
|
void setDeferredMapLen(client *c, void *node, long length);
|
|
|
|
void setDeferredSetLen(client *c, void *node, long length);
|
|
|
|
void setDeferredAttributeLen(client *c, void *node, long length);
|
|
|
|
void setDeferredPushLen(client *c, void *node, long length);
|
2015-07-26 09:20:46 -04:00
|
|
|
void processInputBuffer(client *c);
|
2018-09-03 12:17:25 -04:00
|
|
|
void processInputBufferAndReplicate(client *c);
|
2019-02-21 17:13:08 -05:00
|
|
|
void processGopherRequest(client *c);
|
2013-05-14 11:15:25 -04:00
|
|
|
void acceptHandler(aeEventLoop *el, int fd, void *privdata, int mask);
|
2010-10-13 12:34:24 -04:00
|
|
|
void acceptTcpHandler(aeEventLoop *el, int fd, void *privdata, int mask);
|
|
|
|
void acceptUnixHandler(aeEventLoop *el, int fd, void *privdata, int mask);
|
2010-06-21 18:07:48 -04:00
|
|
|
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask);
|
2018-11-27 05:58:55 -05:00
|
|
|
void addReplyNull(client *c);
|
2018-11-30 10:31:02 -05:00
|
|
|
void addReplyNullArray(client *c);
|
2018-12-04 12:00:35 -05:00
|
|
|
void addReplyBool(client *c, int b);
|
2018-12-10 10:55:20 -05:00
|
|
|
void addReplyVerbatim(client *c, const char *s, size_t len, const char *ext);
|
2018-12-17 10:59:19 -05:00
|
|
|
void addReplyProto(client *c, const char *s, size_t len);
|
2019-03-24 07:10:55 -04:00
|
|
|
void AddReplyFromClient(client *c, client *src);
|
2015-07-26 09:20:46 -04:00
|
|
|
void addReplyBulk(client *c, robj *obj);
|
|
|
|
void addReplyBulkCString(client *c, const char *s);
|
|
|
|
void addReplyBulkCBuffer(client *c, const void *p, size_t len);
|
|
|
|
void addReplyBulkLongLong(client *c, long long ll);
|
|
|
|
void addReply(client *c, robj *obj);
|
|
|
|
void addReplySds(client *c, sds s);
|
|
|
|
void addReplyBulkSds(client *c, sds s);
|
|
|
|
void addReplyError(client *c, const char *err);
|
|
|
|
void addReplyStatus(client *c, const char *status);
|
|
|
|
void addReplyDouble(client *c, double d);
|
2016-02-18 16:08:47 -05:00
|
|
|
void addReplyHumanLongDouble(client *c, long double d);
|
2015-07-26 09:20:46 -04:00
|
|
|
void addReplyLongLong(client *c, long long ll);
|
2018-11-08 07:05:50 -05:00
|
|
|
void addReplyArrayLen(client *c, long length);
|
|
|
|
void addReplyMapLen(client *c, long length);
|
|
|
|
void addReplySetLen(client *c, long length);
|
|
|
|
void addReplyAttributeLen(client *c, long length);
|
|
|
|
void addReplyPushLen(client *c, long length);
|
2017-11-27 10:57:44 -05:00
|
|
|
void addReplyHelp(client *c, const char **help);
|
2018-07-02 12:49:34 -04:00
|
|
|
void addReplySubcommandSyntaxError(client *c);
|
2018-12-04 06:46:16 -05:00
|
|
|
void addReplyLoadedModules(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void copyClientOutputBuffer(client *dst, client *src);
|
2016-05-18 01:08:43 -04:00
|
|
|
size_t sdsZmallocSize(sds s);
|
|
|
|
size_t getStringObjectSdsUsedMemory(robj *o);
|
2018-02-21 13:18:34 -05:00
|
|
|
void freeClientReplyValue(void *o);
|
2010-06-21 18:07:48 -04:00
|
|
|
void *dupClientReplyValue(void *o);
|
2011-01-14 04:20:02 -05:00
|
|
|
void getClientsMaxBuffers(unsigned long *longest_output_list,
|
|
|
|
unsigned long *biggest_input_buffer);
|
2015-07-26 09:20:46 -04:00
|
|
|
char *getClientPeerId(client *client);
|
|
|
|
sds catClientInfoString(sds s, client *client);
|
2018-06-28 05:43:05 -04:00
|
|
|
sds getAllClientsInfoString(int type);
|
2015-07-26 09:20:46 -04:00
|
|
|
void rewriteClientCommandVector(client *c, int argc, ...);
|
|
|
|
void rewriteClientCommandArgument(client *c, int i, robj *newval);
|
|
|
|
void replaceClientCommandVector(client *c, int argc, robj **argv);
|
|
|
|
unsigned long getClientOutputBufferMemoryUsage(client *c);
|
2012-01-23 10:12:37 -05:00
|
|
|
void freeClientsInAsyncFreeQueue(void);
|
2015-07-26 09:20:46 -04:00
|
|
|
void asyncCloseClientOnOutputBufferLimitReached(client *c);
|
|
|
|
int getClientType(client *c);
|
2014-06-16 04:43:05 -04:00
|
|
|
int getClientTypeByName(char *name);
|
|
|
|
char *getClientTypeName(int class);
|
2012-02-06 10:56:42 -05:00
|
|
|
void flushSlavesOutputBuffers(void);
|
2012-03-29 03:24:02 -04:00
|
|
|
void disconnectSlaves(void);
|
2013-08-22 08:05:07 -04:00
|
|
|
int listenToPort(int port, int *fds, int *count);
|
2014-02-04 09:52:09 -05:00
|
|
|
void pauseClients(mstime_t duration);
|
|
|
|
int clientsArePaused(void);
|
2014-04-24 11:36:47 -04:00
|
|
|
int processEventsWhileBlocked(void);
|
2015-09-30 11:23:34 -04:00
|
|
|
int handleClientsWithPendingWrites(void);
|
2017-10-24 02:35:05 -04:00
|
|
|
int handleClientsWithPendingWritesUsingThreads(void);
|
2019-03-31 15:59:50 -04:00
|
|
|
int handleClientsWithPendingReadsUsingThreads(void);
|
2019-03-25 13:05:06 -04:00
|
|
|
int stopThreadedIOIfNeeded(void);
|
2015-09-30 10:41:48 -04:00
|
|
|
int clientHasPendingReplies(client *c);
|
2015-09-30 10:56:02 -04:00
|
|
|
void unlinkClient(client *c);
|
2015-11-06 10:19:59 -05:00
|
|
|
int writeToClient(int fd, client *c, int handler_installed);
|
2017-12-05 09:59:56 -05:00
|
|
|
void linkClient(client *c);
|
2018-10-09 07:15:41 -04:00
|
|
|
void protectClient(client *c);
|
|
|
|
void unprotectClient(client *c);
|
2017-10-24 02:35:05 -04:00
|
|
|
void initThreadedIO(void);
|
2019-07-03 13:16:20 -04:00
|
|
|
client *lookupClientByID(uint64_t id);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2010-09-02 13:52:24 -04:00
|
|
|
#ifdef __GNUC__
|
2015-07-26 09:20:46 -04:00
|
|
|
void addReplyErrorFormat(client *c, const char *fmt, ...)
|
2010-09-02 13:52:24 -04:00
|
|
|
__attribute__((format(printf, 2, 3)));
|
2015-07-26 09:20:46 -04:00
|
|
|
void addReplyStatusFormat(client *c, const char *fmt, ...)
|
2010-09-02 13:52:24 -04:00
|
|
|
__attribute__((format(printf, 2, 3)));
|
|
|
|
#else
|
2015-07-26 09:20:46 -04:00
|
|
|
void addReplyErrorFormat(client *c, const char *fmt, ...);
|
|
|
|
void addReplyStatusFormat(client *c, const char *fmt, ...);
|
2010-09-02 13:52:24 -04:00
|
|
|
#endif
|
|
|
|
|
2019-06-29 20:08:41 -04:00
|
|
|
/* Client side caching (tracking mode) */
|
2019-06-30 06:19:04 -04:00
|
|
|
void enableTracking(client *c, uint64_t redirect_to);
|
2019-06-29 20:08:41 -04:00
|
|
|
void disableTracking(client *c);
|
2019-07-03 06:42:16 -04:00
|
|
|
void trackingRememberKeys(client *c);
|
2019-07-03 13:16:20 -04:00
|
|
|
void trackingInvalidateKey(robj *keyobj);
|
2019-07-17 08:33:52 -04:00
|
|
|
void trackingInvalidateKeysOnFlush(int dbid);
|
2019-07-22 12:59:53 -04:00
|
|
|
void trackingLimitUsedSlots(void);
|
2019-07-23 05:02:14 -04:00
|
|
|
unsigned long long trackingGetUsedSlots(void);
|
2019-06-29 20:08:41 -04:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* List data type */
|
|
|
|
void listTypeTryConversion(robj *subject, robj *value);
|
|
|
|
void listTypePush(robj *subject, robj *value, int where);
|
|
|
|
robj *listTypePop(robj *subject, int where);
|
2016-06-20 16:08:06 -04:00
|
|
|
unsigned long listTypeLength(const robj *subject);
|
2012-01-31 04:35:52 -05:00
|
|
|
listTypeIterator *listTypeInitIterator(robj *subject, long index, unsigned char direction);
|
2010-06-21 18:07:48 -04:00
|
|
|
void listTypeReleaseIterator(listTypeIterator *li);
|
|
|
|
int listTypeNext(listTypeIterator *li, listTypeEntry *entry);
|
|
|
|
robj *listTypeGet(listTypeEntry *entry);
|
|
|
|
void listTypeInsert(listTypeEntry *entry, robj *value, int where);
|
|
|
|
int listTypeEqual(listTypeEntry *entry, robj *o);
|
2014-11-13 14:11:47 -05:00
|
|
|
void listTypeDelete(listTypeIterator *iter, listTypeEntry *entry);
|
2010-06-21 18:07:48 -04:00
|
|
|
void listTypeConvert(robj *subject, int enc);
|
2015-07-26 09:20:46 -04:00
|
|
|
void unblockClientWaitingData(client *c);
|
|
|
|
void popGenericCommand(client *c, int where);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* MULTI/EXEC/WATCH... */
|
2015-07-26 09:20:46 -04:00
|
|
|
void unwatchAllKeys(client *c);
|
|
|
|
void initClientMultiState(client *c);
|
|
|
|
void freeClientMultiState(client *c);
|
|
|
|
void queueMultiCommand(client *c);
|
2010-06-21 18:07:48 -04:00
|
|
|
void touchWatchedKey(redisDb *db, robj *key);
|
|
|
|
void touchWatchedKeysOnFlush(int dbid);
|
2015-07-26 09:20:46 -04:00
|
|
|
void discardTransaction(client *c);
|
|
|
|
void flagTransaction(client *c);
|
2015-10-29 07:39:07 -04:00
|
|
|
void execCommandPropagateMulti(client *c);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Redis object implementation */
|
2013-01-24 05:27:10 -05:00
|
|
|
void decrRefCount(robj *o);
|
|
|
|
void decrRefCountVoid(void *o);
|
2010-06-21 18:07:48 -04:00
|
|
|
void incrRefCount(robj *o);
|
2015-07-30 05:46:31 -04:00
|
|
|
robj *makeObjectShared(robj *o);
|
2011-07-13 09:38:03 -04:00
|
|
|
robj *resetRefCount(robj *obj);
|
2010-06-21 18:07:48 -04:00
|
|
|
void freeStringObject(robj *o);
|
|
|
|
void freeListObject(robj *o);
|
|
|
|
void freeSetObject(robj *o);
|
|
|
|
void freeZsetObject(robj *o);
|
|
|
|
void freeHashObject(robj *o);
|
|
|
|
robj *createObject(int type, void *ptr);
|
2015-03-11 11:59:56 -04:00
|
|
|
robj *createStringObject(const char *ptr, size_t len);
|
|
|
|
robj *createRawStringObject(const char *ptr, size_t len);
|
|
|
|
robj *createEmbeddedStringObject(const char *ptr, size_t len);
|
2016-06-22 13:57:24 -04:00
|
|
|
robj *dupStringObject(const robj *o);
|
2015-07-31 12:01:23 -04:00
|
|
|
int isSdsRepresentableAsLongLong(sds s, long long *llval);
|
2011-04-27 07:24:52 -04:00
|
|
|
int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
|
2010-06-21 18:07:48 -04:00
|
|
|
robj *tryObjectEncoding(robj *o);
|
|
|
|
robj *getDecodedObject(robj *o);
|
|
|
|
size_t stringObjectLen(robj *o);
|
|
|
|
robj *createStringObjectFromLongLong(long long value);
|
2018-06-18 10:54:13 -04:00
|
|
|
robj *createStringObjectFromLongLongForValue(long long value);
|
2014-12-02 12:19:30 -05:00
|
|
|
robj *createStringObjectFromLongDouble(long double value, int humanfriendly);
|
2014-11-13 14:11:47 -05:00
|
|
|
robj *createQuicklistObject(void);
|
2010-06-21 18:07:48 -04:00
|
|
|
robj *createZiplistObject(void);
|
|
|
|
robj *createSetObject(void);
|
2010-07-02 13:57:12 -04:00
|
|
|
robj *createIntsetObject(void);
|
2010-06-21 18:07:48 -04:00
|
|
|
robj *createHashObject(void);
|
|
|
|
robj *createZsetObject(void);
|
2011-03-08 10:08:52 -05:00
|
|
|
robj *createZsetZiplistObject(void);
|
2017-08-30 06:40:27 -04:00
|
|
|
robj *createStreamObject(void);
|
2016-05-18 05:45:40 -04:00
|
|
|
robj *createModuleObject(moduleType *mt, void *value);
|
2015-07-26 09:20:46 -04:00
|
|
|
int getLongFromObjectOrReply(client *c, robj *o, long *target, const char *msg);
|
|
|
|
int checkType(client *c, robj *o, int type);
|
|
|
|
int getLongLongFromObjectOrReply(client *c, robj *o, long long *target, const char *msg);
|
|
|
|
int getDoubleFromObjectOrReply(client *c, robj *o, double *target, const char *msg);
|
2016-06-20 16:08:06 -04:00
|
|
|
int getDoubleFromObject(const robj *o, double *target);
|
2010-06-21 18:07:48 -04:00
|
|
|
int getLongLongFromObject(robj *o, long long *target);
|
2011-11-12 13:27:35 -05:00
|
|
|
int getLongDoubleFromObject(robj *o, long double *target);
|
2015-07-26 09:20:46 -04:00
|
|
|
int getLongDoubleFromObjectOrReply(client *c, robj *o, long double *target, const char *msg);
|
2010-06-21 18:07:48 -04:00
|
|
|
char *strEncoding(int encoding);
|
|
|
|
int compareStringObjects(robj *a, robj *b);
|
Fixed compareStringObject() and introduced collateStringObject().
compareStringObject was not always giving the same result when comparing
two exact strings, but encoded as integers or as sds strings, since it
switched to strcmp() when at least one of the strings were not sds
encoded.
For instance the two strings "123" and "123\x00456", where the first
string was integer encoded, would result into the old implementation of
compareStringObject() to return 0 as if the strings were equal, while
instead the second string is "greater" than the first in a binary
comparison.
The same comparison, but with "123" encoded as sds string, would instead
return a value < 0, as it is correct. It is not impossible that the
above caused some obscure bug, since the comparison was not always
deterministic, and compareStringObject() is used in the implementation
of skiplists, hash tables, and so forth.
At the same time, collateStringObject() was introduced by this commit, so
that it can be used by the SORT command to return sorted strings using
collation instead of binary comparison. See next commit.
2013-07-12 05:56:52 -04:00
|
|
|
int collateStringObjects(robj *a, robj *b);
|
2010-06-21 18:07:48 -04:00
|
|
|
int equalStringObjects(robj *a, robj *b);
|
2014-03-20 06:47:12 -04:00
|
|
|
unsigned long long estimateObjectIdleTime(robj *o);
|
2019-02-12 08:21:21 -05:00
|
|
|
void trimStringObjectIfNeeded(robj *o);
|
2015-07-26 09:28:00 -04:00
|
|
|
/* True when the object's encoding is one of the two sds-backed string
 * encodings (OBJ_ENCODING_RAW or OBJ_ENCODING_EMBSTR).
 * The argument is parenthesized so that any pointer expression (for example
 * `p + 1` or `*pp`) can be passed safely. Note that the argument is still
 * evaluated twice, so it must not have side effects. */
#define sdsEncodedObject(objptr) ((objptr)->encoding == OBJ_ENCODING_RAW || (objptr)->encoding == OBJ_ENCODING_EMBSTR)
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2010-10-24 10:22:52 -04:00
|
|
|
/* Synchronous I/O with timeout */
|
2012-03-31 05:21:45 -04:00
|
|
|
ssize_t syncWrite(int fd, char *ptr, ssize_t size, long long timeout);
|
|
|
|
ssize_t syncRead(int fd, char *ptr, ssize_t size, long long timeout);
|
|
|
|
ssize_t syncReadLine(int fd, char *ptr, ssize_t size, long long timeout);
|
2010-10-24 10:22:52 -04:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Replication */
|
|
|
|
void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc);
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out of band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because
during the work to implement it, different changes were needed in order
to make things working well.
2016-11-09 05:31:06 -05:00
|
|
|
void replicationFeedSlavesFromMasterStream(list *slaves, char *buf, size_t buflen);
|
2015-07-26 09:20:46 -04:00
|
|
|
void replicationFeedMonitors(client *c, list *monitors, int dictid, robj **argv, int argc);
|
2014-10-14 04:11:26 -04:00
|
|
|
void updateSlavesWaitingBgsave(int bgsaveerr, int type);
|
2010-11-04 12:29:53 -04:00
|
|
|
void replicationCron(void);
|
2013-01-30 12:33:16 -05:00
|
|
|
void replicationHandleMasterDisconnection(void);
|
2015-07-26 09:20:46 -04:00
|
|
|
void replicationCacheMaster(client *c);
|
2013-01-30 12:33:16 -05:00
|
|
|
void resizeReplicationBacklog(long long newsize);
|
2013-03-04 07:22:21 -05:00
|
|
|
void replicationSetMaster(char *ip, int port);
|
|
|
|
void replicationUnsetMaster(void);
|
2013-05-29 05:36:44 -04:00
|
|
|
void refreshGoodSlavesCount(void);
|
2013-06-24 12:57:31 -04:00
|
|
|
void replicationScriptCacheInit(void);
|
|
|
|
void replicationScriptCacheFlush(void);
|
|
|
|
void replicationScriptCacheAdd(sds sha1);
|
|
|
|
int replicationScriptCacheExists(sds sha1);
|
2013-12-04 09:52:20 -05:00
|
|
|
void processClientsWaitingReplicas(void);
|
2015-07-26 09:20:46 -04:00
|
|
|
void unblockClientWaitingReplicas(client *c);
|
2013-12-04 09:52:20 -05:00
|
|
|
int replicationCountAcksByOffset(long long offset);
|
2013-12-10 12:38:26 -05:00
|
|
|
void replicationSendNewlineToMaster(void);
|
2014-01-29 10:39:04 -05:00
|
|
|
long long replicationGetSlaveOffset(void);
|
2015-07-26 09:20:46 -04:00
|
|
|
char *replicationGetSlaveName(client *c);
|
2015-08-04 10:56:00 -04:00
|
|
|
long long getPsyncInitialOffset(void);
|
2015-08-05 07:34:46 -04:00
|
|
|
int replicationSetupSlaveForFullResync(client *slave, long long offset);
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out of band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because
during the work to implement it, different changes were needed in order
to make things working well.
2016-11-09 05:31:06 -05:00
|
|
|
void changeReplicationId(void);
|
|
|
|
void clearReplicationId2(void);
|
|
|
|
void chopReplicationBacklog(void);
|
2016-11-10 06:35:29 -05:00
|
|
|
void replicationCacheMasterUsingMyself(void);
|
Fix replication of SLAVEOF inside transaction.
In Redis 4.0 replication, with the introduction of PSYNC2, masters and
slaves replicate commands to cascading slaves and to the replication
backlog itself in a different way compared to the past.
Masters actually replicate the effects of client commands.
Slaves just propagate what they receive from masters.
This mechanism can cause problems when the configuration of an instance
is changed from master to slave inside a transaction. For instance
we could send to a master instance the following sequence:
MULTI
SLAVEOF 127.0.0.1 0
EXEC
SLAVEOF NO ONE
Before the fixes in this commit, the MULTI command used to be propagated
into the replication backlog, however after the SLAVEOF command the
instance is a slave, so the EXEC implementation failed to also propagate
the EXEC command. When the slaves of the above instance reconnected,
they were incrementally synchronized just sending a "MULTI". This put
the master client (in the slaves) into MULTI state, breaking the
replication.
Notably even Redis Sentinel uses the above approach in order to guarantee
that configuration changes are always performed together with rewrites
of the configuration and with clients disconnection. Sentinel does:
MULTI
SLAVEOF ...
CONFIG REWRITE
CLIENT KILL TYPE normal
EXEC
So this was a really problematic issue. However even with the fix in
this commit, that will add the final EXEC to the replication stream in
case the instance was switched from master to slave during the
transaction, the result would be to increment the slave replication
offset, so a successive reconnection with the new master, will not
permit a successful partial resynchronization: no way the new master can
provide us with the backlog needed, we incremented our offset to a value
that the new master cannot have.
However the EXEC implementation waits to emit the MULTI, so that if the
commands inside the transaction actually do not need to be replicated,
no commands propagation happens at all. From multi.c:
if (!must_propagate && !(c->cmd->flags & (CMD_READONLY|CMD_ADMIN))) {
execCommandPropagateMulti(c);
must_propagate = 1;
}
The above code is already modified by this commit you are reading.
Now also ADMIN commands do not trigger the emission of MULTI. It is actually
not clear why we do not just check for CMD_WRITE... Probably I wrote it this
way in order to make the code more reliable: better to over-emit MULTI
than not emitting it in time.
So this commit should indeed fix issue #3836 (verified), however it looks
like some reconsideration of this code path is needed in the long term.
BONUS POINT: The reverse bug.
Even in a read only slave "B", in a replication setup like:
A -> B -> C
There are commands without the READONLY nor the ADMIN flag, that are also
not flagged as WRITE commands. An example is just the PING command.
So if we send B the following sequence:
MULTI
PING
SLAVEOF NO ONE
EXEC
The result will be the reverse bug, where only EXEC is emitted, but not the
previous MULTI. However this apparently does not create problems in practice
but it is yet another acknowledgment of the fact that some work is needed here
in order to make this code path less surprising.
Note that there are many different approaches we could follow. For instance
MULTI/EXEC blocks containing administrative commands may be allowed ONLY
if all the commands are administrative ones, otherwise they could be
denied. When allowed, the commands could simply never be replicated at all.
2017-07-12 05:07:28 -04:00
|
|
|
void feedReplicationBacklog(void *ptr, size_t len);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2010-11-08 05:52:03 -05:00
|
|
|
/* Generic persistence functions */
|
2019-07-01 08:22:29 -04:00
|
|
|
void startLoadingFile(FILE *fp, char *filename);
|
|
|
|
void startLoading(size_t size);
|
2010-11-08 05:52:03 -05:00
|
|
|
void loadingProgress(off_t pos);
|
|
|
|
void stopLoading(void);
|
|
|
|
|
2018-07-31 07:09:38 -04:00
|
|
|
#define DISK_ERROR_TYPE_AOF 1 /* Don't accept writes: AOF errors. */
|
|
|
|
#define DISK_ERROR_TYPE_RDB 2 /* Don't accept writes: RDB errors. */
|
|
|
|
#define DISK_ERROR_TYPE_NONE 0 /* No problems, we can accept writes. */
|
|
|
|
int writeCommandsDeniedByDiskError(void);
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* RDB persistence */
|
2011-05-13 11:31:00 -04:00
|
|
|
#include "rdb.h"
|
PSYNC2: different improvements to Redis replication.
The gist of the changes is that now, partial resynchronizations between
slaves and masters (without the need of a full resync with RDB transfer
and so forth), work in a number of cases when it was impossible
in the past. For instance:
1. When a slave is promoted to master, the slaves of the old master can
partially resynchronize with the new master.
2. Chained slaves (slaves of slaves) can be moved to replicate to other
slaves or the master itself, without requiring a full resync.
3. The master itself, after being turned into a slave, is able to
partially resynchronize with the new master, when it joins replication
again.
In order to obtain this, the following main changes were operated:
* Slaves also take a replication backlog, not just masters.
* Same stream replication for all the slaves and sub slaves. The
replication stream is identical from the top level master to its slaves
and is also the same from the slaves to their sub-slaves and so forth.
This means that if a slave is later promoted to master, it has the
same replication backlog, and can partially resynchronize with its
slaves (that were previously slaves of the old master).
* A given replication history is no longer identified by the `runid` of
a Redis node. There is instead a `replication ID` which changes every
time the instance has a new history no longer coherent with the past
one. So, for example, slaves publish the same replication history of
their master, however when they are turned into masters, they publish
a new replication ID, but still remember the old ID, so that they are
able to partially resynchronize with slaves of the old master (up to a
given offset).
* The replication protocol was slightly modified so that a new extended
+CONTINUE reply from the master is able to inform the slave of a
replication ID change.
* REPLCONF CAPA is used in order to notify masters that a slave is able
to understand the new +CONTINUE reply.
* The RDB file was extended with an auxiliary field that is able to
select a given DB after loading in the slave, so that the slave can
continue receiving the replication stream from the point it was
disconnected without requiring the master to insert "SELECT" statements.
This is useful in order to guarantee the "same stream" property, because
the slave must be able to accumulate an identical backlog.
* Slave pings to sub-slaves are now sent in a special form, when the
top-level master is disconnected, in order not to interfere with the
replication stream. We just use out of band "\n" bytes as in other parts
of the Redis protocol.
An old design document is available here:
https://gist.github.com/antirez/ae068f95c0d084891305
However the implementation is not identical to the description because
during the work to implement it, different changes were needed in order
to make things working well.
2016-11-09 05:31:06 -05:00
|
|
|
int rdbSaveRio(rio *rdb, int *error, int flags, rdbSaveInfo *rsi);
|
2019-01-21 05:28:44 -05:00
|
|
|
void killRDBChild(void);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* AOF persistence */
|
2011-09-16 06:36:17 -04:00
|
|
|
void flushAppendOnlyFile(int force);
|
2010-06-21 18:07:48 -04:00
|
|
|
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc);
|
|
|
|
void aofRemoveTempFile(pid_t childpid);
|
|
|
|
int rewriteAppendOnlyFileBackground(void);
|
|
|
|
int loadAppendOnlyFile(char *filename);
|
|
|
|
void stopAppendOnly(void);
|
|
|
|
int startAppendOnly(void);
|
2011-01-07 12:15:14 -05:00
|
|
|
void backgroundRewriteDoneHandler(int exitcode, int bysignal);
|
Allow an AOF rewrite buffer > 2GB (Fix for issue #504).
During the AOF rewrite process, the parent process needs to accumulate
the new writes in an in-memory buffer: when the child will terminate the
AOF rewriting process this buffer (that is the difference between the
dataset when the rewrite was started, and the current dataset) is
flushed to the new AOF file.
We used to implement this buffer using an sds.c string, but sds.c has a
2GB limit. Sometimes the dataset can be big enough, the amount of writes
so high, and the rewrite process slow enough that we overflow the 2GB
limit, causing a crash, documented on github by issue #504.
In order to prevent this from happening, this commit introduces a new
system to accumulate writes, implemented by a linked list of blocks of
10 MB each, so that we also avoid paying the reallocation cost.
Note that theoretically modern operating systems may implement realloc()
simply as a remapping of the old pages, thus with very good performances,
see for instance the mremap() syscall on Linux. However this is not
always true, and jemalloc by default avoids doing this because there are
issues with the current implementation of mremap().
For this reason we are using a linked list of blocks instead of a single
block that gets reallocated again and again.
The changes in this commit lacks testing, that will be performed before
merging into the unstable branch. This fix will not enter 2.4 because it
is too invasive. However 2.4 will log a warning when the AOF rewrite
buffer is near to the 2GB limit.
2012-05-22 07:03:41 -04:00
|
|
|
void aofRewriteBufferReset(void);
|
|
|
|
unsigned long aofRewriteBufferSize(void);
|
2016-08-09 10:41:40 -04:00
|
|
|
ssize_t aofReadDiffFromParent(void);
|
2019-01-21 05:28:44 -05:00
|
|
|
void killAppendOnlyChild(void);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2016-09-19 07:45:20 -04:00
|
|
|
/* Child info */
|
|
|
|
void openChildInfoPipe(void);
|
|
|
|
void closeChildInfoPipe(void);
|
|
|
|
void sendChildInfo(int process_type);
|
|
|
|
void receiveChildInfo(void);
|
|
|
|
|
2019-07-17 01:51:02 -04:00
|
|
|
/* Fork helpers */
|
|
|
|
/* Declared with an explicit `(void)` parameter list: in C (before C23) empty
 * parentheses declare a function with unspecified parameters, which disables
 * argument checking at call sites. */
int redisFork(void);
|
|
|
|
/* Declared with an explicit `(void)` parameter list so the compiler treats
 * this as a real prototype and rejects calls that pass arguments. */
int hasForkChild(void);
|
|
|
|
void sendChildCOWInfo(int ptype, char *pname);
|
|
|
|
|
2018-12-21 11:16:22 -05:00
|
|
|
/* acl.c -- Authentication related prototypes. */
|
2019-02-05 04:48:17 -05:00
|
|
|
extern rax *Users;
|
2019-01-11 05:32:41 -05:00
|
|
|
extern user *DefaultUser;
|
2019-01-10 10:39:32 -05:00
|
|
|
void ACLInit(void);
|
2019-01-16 12:31:05 -05:00
|
|
|
/* Return values for ACLCheckUserCredentials(). */
|
|
|
|
#define ACL_OK 0
|
|
|
|
#define ACL_DENIED_CMD 1
|
|
|
|
#define ACL_DENIED_KEY 2
|
2018-12-21 11:16:22 -05:00
|
|
|
int ACLCheckUserCredentials(robj *username, robj *password);
|
2019-02-25 10:40:58 -05:00
|
|
|
int ACLAuthenticateUser(client *c, robj *username, robj *password);
|
2019-01-09 15:31:29 -05:00
|
|
|
unsigned long ACLGetCommandID(const char *cmdname);
|
2019-01-10 10:33:48 -05:00
|
|
|
user *ACLGetUserByName(const char *name, size_t namelen);
|
2019-01-14 07:22:56 -05:00
|
|
|
int ACLCheckCommandPerm(client *c);
|
2019-01-17 12:05:43 -05:00
|
|
|
int ACLSetUser(user *u, const char *op, ssize_t oplen);
|
2019-01-18 05:49:30 -05:00
|
|
|
sds ACLDefaultUserFirstPassword(void);
|
2019-01-23 10:59:09 -05:00
|
|
|
uint64_t ACLGetCommandCategoryFlagByName(const char *name);
|
2019-02-04 07:00:38 -05:00
|
|
|
int ACLAppendUserForLoading(sds *argv, int argc, int *argc_err);
|
2019-02-04 07:04:35 -05:00
|
|
|
char *ACLSetUserStringError(void);
|
2019-02-04 10:35:15 -05:00
|
|
|
int ACLLoadConfiguredUsers(void);
|
2019-02-05 04:48:17 -05:00
|
|
|
sds ACLDescribeUser(user *u);
|
2019-02-07 11:20:03 -05:00
|
|
|
void ACLLoadUsersAtStartup(void);
|
2019-02-13 11:12:10 -05:00
|
|
|
void addReplyCommandCategories(client *c, struct redisCommand *cmd);
|
2018-12-21 11:16:22 -05:00
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Sorted sets data type */
|
2011-04-28 13:00:33 -04:00
|
|
|
|
2016-04-14 09:58:49 -04:00
|
|
|
/* Input flags. */
|
|
|
|
#define ZADD_NONE 0
|
|
|
|
#define ZADD_INCR (1<<0) /* Increment the score instead of setting it. */
|
|
|
|
#define ZADD_NX (1<<1) /* Don't touch elements not already existing. */
|
2018-07-01 01:24:50 -04:00
|
|
|
#define ZADD_XX (1<<2) /* Only touch elements already existing. */
|
2016-04-14 09:58:49 -04:00
|
|
|
|
|
|
|
/* Output flags. */
|
|
|
|
#define ZADD_NOP (1<<3) /* Operation not performed because of conditionals.*/
|
2018-07-01 01:24:50 -04:00
|
|
|
#define ZADD_NAN (1<<4) /* The resulting score is not a number (NaN) -- TODO confirm in zsetAdd(). */
|
2016-04-14 09:58:49 -04:00
|
|
|
#define ZADD_ADDED (1<<5) /* The element was new and was added. */
|
|
|
|
#define ZADD_UPDATED (1<<6) /* The element already existed, score updated. */
|
|
|
|
|
|
|
|
/* Flags only used by the ZADD command but not by zsetAdd() API: */
|
|
|
|
#define ZADD_CH (1<<16) /* Return num of elements added or updated. */
|
|
|
|
|
2012-02-04 02:11:31 -05:00
|
|
|
/* Struct to hold an inclusive/exclusive range spec by score comparison. */
|
2011-04-28 13:00:33 -04:00
|
|
|
typedef struct {
|
|
|
|
double min, max; /* lower and upper bound of the score range */
|
|
|
|
int minex, maxex; /* flags: is the min (minex) / max (maxex) bound exclusive? */
|
|
|
|
} zrangespec;
|
|
|
|
|
2012-02-04 02:11:31 -05:00
|
|
|
/* Struct to hold an inclusive/exclusive range spec by lexicographic comparison. */
|
|
|
|
typedef struct {
|
2015-08-04 03:20:55 -04:00
|
|
|
sds min, max; /* Range bounds as sds strings; may be set to shared.(minstring|maxstring) */
|
2012-02-04 02:11:31 -05:00
|
|
|
int minex, maxex; /* flags: is the min (minex) / max (maxex) bound exclusive? */
|
|
|
|
} zlexrangespec;
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
zskiplist *zslCreate(void);
|
|
|
|
void zslFree(zskiplist *zsl);
|
2015-08-04 03:20:55 -04:00
|
|
|
zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele);
|
|
|
|
unsigned char *zzlInsert(unsigned char *zl, sds ele, double score);
|
|
|
|
int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node);
|
2014-04-17 08:30:12 -04:00
|
|
|
zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range);
|
|
|
|
zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range);
|
2011-03-14 08:30:06 -04:00
|
|
|
double zzlGetScore(unsigned char *sptr);
|
|
|
|
void zzlNext(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
|
|
|
|
void zzlPrev(unsigned char *zl, unsigned char **eptr, unsigned char **sptr);
|
2016-04-19 09:22:33 -04:00
|
|
|
unsigned char *zzlFirstInRange(unsigned char *zl, zrangespec *range);
|
2016-04-20 06:38:14 -04:00
|
|
|
unsigned char *zzlLastInRange(unsigned char *zl, zrangespec *range);
|
2017-12-08 02:37:08 -05:00
|
|
|
unsigned long zsetLength(const robj *zobj);
|
2011-03-10 11:50:13 -05:00
|
|
|
void zsetConvert(robj *zobj, int encoding);
|
2016-02-18 04:24:11 -05:00
|
|
|
void zsetConvertToZiplistIfNeeded(robj *zobj, size_t maxelelen);
|
2015-08-04 03:20:55 -04:00
|
|
|
int zsetScore(robj *zobj, sds member, double *score);
|
|
|
|
unsigned long zslGetRank(zskiplist *zsl, double score, sds o);
|
2016-04-14 09:58:49 -04:00
|
|
|
int zsetAdd(robj *zobj, double score, sds ele, int *flags, double *newscore);
|
2016-04-15 06:46:18 -04:00
|
|
|
long zsetRank(robj *zobj, sds ele, int reverse);
|
2016-04-15 09:20:25 -04:00
|
|
|
int zsetDel(robj *zobj, sds ele);
|
2018-05-11 12:00:32 -04:00
|
|
|
void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey, robj *countarg);
|
2016-04-19 09:22:33 -04:00
|
|
|
sds ziplistGetObject(unsigned char *sptr);
|
2016-04-19 11:02:24 -04:00
|
|
|
int zslValueGteMin(double value, zrangespec *spec);
|
|
|
|
int zslValueLteMax(double value, zrangespec *spec);
|
2016-04-20 17:01:40 -04:00
|
|
|
void zslFreeLexRange(zlexrangespec *spec);
|
2016-04-21 03:27:13 -04:00
|
|
|
int zslParseLexRange(robj *min, robj *max, zlexrangespec *spec);
|
|
|
|
unsigned char *zzlFirstInLexRange(unsigned char *zl, zlexrangespec *range);
|
|
|
|
unsigned char *zzlLastInLexRange(unsigned char *zl, zlexrangespec *range);
|
|
|
|
zskiplistNode *zslFirstInLexRange(zskiplist *zsl, zlexrangespec *range);
|
|
|
|
zskiplistNode *zslLastInLexRange(zskiplist *zsl, zlexrangespec *range);
|
2016-04-21 05:17:00 -04:00
|
|
|
int zzlLexValueGteMin(unsigned char *p, zlexrangespec *spec);
|
|
|
|
int zzlLexValueLteMax(unsigned char *p, zlexrangespec *spec);
|
|
|
|
int zslLexValueGteMin(sds value, zlexrangespec *spec);
|
|
|
|
int zslLexValueLteMax(sds value, zlexrangespec *spec);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Core functions */
|
2018-04-11 06:48:26 -04:00
|
|
|
int getMaxmemoryState(size_t *total, size_t *logical, size_t *tofree, float *level);
|
2018-02-21 13:18:34 -05:00
|
|
|
/* Declared with an explicit `(void)` parameter list, matching the other
 * no-argument prototypes in this header, so that calls passing arguments are
 * diagnosed at compile time. */
size_t freeMemoryGetNotCountedMemory(void);
|
2012-02-04 08:05:54 -05:00
|
|
|
int freeMemoryIfNeeded(void);
|
2018-12-12 05:37:15 -05:00
|
|
|
int freeMemoryIfNeededAndSafe(void);
|
2015-07-26 09:20:46 -04:00
|
|
|
int processCommand(client *c);
|
2011-03-06 11:49:22 -05:00
|
|
|
void setupSignalHandlers(void);
|
2010-11-03 06:23:59 -04:00
|
|
|
struct redisCommand *lookupCommand(sds name);
|
|
|
|
struct redisCommand *lookupCommandByCString(char *s);
|
2013-03-06 10:28:26 -05:00
|
|
|
struct redisCommand *lookupCommandOrOriginal(sds name);
|
2015-07-26 09:20:46 -04:00
|
|
|
void call(client *c, int flags);
|
2012-02-28 10:20:41 -05:00
|
|
|
void propagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags);
|
2012-02-28 12:03:08 -05:00
|
|
|
void alsoPropagate(struct redisCommand *cmd, int dbid, robj **argv, int argc, int target);
|
2015-07-26 09:20:46 -04:00
|
|
|
void forceCommandPropagation(client *c, int flags);
|
|
|
|
void preventCommandPropagation(client *c);
|
2015-10-29 06:05:27 -04:00
|
|
|
void preventCommandAOF(client *c);
|
|
|
|
void preventCommandReplication(client *c);
|
2010-06-21 18:07:48 -04:00
|
|
|
/* NOTE(review): the empty parentheses make this an unprototyped declaration,
 * so arguments passed at call sites are not checked by the compiler. Confirm
 * the parameter list of the definition and spell it out here. */
int prepareForShutdown();
|
2013-02-27 06:27:15 -05:00
|
|
|
/* serverLog() takes a level, a printf-style format string and variadic
 * arguments. When __GNUC__ is defined the declaration carries the format
 * attribute so the compiler checks the variadic arguments against 'fmt'
 * (the format string is argument 2, the values to format start at argument 3).
 * Other compilers get the plain prototype. */
#ifdef __GNUC__
|
2015-07-26 09:17:43 -04:00
|
|
|
void serverLog(int level, const char *fmt, ...)
|
2013-02-27 06:27:15 -05:00
|
|
|
__attribute__((format(printf, 2, 3)));
|
|
|
|
#else
|
2015-07-26 09:17:43 -04:00
|
|
|
void serverLog(int level, const char *fmt, ...);
|
2013-02-27 06:27:15 -05:00
|
|
|
#endif
|
2015-07-26 09:17:43 -04:00
|
|
|
void serverLogRaw(int level, const char *msg);
|
|
|
|
void serverLogFromHandler(int level, const char *msg);
|
2014-03-15 19:51:54 -04:00
|
|
|
void usage(void);
|
2010-06-21 18:07:48 -04:00
|
|
|
void updateDictResizePolicy(void);
|
|
|
|
int htNeedsResize(dict *dict);
|
2010-11-03 06:23:59 -04:00
|
|
|
void populateCommandTable(void);
|
2011-01-24 04:56:06 -05:00
|
|
|
void resetCommandTableStats(void);
|
2013-06-28 11:08:03 -04:00
|
|
|
void adjustOpenFilesLimit(void);
|
2013-07-05 05:47:20 -04:00
|
|
|
void closeListeningSockets(int unlink_unix_socket);
|
2014-03-19 07:55:49 -04:00
|
|
|
void updateCachedTime(void);
|
|
|
|
void resetServerStats(void);
|
2016-12-29 20:37:52 -05:00
|
|
|
void activeDefragCycle(void);
|
2014-03-20 06:47:12 -04:00
|
|
|
unsigned int getLRUClock(void);
|
2017-05-09 05:57:09 -04:00
|
|
|
unsigned int LRU_CLOCK(void);
|
2016-04-22 11:43:48 -04:00
|
|
|
const char *evictPolicyToString(void);
|
2016-09-15 03:42:51 -04:00
|
|
|
struct redisMemOverhead *getMemoryOverheadData(void);
|
|
|
|
void freeMemoryOverheadData(struct redisMemOverhead *mh);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2015-10-13 04:58:08 -04:00
|
|
|
#define RESTART_SERVER_NONE 0
|
|
|
|
#define RESTART_SERVER_GRACEFULLY (1<<0) /* Do proper shutdown. */
|
|
|
|
#define RESTART_SERVER_CONFIG_REWRITE (1<<1) /* CONFIG REWRITE before restart.*/
|
|
|
|
int restartServer(int flags, mstime_t delay);
|
|
|
|
|
2010-07-02 13:57:12 -04:00
|
|
|
/* Set data type */
|
2015-07-31 12:01:23 -04:00
|
|
|
robj *setTypeCreate(sds value);
|
|
|
|
int setTypeAdd(robj *subject, sds value);
|
|
|
|
int setTypeRemove(robj *subject, sds value);
|
|
|
|
int setTypeIsMember(robj *subject, sds value);
|
2010-08-21 05:25:13 -04:00
|
|
|
setTypeIterator *setTypeInitIterator(robj *subject);
|
|
|
|
void setTypeReleaseIterator(setTypeIterator *si);
|
2015-07-31 12:01:23 -04:00
|
|
|
int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele);
|
|
|
|
sds setTypeNextObject(setTypeIterator *si);
|
|
|
|
int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele);
|
2014-12-18 05:39:58 -05:00
|
|
|
unsigned long setTypeRandomElements(robj *set, unsigned long count, robj *aux_set);
|
2016-06-20 16:08:06 -04:00
|
|
|
unsigned long setTypeSize(const robj *subject);
|
2010-07-02 13:57:12 -04:00
|
|
|
void setTypeConvert(robj *subject, int enc);
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Hash data type */
|
2016-04-25 09:39:33 -04:00
|
|
|
/* Bit flags: TAKE_FIELD is bit 0, TAKE_VALUE is bit 1, and HASH_SET_COPY is
 * the zero value (neither bit set), so the TAKE flags can be OR-ed together.
 * NOTE(review): presumably these are the 'flags' argument of hashTypeSet()
 * and the TAKE flags hand ownership of the passed SDS strings to the callee;
 * confirm against the implementation. */
#define HASH_SET_TAKE_FIELD (1<<0)
|
|
|
|
#define HASH_SET_TAKE_VALUE (1<<1)
|
|
|
|
#define HASH_SET_COPY 0
|
|
|
|
|
2012-01-03 01:14:10 -05:00
|
|
|
void hashTypeConvert(robj *o, int enc);
|
2010-06-21 18:07:48 -04:00
|
|
|
void hashTypeTryConversion(robj *subject, robj **argv, int start, int end);
|
2015-09-23 03:33:23 -04:00
|
|
|
int hashTypeExists(robj *o, sds key);
|
|
|
|
int hashTypeDelete(robj *o, sds key);
|
2016-06-20 16:08:06 -04:00
|
|
|
unsigned long hashTypeLength(const robj *o);
|
2010-06-21 18:07:48 -04:00
|
|
|
hashTypeIterator *hashTypeInitIterator(robj *subject);
|
|
|
|
void hashTypeReleaseIterator(hashTypeIterator *hi);
|
|
|
|
int hashTypeNext(hashTypeIterator *hi);
|
2012-01-03 01:14:10 -05:00
|
|
|
void hashTypeCurrentFromZiplist(hashTypeIterator *hi, int what,
|
|
|
|
unsigned char **vstr,
|
|
|
|
unsigned int *vlen,
|
|
|
|
long long *vll);
|
2015-09-23 03:33:23 -04:00
|
|
|
sds hashTypeCurrentFromHashTable(hashTypeIterator *hi, int what);
|
|
|
|
void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr, unsigned int *vlen, long long *vll);
|
|
|
|
sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what);
|
2015-07-26 09:20:46 -04:00
|
|
|
robj *hashTypeLookupWriteOrCreate(client *c, robj *key);
|
2015-09-23 03:33:23 -04:00
|
|
|
robj *hashTypeGetValueObject(robj *o, sds field);
|
2016-04-25 09:39:33 -04:00
|
|
|
int hashTypeSet(robj *o, sds field, sds value, int flags);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Pub / Sub */
|
2015-07-26 09:20:46 -04:00
|
|
|
int pubsubUnsubscribeAllChannels(client *c, int notify);
|
|
|
|
int pubsubUnsubscribeAllPatterns(client *c, int notify);
|
2010-06-21 18:07:48 -04:00
|
|
|
void freePubsubPattern(void *p);
|
|
|
|
int listMatchPubsubPattern(void *a, void *b);
|
2011-10-07 10:34:16 -04:00
|
|
|
int pubsubPublishMessage(robj *channel, robj *message);
|
2019-07-05 06:24:28 -04:00
|
|
|
void addReplyPubsubMessage(client *c, robj *channel, robj *msg);
|
2013-01-25 07:19:08 -05:00
|
|
|
|
|
|
|
/* Keyspace events notification */
|
|
|
|
void notifyKeyspaceEvent(int type, char *event, robj *key, int dbid);
|
|
|
|
int keyspaceEventsStringToFlags(char *classes);
|
|
|
|
sds keyspaceEventsFlagsToString(int flags);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Configuration */
|
2011-12-01 07:44:53 -05:00
|
|
|
void loadServerConfig(char *filename, char *options);
|
2010-06-21 18:07:48 -04:00
|
|
|
void appendServerSaveParams(time_t seconds, int changes);
|
2014-03-15 19:51:54 -04:00
|
|
|
void resetServerSaveParams(void);
|
2013-11-19 03:48:12 -05:00
|
|
|
struct rewriteConfigState; /* Forward declaration to export API. */
|
2015-03-11 11:59:56 -04:00
|
|
|
void rewriteConfigRewriteLine(struct rewriteConfigState *state, const char *option, sds line, int force);
|
2013-11-19 04:13:04 -05:00
|
|
|
int rewriteConfig(char *path);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* db.c -- Keyspace access API */
|
|
|
|
int removeExpire(redisDb *db, robj *key);
|
2015-10-02 09:27:57 -04:00
|
|
|
void propagateExpire(redisDb *db, robj *key, int lazy);
|
2010-06-21 18:07:48 -04:00
|
|
|
int expireIfNeeded(redisDb *db, robj *key);
|
2011-11-09 10:51:19 -05:00
|
|
|
long long getExpire(redisDb *db, robj *key);
|
Replication: fix the infamous key leakage of writable slaves + EXPIRE.
BACKGROUND AND USE CASE
Redis slaves are normally read only, however they support a "writable"
mode which is very handy when scaling reads on slaves, that actually
need write operations in order to access data. For instance imagine
having slaves replicating certain Sets keys from the master. When
accessing the data on the slave, we want to perform intersections between
such Sets values. However we don't want to intersect each time: to cache
the intersection for some time often is a good idea.
To do so, it is possible to setup a slave as a writable slave, and
perform the intersection on the slave side, perhaps setting a TTL on the
resulting key so that it will expire after some time.
THE BUG
Problem: in order to have a consistent replication, expiring of keys in
Redis replication is up to the master, that synthesizes DEL operations to
send in the replication stream. However slaves logically expire keys
by hiding them from read attempts from clients so that if the master did
not promptly send a DEL, the clients still see logically expired keys
as non existing.
Because slaves don't actively expire keys by actually evicting them but
just masking from the POV of read operations, if a key is created in a
writable slave, and an expire is set, the key will be leaked forever:
1. No DEL will be received from the master, which does not know about
such a key at all.
2. No eviction will be performed by the slave, since it needs to disable
eviction because it's up to masters, otherwise consistency of data is
lost.
THE FIX
In order to fix the problem, the slave should be able to tag keys that
were created in the slave side and have an expire set in some way.
My solution involved using an unique additional dictionary created by
the writable slave only if needed. The dictionary is obviously keyed by
the key name that we need to track: all the keys that are set with an
expire directly by a client writing to the slave are tracked.
The value in the dictionary is a bitmap of all the DBs where such a key
name need to be tracked, so that we can use a single dictionary to track
keys in all the DBs used by the slave (actually this limits the solution
to the first 64 DBs, but the default with Redis is to use 16 DBs).
This solution allows to pay both a small complexity and CPU penalty,
which is zero when the feature is not used, actually. The slave-side
eviction is encapsulated in code which is not coupled with the rest of
the Redis core, if not for the hook to track the keys.
TODO
I'm doing the first smoke tests to see if the feature works as expected:
so far so good. Unit tests should be added before merging into the
4.0 branch.
2016-12-13 04:20:06 -05:00
|
|
|
void setExpire(client *c, redisDb *db, robj *key, long long when);
|
2016-06-14 09:33:59 -04:00
|
|
|
robj *lookupKey(redisDb *db, robj *key, int flags);
|
2010-06-21 18:07:48 -04:00
|
|
|
robj *lookupKeyRead(redisDb *db, robj *key);
|
|
|
|
robj *lookupKeyWrite(redisDb *db, robj *key);
|
2015-07-26 09:20:46 -04:00
|
|
|
robj *lookupKeyReadOrReply(client *c, robj *key, robj *reply);
|
|
|
|
robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply);
|
2016-06-14 09:33:59 -04:00
|
|
|
robj *lookupKeyReadWithFlags(redisDb *db, robj *key, int flags);
|
2016-12-16 03:02:50 -05:00
|
|
|
robj *objectCommandLookup(client *c, robj *key);
|
|
|
|
robj *objectCommandLookupOrReply(client *c, robj *key, robj *reply);
|
2018-06-20 03:40:18 -04:00
|
|
|
void objectSetLRUOrLFU(robj *val, long long lfu_freq, long long lru_idle,
|
|
|
|
long long lru_clock);
|
2016-06-14 09:33:59 -04:00
|
|
|
/* Lookup flag values: LOOKUP_NONE is zero (no flag set), LOOKUP_NOTOUCH is
 * bit 0. NOTE(review): presumably passed as the 'flags' argument of
 * lookupKey() / lookupKeyReadWithFlags(); confirm what NOTOUCH suppresses
 * against the implementation. */
#define LOOKUP_NONE 0
|
|
|
|
#define LOOKUP_NOTOUCH (1<<0)
|
2011-06-14 09:34:27 -04:00
|
|
|
void dbAdd(redisDb *db, robj *key, robj *val);
|
|
|
|
void dbOverwrite(redisDb *db, robj *key, robj *val);
|
|
|
|
void setKey(redisDb *db, robj *key, robj *val);
|
2010-06-21 18:07:48 -04:00
|
|
|
int dbExists(redisDb *db, robj *key);
|
|
|
|
robj *dbRandomKey(redisDb *db);
|
2015-07-30 05:46:31 -04:00
|
|
|
int dbSyncDelete(redisDb *db, robj *key);
|
2010-06-21 18:07:48 -04:00
|
|
|
int dbDelete(redisDb *db, robj *key);
|
2014-03-30 12:32:17 -04:00
|
|
|
robj *dbUnshareStringValue(redisDb *db, robj *key, robj *o);
|
2015-09-28 04:47:45 -04:00
|
|
|
|
|
|
|
#define EMPTYDB_NO_FLAGS 0 /* No flags. */
|
|
|
|
#define EMPTYDB_ASYNC (1<<0) /* Reclaim memory in another thread. */
|
|
|
|
long long emptyDb(int dbnum, int flags, void(callback)(void*));
|
2019-07-01 08:22:29 -04:00
|
|
|
long long emptyDbGeneric(redisDb *dbarray, int dbnum, int flags, void(callback)(void*));
|
|
|
|
/* Takes no arguments and returns a long long. Declared with an explicit
 * (void) parameter list: in C an empty () leaves the parameters unspecified,
 * so the compiler would not reject a call that passes arguments by mistake. */
long long dbTotalServerKeyCount(void);
|
2015-09-28 04:47:45 -04:00
|
|
|
|
2015-07-26 09:20:46 -04:00
|
|
|
int selectDb(client *c, int id);
|
2010-12-29 13:39:42 -05:00
|
|
|
void signalModifiedKey(redisDb *db, robj *key);
|
|
|
|
void signalFlushedDb(int dbid);
|
2013-02-25 05:24:42 -05:00
|
|
|
unsigned int getKeysInSlot(unsigned int hashslot, robj **keys, unsigned int count);
|
|
|
|
unsigned int countKeysInSlot(unsigned int hashslot);
|
2014-05-14 04:46:37 -04:00
|
|
|
unsigned int delKeysInSlot(unsigned int hashslot);
|
2013-02-25 05:20:17 -05:00
|
|
|
int verifyClusterConfigWithData(void);
|
2015-07-26 09:20:46 -04:00
|
|
|
void scanGenericCommand(client *c, robj *o, unsigned long cursor);
|
|
|
|
int parseScanCursorOrReply(client *c, robj *o, unsigned long *cursor);
|
2015-07-30 05:46:31 -04:00
|
|
|
void slotToKeyAdd(robj *key);
|
|
|
|
void slotToKeyDel(robj *key);
|
|
|
|
void slotToKeyFlush(void);
|
|
|
|
int dbAsyncDelete(redisDb *db, robj *key);
|
2015-09-28 04:47:45 -04:00
|
|
|
void emptyDbAsync(redisDb *db);
|
|
|
|
void slotToKeyFlushAsync(void);
|
2015-09-28 04:51:25 -04:00
|
|
|
size_t lazyfreeGetPendingObjectsCount(void);
|
2018-07-31 00:07:57 -04:00
|
|
|
void freeObjAsync(robj *o);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2011-03-23 13:09:17 -04:00
|
|
|
/* API to get key arguments from commands */
|
2014-03-10 08:18:41 -04:00
|
|
|
int *getKeysFromCommand(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
2011-03-23 13:09:17 -04:00
|
|
|
void getKeysFreeResult(int *result);
|
2014-03-10 08:18:41 -04:00
|
|
|
int *zunionInterGetKeys(struct redisCommand *cmd,robj **argv, int argc, int *numkeys);
|
2014-03-10 10:26:10 -04:00
|
|
|
int *evalGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
2014-03-10 11:26:08 -04:00
|
|
|
int *sortGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
2015-12-11 12:09:01 -05:00
|
|
|
int *migrateGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
2017-04-07 18:31:11 -04:00
|
|
|
int *georadiusGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
2017-09-08 05:40:16 -04:00
|
|
|
int *xreadGetKeys(struct redisCommand *cmd, robj **argv, int argc, int *numkeys);
|
2011-03-23 13:09:17 -04:00
|
|
|
|
2011-03-29 11:51:15 -04:00
|
|
|
/* Cluster */
|
|
|
|
void clusterInit(void);
|
|
|
|
unsigned short crc16(const char *buf, int len);
|
|
|
|
unsigned int keyHashSlot(char *key, int keylen);
|
|
|
|
void clusterCron(void);
|
2011-10-07 09:37:34 -04:00
|
|
|
void clusterPropagatePublish(robj *channel, robj *message);
|
2012-11-11 18:45:10 -05:00
|
|
|
void migrateCloseTimedoutSockets(void);
|
2013-09-26 10:54:43 -04:00
|
|
|
void clusterBeforeSleep(void);
|
2018-03-30 07:16:07 -04:00
|
|
|
int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uint8_t type, unsigned char *payload, uint32_t len);
|
2011-03-29 11:51:15 -04:00
|
|
|
|
2012-07-23 06:54:52 -04:00
|
|
|
/* Sentinel */
|
|
|
|
void initSentinelConfig(void);
|
|
|
|
void initSentinel(void);
|
|
|
|
void sentinelTimer(void);
|
|
|
|
char *sentinelHandleConfiguration(char **argv, int argc);
|
2013-11-21 06:27:14 -05:00
|
|
|
void sentinelIsRunning(void);
|
2012-07-23 06:54:52 -04:00
|
|
|
|
2017-07-10 07:38:23 -04:00
|
|
|
/* redis-check-rdb & aof */
|
|
|
|
int redis_check_rdb(char *rdbfilename, FILE *fp);
|
|
|
|
int redis_check_rdb_main(int argc, char **argv, FILE *fp);
|
|
|
|
int redis_check_aof_main(int argc, char **argv);
|
2014-05-09 12:06:06 -04:00
|
|
|
|
2011-04-30 11:46:52 -04:00
|
|
|
/* Scripting */
|
2015-11-05 05:10:46 -05:00
|
|
|
void scriptingInit(int setup);
|
2015-11-13 03:31:01 -05:00
|
|
|
int ldbRemoveChild(pid_t pid);
|
|
|
|
void ldbKillForkedSessions(void);
|
2015-11-14 16:13:32 -05:00
|
|
|
int ldbPendingChildren(void);
|
Refactoring: improve luaCreateFunction() API.
The function in its initial form, and after the fixes for the PSYNC2
bugs, required code duplication in multiple spots. This commit modifies
it in order to always compute the script name independently, and to
return the SDS of the SHA of the body: this way it can be used in all
the places, including for SCRIPT LOAD, without duplicating the code to
create the Lua function name. Note that this requires to re-compute the
body SHA1 in the case of EVAL seeing a script for the first time, but
this should not change scripting performance in any way because new
scripts definition is a rare event happening the first time a script is
seen, and the SHA1 computation is anyway not a very slow process against
the typical Redis script and compared to the actual Lua byte compiling of
the body.
Note that the function used to assert() if a duplicated script was
loaded, however actually now two times over three, we want the function
to handle duplicated scripts just fine: this happens in SCRIPT LOAD and
in RDB AUX "lua" loading. Moreover the assert was not defending against
some obvious failure mode, so now the function always tests against
already defined functions at start.
2017-12-04 05:25:20 -05:00
|
|
|
sds luaCreateFunction(client *c, lua_State *lua, robj *body);
|
2011-04-30 11:46:52 -04:00
|
|
|
|
2013-12-03 11:43:53 -05:00
|
|
|
/* Blocked clients */
|
|
|
|
void processUnblockedClients(void);
|
2015-07-26 09:20:46 -04:00
|
|
|
void blockClient(client *c, int btype);
|
|
|
|
void unblockClient(client *c);
|
2018-09-03 12:39:18 -04:00
|
|
|
void queueClientForReprocessing(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void replyToBlockedClientTimedOut(client *c);
|
|
|
|
int getTimeoutFromObjectOrReply(client *c, robj *object, mstime_t *timeout, int unit);
|
Replication: disconnect blocked clients when switching to slave role.
Bug as old as Redis and blocking operations. It's hard to trigger since
only happens on instance role switch, but the results are quite bad
since an inconsistency between master and slave is created.
How to trigger the bug is a good description of the bug itself.
1. Client does "BLPOP mylist 0" in master.
2. Master is turned into slave, that replicates from New-Master.
3. Client does "LPUSH mylist foo" in New-Master.
4. New-Master propagates write to slave.
5. Slave receives the LPUSH, the blocked client get served.
Now Master "mylist" key has "foo", Slave "mylist" key is empty.
Highlights:
* At step "2" above, the client remains attached, basically escaping any
check performed during command dispatch: read only slave, in that case.
* At step "5" the slave (that was the master), serves the blocked client
consuming a list element, which is not consumed on the master side.
This scenario is technically likely to happen during failovers, however
since Redis Sentinel already disconnects clients using the CLIENT
command when changing the role of the instance, the bug is avoided in
Sentinel deployments.
Closes #2473.
2015-03-24 11:00:09 -04:00
|
|
|
void disconnectAllBlockedClients(void);
|
2017-09-06 09:43:28 -04:00
|
|
|
void handleClientsBlockedOnKeys(void);
|
|
|
|
void signalKeyAsReady(redisDb *db, robj *key);
|
2017-09-06 11:50:11 -04:00
|
|
|
void blockForKeys(client *c, int btype, robj **keys, int numkeys, mstime_t timeout, robj *target, streamID *ids);
|
2013-12-03 11:43:53 -05:00
|
|
|
|
2016-07-06 09:24:06 -04:00
|
|
|
/* expire.c -- Handling of expired keys */
|
|
|
|
void activeExpireCycle(int type);
|
Replication: fix the infamous key leakage of writable slaves + EXPIRE.
BACKGROUND AND USE CASE
Redis slaves are normally read only, however they support a "writable"
mode which is very handy when scaling reads on slaves, that actually
need write operations in order to access data. For instance imagine
having slaves replicating certain Sets keys from the master. When
accessing the data on the slave, we want to perform intersections between
such Sets values. However we don't want to intersect each time: to cache
the intersection for some time often is a good idea.
To do so, it is possible to setup a slave as a writable slave, and
perform the intersection on the slave side, perhaps setting a TTL on the
resulting key so that it will expire after some time.
THE BUG
Problem: in order to have a consistent replication, expiring of keys in
Redis replication is up to the master, that synthesizes DEL operations to
send in the replication stream. However slaves logically expire keys
by hiding them from read attempts from clients so that if the master did
not promptly send a DEL, the clients still see logically expired keys
as non existing.
Because slaves don't actively expire keys by actually evicting them but
just masking from the POV of read operations, if a key is created in a
writable slave, and an expire is set, the key will be leaked forever:
1. No DEL will be received from the master, which does not know about
such a key at all.
2. No eviction will be performed by the slave, since it needs to disable
eviction because it's up to masters, otherwise consistency of data is
lost.
THE FIX
In order to fix the problem, the slave should be able to tag keys that
were created in the slave side and have an expire set in some way.
My solution involved using an unique additional dictionary created by
the writable slave only if needed. The dictionary is obviously keyed by
the key name that we need to track: all the keys that are set with an
expire directly by a client writing to the slave are tracked.
The value in the dictionary is a bitmap of all the DBs where such a key
name need to be tracked, so that we can use a single dictionary to track
keys in all the DBs used by the slave (actually this limits the solution
to the first 64 DBs, but the default with Redis is to use 16 DBs).
This solution allows to pay both a small complexity and CPU penalty,
which is zero when the feature is not used, actually. The slave-side
eviction is encapsulated in code which is not coupled with the rest of
the Redis core, if not for the hook to track the keys.
TODO
I'm doing the first smoke tests to see if the feature works as expected:
so far so good. Unit tests should be added before merging into the
4.0 branch.
2016-12-13 04:20:06 -05:00
|
|
|
void expireSlaveKeys(void);
|
|
|
|
void rememberSlaveKeyWithExpire(redisDb *db, robj *key);
|
|
|
|
void flushSlaveKeysWithExpireList(void);
|
2016-12-13 10:02:29 -05:00
|
|
|
size_t getSlaveKeyWithExpireCount(void);
|
2016-07-06 09:24:06 -04:00
|
|
|
|
2016-07-07 09:01:58 -04:00
|
|
|
/* evict.c -- maxmemory handling and LRU eviction. */
|
2016-07-13 04:45:37 -04:00
|
|
|
void evictionPoolAlloc(void);
|
2016-07-15 06:12:52 -04:00
|
|
|
#define LFU_INIT_VAL 5
|
|
|
|
unsigned long LFUGetTimeInMinutes(void);
|
|
|
|
uint8_t LFULogIncr(uint8_t value);
|
2017-10-15 08:17:55 -04:00
|
|
|
unsigned long LFUDecrAndReturn(robj *o);
|
2016-07-07 09:01:58 -04:00
|
|
|
|
2016-12-13 10:27:13 -05:00
|
|
|
/* Keys hashing / comparison functions for dict.c hash tables. */
|
2017-02-20 10:09:54 -05:00
|
|
|
uint64_t dictSdsHash(const void *key);
|
2016-12-13 10:27:13 -05:00
|
|
|
int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
|
|
|
|
void dictSdsDestructor(void *privdata, void *val);
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
/* Git SHA1 */
|
|
|
|
char *redisGitSHA1(void);
|
|
|
|
char *redisGitDirty(void);
|
2012-11-29 08:20:08 -05:00
|
|
|
uint64_t redisBuildId(void);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
|
|
|
/* Commands prototypes */
|
2015-07-26 09:20:46 -04:00
|
|
|
void authCommand(client *c);
|
|
|
|
void pingCommand(client *c);
|
|
|
|
void echoCommand(client *c);
|
|
|
|
void commandCommand(client *c);
|
|
|
|
void setCommand(client *c);
|
|
|
|
void setnxCommand(client *c);
|
|
|
|
void setexCommand(client *c);
|
|
|
|
void psetexCommand(client *c);
|
|
|
|
void getCommand(client *c);
|
|
|
|
void delCommand(client *c);
|
2015-07-30 05:46:31 -04:00
|
|
|
void unlinkCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void existsCommand(client *c);
|
|
|
|
void setbitCommand(client *c);
|
|
|
|
void getbitCommand(client *c);
|
BITFIELD command initial implementation.
The new bitfield command is an extension to the Redis bit operations,
where not just single bit operations are performed, but the array of
bits composing a string, can be addressed at random, not aligned
offsets, with any width unsigned and signed integers like u8, s5, u10
(up to 64 bit signed integers and 63 bit unsigned integers).
The BITFIELD command supports subcommands that can SET, GET, or INCRBY
those arbitrary bit counters, with multiple overflow semantics.
Trivia and credits:
A similar command was imagined a few times in the past, but for
some reason looked a bit far fetched or not well specified.
Finally the command was proposed again in a clear form by
Yoav Steinberg from Redis Labs, that proposed a set of commands on
arbitrary sized integers stored at bit offsets.
Starting from this proposal I wrote an initial specification of a single
command with sub-commands similar to what Yoav envisioned, using short
names for types definitions, and adding control on the overflow.
This commit is the resulting implementation.
Examples:
BITFIELD mykey OVERFLOW wrap INCRBY i2 10 -1 GET i2 10
2016-02-25 17:31:45 -05:00
|
|
|
void bitfieldCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void setrangeCommand(client *c);
|
|
|
|
void getrangeCommand(client *c);
|
|
|
|
void incrCommand(client *c);
|
|
|
|
void decrCommand(client *c);
|
|
|
|
void incrbyCommand(client *c);
|
|
|
|
void decrbyCommand(client *c);
|
|
|
|
void incrbyfloatCommand(client *c);
|
|
|
|
void selectCommand(client *c);
|
SWAPDB command.
This new command swaps two Redis databases, so that immediately all the
clients connected to a given DB will see the data of the other DB, and
the other way around. Example:
SWAPDB 0 1
This will swap DB 0 with DB 1. All the clients connected with DB 0 will
immediately see the new data, exactly like all the clients connected
with DB 1 will see the data that was formerly of DB 0.
MOTIVATION AND HISTORY
---
The command was recently demanded by Pedro Melo, but was suggested in
the past multiple times, and always refused by me.
The reason why it was asked: Imagine you have clients operating in DB 0.
At the same time, you create a new version of the dataset in DB 1.
When the new version of the dataset is available, you immediately want
to swap the two views, so that the clients will transparently use the
new version of the data. At the same time you'll likely destroy the
DB 1 dataset (that contains the old data) and start to build a new
version, to repeat the process.
This is an interesting pattern, but the reason why I always opposed to
implement this, was that FLUSHDB was a blocking command in Redis before
Redis 4.0 improvements. Now we have FLUSHDB ASYNC that releases the
old data in O(1) from the point of view of the client, to reclaim memory
incrementally in a different thread.
At this point, the pattern can really be supported without latency
spikes, so I'm providing this implementation for the users to comment.
In case a very compelling argument will be made against this new command
it may be removed.
BEHAVIOR WITH BLOCKING OPERATIONS
---
If a client is blocking for a list in a given DB, after the swap it will
still be blocked in the same DB ID, since this is the most logical thing
to do: if I was blocked for a list push to list "foo", even after the
swap I want still a LPUSH to reach the key "foo" in the same DB in order
to unblock.
However an interesting thing happens when a client is, for instance,
blocked waiting for new elements in list "foo" of DB 0. Then the DB
0 and 1 are swapped with SWAPDB. However the DB 1 happened to have
a list called "foo" containing elements. When this happens, this
implementation can correctly unblock the client.
It is possible that there are subtle corner cases that are not covered
in the implementation, but since the command is self-contained from the
POV of the implementation and the Redis core, it cannot cause anything
bad if not used.
Tests and documentation are yet to be provided.
2016-10-14 09:28:04 -04:00
|
|
|
void swapdbCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void randomkeyCommand(client *c);
|
|
|
|
void keysCommand(client *c);
|
|
|
|
void scanCommand(client *c);
|
|
|
|
void dbsizeCommand(client *c);
|
|
|
|
void lastsaveCommand(client *c);
|
|
|
|
void saveCommand(client *c);
|
|
|
|
void bgsaveCommand(client *c);
|
|
|
|
void bgrewriteaofCommand(client *c);
|
|
|
|
void shutdownCommand(client *c);
|
|
|
|
void moveCommand(client *c);
|
|
|
|
void renameCommand(client *c);
|
|
|
|
void renamenxCommand(client *c);
|
|
|
|
void lpushCommand(client *c);
|
|
|
|
void rpushCommand(client *c);
|
|
|
|
void lpushxCommand(client *c);
|
|
|
|
void rpushxCommand(client *c);
|
|
|
|
void linsertCommand(client *c);
|
|
|
|
void lpopCommand(client *c);
|
|
|
|
void rpopCommand(client *c);
|
|
|
|
void llenCommand(client *c);
|
|
|
|
void lindexCommand(client *c);
|
|
|
|
void lrangeCommand(client *c);
|
|
|
|
void ltrimCommand(client *c);
|
|
|
|
void typeCommand(client *c);
|
|
|
|
void lsetCommand(client *c);
|
|
|
|
void saddCommand(client *c);
|
|
|
|
void sremCommand(client *c);
|
|
|
|
void smoveCommand(client *c);
|
|
|
|
void sismemberCommand(client *c);
|
|
|
|
void scardCommand(client *c);
|
|
|
|
void spopCommand(client *c);
|
|
|
|
void srandmemberCommand(client *c);
|
|
|
|
void sinterCommand(client *c);
|
|
|
|
void sinterstoreCommand(client *c);
|
|
|
|
void sunionCommand(client *c);
|
|
|
|
void sunionstoreCommand(client *c);
|
|
|
|
void sdiffCommand(client *c);
|
|
|
|
void sdiffstoreCommand(client *c);
|
|
|
|
void sscanCommand(client *c);
|
|
|
|
void syncCommand(client *c);
|
|
|
|
void flushdbCommand(client *c);
|
|
|
|
void flushallCommand(client *c);
|
|
|
|
void sortCommand(client *c);
|
|
|
|
void lremCommand(client *c);
|
|
|
|
void rpoplpushCommand(client *c);
|
|
|
|
void infoCommand(client *c);
|
|
|
|
void mgetCommand(client *c);
|
|
|
|
void monitorCommand(client *c);
|
|
|
|
void expireCommand(client *c);
|
|
|
|
void expireatCommand(client *c);
|
|
|
|
void pexpireCommand(client *c);
|
|
|
|
void pexpireatCommand(client *c);
|
|
|
|
void getsetCommand(client *c);
|
|
|
|
void ttlCommand(client *c);
|
2016-06-14 09:33:59 -04:00
|
|
|
void touchCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void pttlCommand(client *c);
|
|
|
|
void persistCommand(client *c);
|
2018-09-10 04:43:39 -04:00
|
|
|
void replicaofCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void roleCommand(client *c);
|
|
|
|
void debugCommand(client *c);
|
|
|
|
void msetCommand(client *c);
|
|
|
|
void msetnxCommand(client *c);
|
|
|
|
void zaddCommand(client *c);
|
|
|
|
void zincrbyCommand(client *c);
|
|
|
|
void zrangeCommand(client *c);
|
|
|
|
void zrangebyscoreCommand(client *c);
|
|
|
|
void zrevrangebyscoreCommand(client *c);
|
|
|
|
void zrangebylexCommand(client *c);
|
|
|
|
void zrevrangebylexCommand(client *c);
|
|
|
|
void zcountCommand(client *c);
|
|
|
|
void zlexcountCommand(client *c);
|
|
|
|
void zrevrangeCommand(client *c);
|
|
|
|
void zcardCommand(client *c);
|
|
|
|
void zremCommand(client *c);
|
|
|
|
void zscoreCommand(client *c);
|
|
|
|
void zremrangebyscoreCommand(client *c);
|
|
|
|
void zremrangebylexCommand(client *c);
|
2018-05-11 11:31:46 -04:00
|
|
|
void zpopminCommand(client *c);
|
|
|
|
void zpopmaxCommand(client *c);
|
|
|
|
void bzpopminCommand(client *c);
|
|
|
|
void bzpopmaxCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void multiCommand(client *c);
|
|
|
|
void execCommand(client *c);
|
|
|
|
void discardCommand(client *c);
|
|
|
|
void blpopCommand(client *c);
|
|
|
|
void brpopCommand(client *c);
|
|
|
|
void brpoplpushCommand(client *c);
|
|
|
|
void appendCommand(client *c);
|
|
|
|
void strlenCommand(client *c);
|
|
|
|
void zrankCommand(client *c);
|
|
|
|
void zrevrankCommand(client *c);
|
|
|
|
void hsetCommand(client *c);
|
|
|
|
void hsetnxCommand(client *c);
|
|
|
|
void hgetCommand(client *c);
|
|
|
|
void hmsetCommand(client *c);
|
|
|
|
void hmgetCommand(client *c);
|
|
|
|
void hdelCommand(client *c);
|
|
|
|
void hlenCommand(client *c);
|
|
|
|
void hstrlenCommand(client *c);
|
|
|
|
void zremrangebyrankCommand(client *c);
|
|
|
|
void zunionstoreCommand(client *c);
|
|
|
|
void zinterstoreCommand(client *c);
|
|
|
|
void zscanCommand(client *c);
|
|
|
|
void hkeysCommand(client *c);
|
|
|
|
void hvalsCommand(client *c);
|
|
|
|
void hgetallCommand(client *c);
|
|
|
|
void hexistsCommand(client *c);
|
|
|
|
void hscanCommand(client *c);
|
|
|
|
void configCommand(client *c);
|
|
|
|
void hincrbyCommand(client *c);
|
|
|
|
void hincrbyfloatCommand(client *c);
|
|
|
|
/* Prototypes with the signature void f(client *c).
 * NOTE(review): names suggest the publish/subscribe handlers (channel and
 * pattern subscribe/unsubscribe, PUBLISH, PUBSUB); bodies are not shown. */
void subscribeCommand(client *c);
|
|
|
|
void unsubscribeCommand(client *c);
|
|
|
|
void psubscribeCommand(client *c);
|
|
|
|
void punsubscribeCommand(client *c);
|
|
|
|
void publishCommand(client *c);
|
|
|
|
void pubsubCommand(client *c);
|
|
|
|
/* Two prototypes with the signature void f(client *c).
 * NOTE(review): presumably the WATCH / UNWATCH handlers (name-derived). */
void watchCommand(client *c);
|
|
|
|
void unwatchCommand(client *c);
|
|
|
|
/* Prototypes with the signature void f(client *c).
 * NOTE(review): names suggest cluster administration plus key
 * serialization/transfer handlers (CLUSTER, RESTORE, MIGRATE, ASKING,
 * READONLY, READWRITE, DUMP) -- inferred from naming only, confirm. */
void clusterCommand(client *c);
|
|
|
|
void restoreCommand(client *c);
|
|
|
|
void migrateCommand(client *c);
|
|
|
|
void askingCommand(client *c);
|
|
|
|
void readonlyCommand(client *c);
|
|
|
|
void readwriteCommand(client *c);
|
|
|
|
void dumpCommand(client *c);
|
|
|
|
/* Prototypes with the signature void f(client *c).
 * NOTE(review): presumably the OBJECT, MEMORY, CLIENT and HELLO handlers
 * (name-derived; definitions are outside this chunk). */
void objectCommand(client *c);
|
2016-09-13 04:26:36 -04:00
|
|
|
void memoryCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void clientCommand(client *c);
|
2018-12-04 06:46:16 -05:00
|
|
|
void helloCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
/* Prototypes with the signature void f(client *c).
 * NOTE(review): presumably the scripting handlers (EVAL, EVALSHA, SCRIPT) --
 * inferred from the names only. */
void evalCommand(client *c);
|
|
|
|
void evalShaCommand(client *c);
|
|
|
|
void scriptCommand(client *c);
|
|
|
|
/* Takes the client as its only argument and returns nothing.
 * NOTE(review): presumably the TIME handler (name-derived). */
void timeCommand(client *c);
|
|
|
|
/* Prototypes with the signature void f(client *c).
 * NOTE(review): presumably the bitmap handlers (BITOP, BITCOUNT, BITPOS) --
 * name-derived, confirm against the definitions. */
void bitopCommand(client *c);
|
|
|
|
void bitcountCommand(client *c);
|
|
|
|
void bitposCommand(client *c);
|
|
|
|
/* Two prototypes with the signature void f(client *c).
 * NOTE(review): presumably the REPLCONF and WAIT handlers (name-derived). */
void replconfCommand(client *c);
|
|
|
|
void waitCommand(client *c);
|
|
|
|
/* Prototypes with the signature void f(client *c).
 * NOTE(review): names suggest the geospatial (GEO*) handlers; the "ro"
 * suffixed radius variants are presumably read-only counterparts -- all
 * inferred from naming, the definitions are not visible here. */
void geoencodeCommand(client *c);
|
|
|
|
void geodecodeCommand(client *c);
|
2017-06-30 04:03:37 -04:00
|
|
|
void georadiusbymemberCommand(client *c);
|
|
|
|
void georadiusbymemberroCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void georadiusCommand(client *c);
|
2017-06-30 04:03:37 -04:00
|
|
|
void georadiusroCommand(client *c);
|
2015-07-26 09:20:46 -04:00
|
|
|
void geoaddCommand(client *c);
|
|
|
|
void geohashCommand(client *c);
|
|
|
|
void geoposCommand(client *c);
|
|
|
|
void geodistCommand(client *c);
|
|
|
|
/* Prototypes with the signature void f(client *c).
 * NOTE(review): the "pf" prefix presumably denotes the HyperLogLog command
 * family (self test, add, count, merge, debug) -- name-derived, confirm. */
void pfselftestCommand(client *c);
|
|
|
|
void pfaddCommand(client *c);
|
|
|
|
void pfcountCommand(client *c);
|
|
|
|
void pfmergeCommand(client *c);
|
|
|
|
void pfdebugCommand(client *c);
|
|
|
|
/* Prototypes with the signature void f(client *c).
 * NOTE(review): presumably the LATENCY and MODULE handlers plus a handler
 * that emits a security warning; purposes are inferred from names only. */
void latencyCommand(client *c);
|
2016-03-06 07:44:24 -05:00
|
|
|
void moduleCommand(client *c);
|
2016-08-03 05:12:13 -04:00
|
|
|
void securityWarningCommand(client *c);
|
2017-08-30 06:40:27 -04:00
|
|
|
/* Prototypes with the signature void f(client *c).
 * NOTE(review): the "x" prefix presumably denotes the stream command family
 * (add, range queries, length, read, consumer groups, acknowledge, pending,
 * claim, info, delete, trim) -- inferred from naming; bodies are elsewhere. */
void xaddCommand(client *c);
|
|
|
|
void xrangeCommand(client *c);
|
2017-11-20 05:25:05 -05:00
|
|
|
void xrevrangeCommand(client *c);
|
2017-09-06 06:03:17 -04:00
|
|
|
void xlenCommand(client *c);
|
2017-09-08 05:40:16 -04:00
|
|
|
void xreadCommand(client *c);
|
2018-01-16 09:38:22 -05:00
|
|
|
void xgroupCommand(client *c);
|
2018-10-16 07:17:14 -04:00
|
|
|
void xsetidCommand(client *c);
|
2018-01-25 10:39:49 -05:00
|
|
|
void xackCommand(client *c);
|
2018-01-26 11:27:34 -05:00
|
|
|
void xpendingCommand(client *c);
|
2018-02-21 05:42:51 -05:00
|
|
|
void xclaimCommand(client *c);
|
2018-03-07 10:08:06 -05:00
|
|
|
void xinfoCommand(client *c);
|
2018-04-18 07:12:09 -04:00
|
|
|
void xdelCommand(client *c);
|
2018-04-19 10:25:29 -04:00
|
|
|
void xtrimCommand(client *c);
|
2018-09-12 05:34:10 -04:00
|
|
|
/* Two prototypes with the signature void f(client *c).
 * NOTE(review): presumably the LOLWUT and ACL handlers (name-derived). */
void lolwutCommand(client *c);
|
2019-01-15 03:36:12 -05:00
|
|
|
void aclCommand(client *c);
|
2010-06-21 18:07:48 -04:00
|
|
|
|
2010-07-27 03:36:42 -04:00
|
|
|
/* When the compiler defines __GNUC__, redeclare calloc, free, malloc and
 * realloc with the "deprecated" attribute, so that any direct use of them in
 * code including this header is flagged with a deprecation diagnostic.
 * NOTE(review): presumably meant to steer code toward the project's own
 * allocation wrappers -- confirm. Other compilers see no redeclaration. */
#if defined(__GNUC__)
|
|
|
|
void *calloc(size_t count, size_t size) __attribute__ ((deprecated));
|
|
|
|
void free(void *ptr) __attribute__ ((deprecated));
|
|
|
|
void *malloc(size_t size) __attribute__ ((deprecated));
|
|
|
|
void *realloc(void *ptr, size_t size) __attribute__ ((deprecated));
|
|
|
|
#endif
|
|
|
|
|
2011-10-04 11:22:29 -04:00
|
|
|
/* Debugging stuff */
|
2016-06-20 16:08:06 -04:00
|
|
|
/* Assertion and panic reporters. All three receive a source file name and a
 * line number. The two assert variants also take a string named estr
 * (presumably the text of the failed expression -- confirm); the WithInfo
 * variant additionally accepts a client and an object, both const.
 * _serverPanic takes a message followed by variadic arguments; whether msg
 * is a printf-style format is not visible here -- confirm. */
void _serverAssertWithInfo(const client *c, const robj *o, const char *estr, const char *file, int line);
|
|
|
|
void _serverAssert(const char *estr, const char *file, int line);
|
2017-01-18 11:05:10 -05:00
|
|
|
void _serverPanic(const char *file, int line, const char *msg, ...);
|
2011-11-24 09:47:26 -05:00
|
|
|
/* Crash-report related prototypes.
 *  - bugReportStart: no arguments, no return value.
 *  - serverLogObjectDebugInfo: takes a const object pointer; presumably logs
 *    details about it (name-derived).
 *  - sigsegvHandler: has the three-argument (int, siginfo_t *, void *) shape
 *    used for sigaction SA_SIGINFO handlers; NOTE(review): how it is
 *    installed is not visible in this chunk. */
void bugReportStart(void);
|
2016-06-20 16:08:06 -04:00
|
|
|
void serverLogObjectDebugInfo(const robj *o);
|
2012-01-20 06:20:45 -05:00
|
|
|
void sigsegvHandler(int sig, siginfo_t *info, void *secret);
|
|
|
|
/* Both return an sds string. genRedisInfoString takes a section name;
 * genModulesInfoString takes an existing sds and returns an sds.
 * NOTE(review): presumably the latter appends module information to the
 * string it is given and returns the (possibly reallocated) result; the
 * ownership rules are not visible here -- confirm in the definitions. */
sds genRedisInfoString(char *section);
|
2019-04-16 15:16:12 -04:00
|
|
|
sds genModulesInfoString(sds info);
|
2012-03-27 05:47:51 -04:00
|
|
|
/* Watchdog control prototypes. enableWatchdog and watchdogScheduleSignal take
 * an integer period; disableWatchdog takes nothing.
 * NOTE(review): the unit of "period" is not stated in this header -- confirm
 * against the definitions before relying on it. */
void enableWatchdog(int period);
|
|
|
|
void disableWatchdog(void);
|
|
|
|
void watchdogScheduleSignal(int period);
|
2015-07-26 09:17:43 -04:00
|
|
|
/* Diagnostics helpers.
 *  - serverLogHexDump: takes a log level, a description string and a buffer
 *    given as pointer plus length; presumably logs the buffer as hex.
 *  - memtest_preserving_test: takes a word pointer, a size in bytes and a
 *    pass count, and returns an int; NOTE(review): the meaning of the return
 *    value is not visible here -- confirm.
 *  - mixDigest / xorDigest: both take a digest buffer and a pointer/length
 *    pair; presumably they fold the data into the digest in two different
 *    ways (name-derived). The digest buffer size is not stated here. */
void serverLogHexDump(int level, char *descr, void *value, size_t len);
|
2015-12-16 11:41:20 -05:00
|
|
|
int memtest_preserving_test(unsigned long *m, size_t bytes, int passes);
|
2017-07-06 04:29:19 -04:00
|
|
|
void mixDigest(unsigned char *digest, void *ptr, size_t len);
|
|
|
|
void xorDigest(unsigned char *digest, void *ptr, size_t len);
|
2019-02-27 08:35:58 -05:00
|
|
|
/* Takes a command descriptor and a flags string and returns an int.
 * NOTE(review): presumably parses strflags into the command's flag fields and
 * reports success/failure through the return value -- the exact return
 * convention is not visible in this header, confirm in the definition. */
int populateCommandTableParseFlags(struct redisCommand *c, char *strflags);
|
2012-07-23 06:54:52 -04:00
|
|
|
|
|
|
|
/* Print one debug line to stdout: the prefix "DEBUG <file>:<line> > ", then
 * the printf-style message, then a newline. Usage:
 *
 *     redisDebug("got here");
 *     redisDebug("key=%s len=%d", key, len);
 *
 * The first argument is the format string and the rest are its values. The
 * line is produced by three chained printf() calls rather than one, so the
 * format can be used without any extra argument: a single printf with
 * "..., __LINE__, __VA_ARGS__" leaves a dangling comma in that case and does
 * not compile. The whole macro is one expression whose value is the result
 * of the final printf(). */
#define redisDebug(...) \
    (printf("DEBUG %s:%d > ", __FILE__, __LINE__), \
     printf(__VA_ARGS__), \
     printf("\n"))
|
|
|
|
/* Print a "-- MARK <file>:<line> --" line to stdout identifying the call
 * site. Takes no arguments; evaluates to the printf() return value. The
 * printf must be the line directly after the continuation backslash,
 * otherwise it is not part of the macro body. */
#define redisDebugMark() \
    printf("-- MARK %s:%d --\n", __FILE__, __LINE__)
|
|
|
|
|
2010-06-21 18:07:48 -04:00
|
|
|
#endif
|